diff --git a/Cargo.toml b/Cargo.toml index b62a011..f4f6864 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,10 @@ rand_distr = "0.6" name = "lib" harness = false +[[bench]] +name = "incremental" +harness = false + [lib] bench = false diff --git a/benches/incremental.rs b/benches/incremental.rs new file mode 100644 index 0000000..2dc8d07 --- /dev/null +++ b/benches/incremental.rs @@ -0,0 +1,253 @@ +use std::hint::black_box; +use std::time::{Duration, Instant}; + +use rand::prelude::*; +use rand_distr::{Distribution, Normal}; + +use frizbee::{Config, IncrementalMatcher, match_list}; + +const ITERS: u32 = 20; + +fn main() { + let config = Config::default(); + + for &count in &[50_000usize, 200_000, 500_000] { + let haystacks = gen_paths(count, 42); + let refs: Vec<&str> = haystacks.iter().map(|s| s.as_str()).collect(); + + println!("--- {} haystacks ---\n", count); + + bench_query(&refs, &["m", "mc", "mch"], &config); + bench_query( + &refs, + &[ + "s", "sr", "src", "src/", "src/c", "src/co", "src/com", "src/comp", + ], + &config, + ); + bench_query(&refs, &["B", "Bt", "BtL", "BtLs"], &config); + bench_query(&refs, &["z", "zx", "zxq"], &config); + + println!(" -- backspace --\n"); + bench_backspace( + &refs, + &["s", "sr", "src", "src/", "src/c", "src/co", "src/com", "src/comp"], + &config, + ); + } +} + +fn bench_query(haystacks: &[&str], steps: &[&str], config: &Config) { + let one_shot_total = time_avg(ITERS, || { + for &n in steps { + black_box(match_list(n, black_box(haystacks), config)); + } + }); + let incr_total = time_avg(ITERS, || { + let mut m = IncrementalMatcher::new(config); + for &n in steps { + black_box(m.match_list(n, black_box(haystacks))); + } + }); + + let label = steps.last().unwrap(); + println!( + " {:?}: one-shot {:>9.2?} incr {:>9.2?} ({:.2}x)", + label, + one_shot_total, + incr_total, + one_shot_total.as_nanos() as f64 / incr_total.as_nanos() as f64, + ); + + println!( + " {:>10} {:>8} {:>10} {:>10} {:>7}", + "needle", "matches", "one-shot", "incr", "speedup" + ); + + for (i, &needle) in steps.iter().enumerate() { + let n_matches = match_list(needle, haystacks, config).len(); + + let os = time_avg(ITERS, || { + black_box(match_list(black_box(needle), black_box(haystacks), config)); + }); + + // replay prior steps then measure just this one + let inc = time_avg(ITERS, || { + let mut m = IncrementalMatcher::new(config); + for &prev in &steps[..i] { + m.match_list(prev, haystacks); + } + black_box(m.match_list(black_box(needle), black_box(haystacks))); + }); + let setup = if i > 0 { + time_avg(ITERS, || { + let mut m = IncrementalMatcher::new(config); + for &prev in &steps[..i] { + m.match_list(prev, haystacks); + } + }) + } else { + Duration::ZERO + }; + let inc_step = inc.saturating_sub(setup); + + if inc_step.as_nanos() == 0 { + println!( + " {:>10} {:>8} {:>10.2?} {:>10} {:>6}x", + format!("{:?}", needle), + n_matches, + os, + "~0", + ">99" + ); + } else { + let speedup = os.as_nanos() as f64 / inc_step.as_nanos() as f64; + println!( + " {:>10} {:>8} {:>10.2?} {:>10.2?} {:>5.1}x", + format!("{:?}", needle), + n_matches, + os, + inc_step, + speedup + ); + } + } + println!(); +} + +fn bench_backspace(haystacks: &[&str], steps: &[&str], config: &Config) { + println!( + " {:>10} {:>10} {:>10} {:>7}", + "backspace", "one-shot", "incr", "speedup" + ); + + for back_to in (0..steps.len() - 1).rev() { + let needle = steps[back_to]; + let os = time_avg(ITERS, || { + black_box(match_list(black_box(needle), black_box(haystacks), config)); + }); + + // type forward to the end, then backspace to `back_to` + let inc = time_avg(ITERS, || { + let mut m = IncrementalMatcher::new(config); + for &s in steps { + m.match_list(s, haystacks); + } + black_box(m.match_list(black_box(needle), black_box(haystacks))); + }); + let setup = time_avg(ITERS, || { + let mut m = IncrementalMatcher::new(config); + for &s in steps { + m.match_list(s, haystacks); + } + }); + let inc_step = inc.saturating_sub(setup); + + let label = format!("{:?}->{:?}", steps.last().unwrap(), needle); + if inc_step.as_nanos() == 0 { + println!( + " {:>10} {:>10.2?} {:>10} {:>6}x", + label, os, "~0", ">99" + ); + } else { + let speedup = os.as_nanos() as f64 / inc_step.as_nanos() as f64; + println!( + " {:>10} {:>10.2?} {:>10.2?} {:>5.1}x", + label, os, inc_step, speedup + ); + } + } + println!(); +} + +fn time_avg(iters: u32, mut f: impl FnMut()) -> Duration { + f(); // warmup + let start = Instant::now(); + for _ in 0..iters { + f(); + } + start.elapsed() / iters +} + +/// Generates paths like "src/components/ButtonList.tsx" +fn gen_paths(count: usize, seed: u64) -> Vec { + let mut rng = StdRng::seed_from_u64(seed); + let depth_dist = Normal::new(3.0, 1.0).unwrap(); + + let dirs = [ + "src", "lib", "test", "docs", "build", "config", "scripts", "assets", "public", "vendor", + "internal", "pkg", "cmd", "api", "web", + ]; + let subdirs = [ + "components", + "utils", + "hooks", + "services", + "models", + "views", + "controllers", + "middleware", + "helpers", + "types", + "store", + "pages", + "layouts", + "widgets", + "auth", + "db", + "cache", + ]; + let names = [ + "Button", "Input", "Modal", "Table", "Form", "List", "Card", "Nav", "Header", "Footer", + "Sidebar", "Menu", "Dialog", "Panel", "Search", "Filter", "Sort", "Page", "App", "User", + "Auth", "Data", "Config", "Cache", "Handler", "Manager", + ]; + let name_suffixes = [ + "", + "Item", + "List", + "View", + "Detail", + "Edit", + "Create", + "Form", + "Page", + "Layout", + "Container", + "Provider", + "Context", + "Service", + "Controller", + "Helper", + ]; + let exts = [ + ".rs", ".ts", ".tsx", ".js", ".py", ".go", ".java", ".css", ".json", ".toml", ".md", + ]; + + (0..count) + .map(|_| { + let depth = (depth_dist.sample(&mut rng) as f64) + .round() + .abs() + .max(1.0) + .min(5.0) as usize; + let mut parts = Vec::with_capacity(depth + 1); + + parts.push(dirs[rng.random_range(0..dirs.len())]); + for _ in 1..depth { + parts.push(subdirs[rng.random_range(0..subdirs.len())]); + } + + let name = names[rng.random_range(0..names.len())]; + let suffix = name_suffixes[rng.random_range(0..name_suffixes.len())]; + let ext = exts[rng.random_range(0..exts.len())]; + let num = if rng.random_ratio(1, 5) { + rng.random_range(1..100u32).to_string() + } else { + String::new() + }; + + format!("{}/{name}{suffix}{num}{ext}", parts.join("/")) + }) + .collect() +} diff --git a/src/incremental.rs b/src/incremental.rs new file mode 100644 index 0000000..2706ef7 --- /dev/null +++ b/src/incremental.rs @@ -0,0 +1,884 @@ +use itertools::Itertools; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::thread; + +use crate::one_shot::Matcher; +use crate::{Config, Match, MatchIndices}; + +fn common_prefix_len(a: &str, b: &str) -> usize { + a.bytes().zip(b.bytes()).take_while(|(x, y)| x == y).count() +} + +/// Incremental fuzzy matcher that reuses previous results when the needle changes. +/// +/// Maintains a history of which haystack indices matched at each needle length. On +/// forward extension (`"fo"` → `"foo"`), narrows the previous match set. On backspace +/// or partial change (`"foo"` → `"fo"` or `"foo"` → `"fob"`), restores the closest +/// historical match set sharing a common prefix, then narrows from there. +/// +/// # Example +/// +/// ```rust +/// use frizbee::{IncrementalMatcher, Config}; +/// +/// let haystacks = ["fooBar", "foo_bar", "prelude", "println!"]; +/// let mut matcher = IncrementalMatcher::new(&Config::default()); +/// +/// let matches = matcher.match_list("f", &haystacks); +/// let matches = matcher.match_list("fo", &haystacks); +/// let matches = matcher.match_list("foo", &haystacks); +/// // backspace: restores "fo" match set instead of full rescore +/// let matches = matcher.match_list("fo", &haystacks); +/// ``` +pub struct IncrementalMatcher { + matcher: Matcher, + prev_needle: String, + matched_indices: Vec, + prev_haystack_count: usize, + index_history: Vec>>, +} + +impl IncrementalMatcher { + pub fn new(config: &Config) -> Self { + Self { + matcher: Matcher::new("", config), + prev_needle: String::new(), + matched_indices: Vec::new(), + prev_haystack_count: 0, + index_history: Vec::new(), + } + } + + pub fn match_list>(&mut self, needle: &str, haystacks: &[S]) -> Vec { + let haystack_count = haystacks.len(); + self.matcher.set_needle(needle); + + if needle.is_empty() { + self.reset(); + self.prev_haystack_count = haystack_count; + return (0..haystack_count).map(Match::from_index).collect(); + } + + if haystack_count == self.prev_haystack_count && !self.prev_needle.is_empty() { + let level = self.find_reusable_level(needle); + if level > 0 { + self.restore_or_save(needle, level); + let mut matches = self.match_narrowed_unsorted(haystacks); + self.save_to_history(needle.len()); + if self.matcher.config.sort { + matches.sort_unstable(); + } + self.set_prev(needle, haystack_count); + return matches; + } + } else if self.is_prefix_extension(needle) && haystack_count > self.prev_haystack_count { + let matches = self.match_narrowed_with_growth(haystacks); + self.index_history.clear(); + self.set_prev(needle, haystack_count); + return matches; + } + + self.index_history.clear(); + let result = self.full_rescore(haystacks, needle, haystack_count); + self.save_to_history(needle.len()); + result + } + + pub fn match_list_indices>( + &mut self, + needle: &str, + haystacks: &[S], + ) -> Vec { + let haystack_count = haystacks.len(); + self.matcher.set_needle(needle); + + if needle.is_empty() { + self.reset(); + self.prev_haystack_count = haystack_count; + return (0..haystack_count).map(MatchIndices::from_index).collect(); + } + + if haystack_count == self.prev_haystack_count && !self.prev_needle.is_empty() { + let level = self.find_reusable_level(needle); + if level > 0 { + self.restore_or_save(needle, level); + let mut matches = self.match_narrowed_indices_unsorted(haystacks); + self.save_to_history(needle.len()); + if self.matcher.config.sort { + matches.sort_unstable(); + } + self.set_prev(needle, haystack_count); + return matches; + } + } else if self.is_prefix_extension(needle) && haystack_count > self.prev_haystack_count { + let matches = self.match_narrowed_indices_with_growth(haystacks); + self.index_history.clear(); + self.set_prev(needle, haystack_count); + return matches; + } + + self.index_history.clear(); + let result = self.full_rescore_indices(haystacks, needle, haystack_count); + self.save_to_history(needle.len()); + result + } + + pub fn match_list_parallel + Sync>( + &mut self, + needle: &str, + haystacks: &[S], + threads: usize, + ) -> Vec { + let haystack_count = haystacks.len(); + self.matcher.set_needle(needle); + + if needle.is_empty() { + self.reset(); + self.prev_haystack_count = haystack_count; + return (0..haystack_count).map(Match::from_index).collect(); + } + + if haystack_count == self.prev_haystack_count && !self.prev_needle.is_empty() { + let level = self.find_reusable_level(needle); + if level > 0 { + self.restore_or_save(needle, level); + let matches = self.match_narrowed_parallel(haystacks, threads); + self.save_to_history(needle.len()); + self.set_prev(needle, haystack_count); + return matches; + } + } else if self.is_prefix_extension(needle) && haystack_count > self.prev_haystack_count { + let matches = self.match_narrowed_parallel(haystacks, threads); + self.index_history.clear(); + self.set_prev(needle, haystack_count); + return matches; + } + + self.index_history.clear(); + let matches = self.full_rescore_parallel(haystacks, threads); + self.update_state_from_matches(needle, haystack_count, matches.iter().map(|m| m.index)); + self.save_to_history(needle.len()); + matches + } + + pub fn reset(&mut self) { + self.prev_needle.clear(); + self.matched_indices.clear(); + self.prev_haystack_count = 0; + self.index_history.clear(); + } + + pub fn matcher(&self) -> &Matcher { + &self.matcher + } + + #[inline(always)] + fn is_prefix_extension(&self, needle: &str) -> bool { + !self.prev_needle.is_empty() + && needle.len() > self.prev_needle.len() + && needle.starts_with(&self.prev_needle) + } + + fn find_reusable_level(&self, needle: &str) -> usize { + if needle.starts_with(&self.prev_needle) { + return self.prev_needle.len(); + } + let common = common_prefix_len(needle, &self.prev_needle); + for level in (1..=common).rev() { + if level <= self.index_history.len() && self.index_history[level - 1].is_some() { + return level; + } + } + 0 + } + + fn restore_or_save(&mut self, needle: &str, level: usize) { + if level == self.prev_needle.len() && needle.starts_with(&self.prev_needle) { + self.save_to_history(level); + } else { + self.matched_indices = self.index_history[level - 1].clone().unwrap(); + self.index_history.truncate(level); + } + } + + fn save_to_history(&mut self, needle_len: usize) { + if needle_len == 0 { + return; + } + let idx = needle_len - 1; + if self.index_history.len() <= idx { + self.index_history.resize_with(idx + 1, || None); + } + self.index_history[idx] = Some(self.matched_indices.clone()); + } + + #[inline] + fn set_prev(&mut self, needle: &str, haystack_count: usize) { + self.prev_needle.clear(); + self.prev_needle.push_str(needle); + self.prev_haystack_count = haystack_count; + } + + #[inline] + fn update_state_from_matches( + &mut self, + needle: &str, + haystack_count: usize, + indices: impl Iterator, + ) { + self.prev_needle.clear(); + self.prev_needle.push_str(needle); + self.matched_indices.clear(); + self.matched_indices.extend(indices); + self.matched_indices.sort_unstable(); + self.prev_haystack_count = haystack_count; + } + + #[inline] + fn full_rescore>( + &mut self, + haystacks: &[S], + needle: &str, + haystack_count: usize, + ) -> Vec { + let mut matches = Vec::new(); + self.matcher.match_list_into(haystacks, 0, &mut matches); + + self.matched_indices.clear(); + self.matched_indices.extend(matches.iter().map(|m| m.index)); + + if self.matcher.config.sort { + matches.sort_unstable(); + } + + self.set_prev(needle, haystack_count); + matches + } + + #[inline] + fn full_rescore_indices>( + &mut self, + haystacks: &[S], + needle: &str, + haystack_count: usize, + ) -> Vec { + let mut matches = Vec::new(); + self.matcher + .match_list_indices_into(haystacks, 0, &mut matches); + + self.matched_indices.clear(); + self.matched_indices.extend(matches.iter().map(|m| m.index)); + + if self.matcher.config.sort { + matches.sort_unstable(); + } + + self.set_prev(needle, haystack_count); + matches + } + + #[inline] + fn match_narrowed_unsorted>(&mut self, haystacks: &[S]) -> Vec { + let mut matches = Vec::with_capacity(self.matched_indices.len()); + let max_typos = self.matcher.config.max_typos; + let needle_len = self.matcher.needle.len(); + let min_haystack_len = max_typos + .map(|max| needle_len.saturating_sub(max as usize)) + .unwrap_or(0); + + let mut write = 0usize; + for read in 0..self.matched_indices.len() { + let idx = self.matched_indices[read]; + let haystack = haystacks[idx as usize].as_ref().as_bytes(); + + if haystack.len() < min_haystack_len { + continue; + } + + let (matched, skipped_chunks) = match max_typos { + Some(max) => self.matcher.prefilter.match_haystack(haystack, max), + None => (true, 0), + }; + if !matched { + continue; + } + + let trimmed = &haystack[skipped_chunks * 16..]; + if let Some(m) = + self.matcher + .smith_waterman_one(trimmed, idx, skipped_chunks == 0) + { + self.matched_indices[write] = idx; + write += 1; + matches.push(m); + } + } + self.matched_indices.truncate(write); + + matches + } + + fn match_narrowed_with_growth>(&mut self, haystacks: &[S]) -> Vec { + let mut matches = self.match_narrowed_unsorted(haystacks); + + let prev_count = self.prev_haystack_count; + let matches_before_tail = matches.len(); + self.matcher + .match_list_into(&haystacks[prev_count..], prev_count as u32, &mut matches); + self.matched_indices + .extend(matches[matches_before_tail..].iter().map(|m| m.index)); + + if self.matcher.config.sort { + matches.sort_unstable(); + } + + matches + } + + #[inline] + fn match_narrowed_indices_unsorted>( + &mut self, + haystacks: &[S], + ) -> Vec { + let mut matches = Vec::with_capacity(self.matched_indices.len()); + let max_typos = self.matcher.config.max_typos; + let needle_len = self.matcher.needle.len(); + let min_haystack_len = max_typos + .map(|max| needle_len.saturating_sub(max as usize)) + .unwrap_or(0); + + let mut write = 0usize; + for read in 0..self.matched_indices.len() { + let idx = self.matched_indices[read]; + let haystack = haystacks[idx as usize].as_ref().as_bytes(); + + if haystack.len() < min_haystack_len { + continue; + } + + let (matched, skipped_chunks) = match max_typos { + Some(max) => self.matcher.prefilter.match_haystack(haystack, max), + None => (true, 0), + }; + if !matched { + continue; + } + + let trimmed = &haystack[skipped_chunks * 16..]; + if let Some(m) = self.matcher.smith_waterman_indices_one( + trimmed, + skipped_chunks, + idx, + skipped_chunks == 0, + ) { + self.matched_indices[write] = idx; + write += 1; + matches.push(m); + } + } + self.matched_indices.truncate(write); + + matches + } + + fn match_narrowed_indices_with_growth>( + &mut self, + haystacks: &[S], + ) -> Vec { + let mut matches = self.match_narrowed_indices_unsorted(haystacks); + + let prev_count = self.prev_haystack_count; + let matches_before_tail = matches.len(); + self.matcher + .match_list_indices_into(&haystacks[prev_count..], prev_count as u32, &mut matches); + self.matched_indices + .extend(matches[matches_before_tail..].iter().map(|m| m.index)); + + if self.matcher.config.sort { + matches.sort_unstable(); + } + + matches + } + + fn full_rescore_parallel + Sync>( + &self, + haystacks: &[S], + threads: usize, + ) -> Vec { + if haystacks.is_empty() { + return vec![]; + } + + let chunk_size = 512; + let num_chunks = haystacks.len().div_ceil(chunk_size); + let next_chunk = AtomicUsize::new(0); + let matcher = &self.matcher; + let config = &matcher.config; + + thread::scope(|s| { + let handles: Vec<_> = (0..threads) + .map(|_| { + s.spawn(|| { + let mut local_matches = Vec::new(); + let mut thread_matcher = matcher.clone(); + + loop { + let chunk_idx = next_chunk.fetch_add(1, Ordering::Relaxed); + if chunk_idx >= num_chunks { + break; + } + + let start = chunk_idx * chunk_size; + let end = (start + chunk_size).min(haystacks.len()); + + thread_matcher.match_list_into( + &haystacks[start..end], + start as u32, + &mut local_matches, + ); + } + + if config.sort { + local_matches.sort_unstable(); + } + + local_matches + }) + }) + .collect(); + + if config.sort { + handles + .into_iter() + .map(|h| h.join().unwrap()) + .kmerge() + .collect() + } else { + handles + .into_iter() + .flat_map(|h| h.join().unwrap()) + .collect() + } + }) + } + + fn match_narrowed_parallel + Sync>( + &mut self, + haystacks: &[S], + threads: usize, + ) -> Vec { + let mut new_tail_matches = Vec::new(); + let mut new_tail_indices = Vec::new(); + if haystacks.len() > self.prev_haystack_count { + let prev_count = self.prev_haystack_count; + self.matcher.match_list_into( + &haystacks[prev_count..], + prev_count as u32, + &mut new_tail_matches, + ); + new_tail_indices.extend(new_tail_matches.iter().map(|m| m.index)); + } + + if self.matched_indices.is_empty() { + self.matched_indices = new_tail_indices; + if self.matcher.config.sort { + new_tail_matches.sort_unstable(); + } + return new_tail_matches; + } + + let chunk_size = 512; + let num_chunks = self.matched_indices.len().div_ceil(chunk_size); + let next_chunk = AtomicUsize::new(0); + + let matched_indices = &self.matched_indices; + let matcher = &self.matcher; + let config = &matcher.config; + let max_typos = config.max_typos; + let needle_len = matcher.needle.len(); + let min_haystack_len = max_typos + .map(|max| needle_len.saturating_sub(max as usize)) + .unwrap_or(0); + + let (thread_matches, new_indices) = thread::scope(|s| { + let handles: Vec<_> = (0..threads) + .map(|_| { + s.spawn(|| { + let mut local_matches = Vec::new(); + let mut local_indices = Vec::new(); + let mut thread_matcher = matcher.clone(); + + loop { + let chunk_idx = next_chunk.fetch_add(1, Ordering::Relaxed); + if chunk_idx >= num_chunks { + break; + } + + let start = chunk_idx * chunk_size; + let end = (start + chunk_size).min(matched_indices.len()); + + for &idx in &matched_indices[start..end] { + let haystack = haystacks[idx as usize].as_ref().as_bytes(); + + if haystack.len() < min_haystack_len { + continue; + } + + let (matched, skipped_chunks) = match max_typos { + Some(max) => { + thread_matcher.prefilter.match_haystack(haystack, max) + } + None => (true, 0), + }; + if !matched { + continue; + } + + let trimmed = &haystack[skipped_chunks * 16..]; + if let Some(m) = thread_matcher.smith_waterman_one( + trimmed, + idx, + skipped_chunks == 0, + ) { + local_matches.push(m); + local_indices.push(idx); + } + } + } + + if config.sort { + local_matches.sort_unstable(); + } + + (local_matches, local_indices) + }) + }) + .collect(); + + let mut all_indices = Vec::new(); + let thread_matches = if config.sort { + let mut match_vecs = Vec::with_capacity(handles.len()); + for h in handles { + let (matches, indices) = h.join().unwrap(); + all_indices.extend(indices); + match_vecs.push(matches); + } + match_vecs.into_iter().kmerge().collect::>() + } else { + let mut all_matches = Vec::new(); + for h in handles { + let (matches, indices) = h.join().unwrap(); + all_indices.extend(indices); + all_matches.extend(matches); + } + all_matches + }; + (thread_matches, all_indices) + }); + + self.matched_indices = new_indices; + self.matched_indices.sort_unstable(); + self.matched_indices.extend(new_tail_indices); + + if new_tail_matches.is_empty() { + thread_matches + } else if config.sort { + thread_matches + .into_iter() + .merge(new_tail_matches) + .collect() + } else { + let mut result = thread_matches; + result.extend(new_tail_matches); + result + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::match_list; + + fn assert_match_parity(needle: &str, haystacks: &[&str], config: &Config) { + let expected = match_list(needle, haystacks, config); + let mut incr = IncrementalMatcher::new(config); + let actual = incr.match_list(needle, haystacks); + assert_eq!( + actual, expected, + "mismatch for needle {:?}: actual={:?}, expected={:?}", + needle, actual, expected + ); + } + + #[test] + fn incremental_matches_one_shot() { + let haystacks = [ + "fooBar", + "foo_bar", + "prelude", + "println!", + "fizzBuzz", + "format!", + ]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo", "fooB", "fooBar"] { + let expected = match_list(needle, &haystacks, &config); + let actual = incr.match_list(needle, &haystacks); + assert_eq!(actual, expected, "mismatch for needle {:?}", needle); + } + } + + #[test] + fn prefix_extension_narrows() { + let haystacks = ["fooBar", "foo_bar", "prelude", "println!", "format!"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + let m1 = incr.match_list("f", &haystacks); + assert!(!m1.is_empty()); + + let m2 = incr.match_list("fo", &haystacks); + assert!(m2.len() <= m1.len()); + + let m3 = incr.match_list("foo", &haystacks); + assert!(m3.len() <= m2.len()); + + for needle in ["f", "fo", "foo"] { + assert_match_parity(needle, &haystacks, &config); + } + } + + #[test] + fn non_prefix_change_full_rescore() { + let haystacks = ["fooBar", "barBaz", "bazQux"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + let m1 = incr.match_list("foo", &haystacks); + let m2 = incr.match_list("bar", &haystacks); + + let expected = match_list("bar", &haystacks, &config); + assert_eq!(m2, expected); + assert_ne!( + m1.iter().map(|m| m.index).collect::>(), + m2.iter().map(|m| m.index).collect::>() + ); + } + + #[test] + fn backspace_uses_history() { + let haystacks = ["fooBar", "foo_bar", "fBaz", "format!", "prelude"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo"] { + let expected = match_list(needle, &haystacks, &config); + let actual = incr.match_list(needle, &haystacks); + assert_eq!(actual, expected, "forward mismatch for {:?}", needle); + } + + let m = incr.match_list("fo", &haystacks); + let expected = match_list("fo", &haystacks, &config); + assert_eq!(m, expected); + + let m = incr.match_list("f", &haystacks); + let expected = match_list("f", &haystacks, &config); + assert_eq!(m, expected); + } + + #[test] + fn backspace_then_retype() { + let haystacks = ["fooBar", "fobBaz", "format!", "prelude"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + incr.match_list("f", &haystacks); + incr.match_list("fo", &haystacks); + incr.match_list("foo", &haystacks); + + let m = incr.match_list("fob", &haystacks); + let expected = match_list("fob", &haystacks, &config); + assert_eq!(m, expected); + } + + #[test] + fn multi_backspace() { + let haystacks = ["fooBarBaz", "fooBat", "fooXyz", "prelude"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo", "fooB", "fooBar"] { + incr.match_list(needle, &haystacks); + } + + let m = incr.match_list("fo", &haystacks); + let expected = match_list("fo", &haystacks, &config); + assert_eq!(m, expected); + + let m = incr.match_list("foo", &haystacks); + let expected = match_list("foo", &haystacks, &config); + assert_eq!(m, expected); + } + + #[test] + fn empty_needle_returns_all() { + let haystacks = ["foo", "bar", "baz"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + let m = incr.match_list("", &haystacks); + assert_eq!(m.len(), 3); + for m in &m { + assert_eq!(m.score, 0); + } + } + + #[test] + fn haystack_growth() { + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + let haystacks_small: Vec<&str> = vec!["fooBar", "foo_bar", "prelude"]; + incr.match_list("f", &haystacks_small); + + let haystacks_big: Vec<&str> = vec!["fooBar", "foo_bar", "prelude", "format!", "fizz"]; + let m2 = incr.match_list("fo", &haystacks_big); + + let expected = match_list("fo", &haystacks_big, &config); + assert_eq!(m2, expected); + } + + #[test] + fn reset_forces_full_rescore() { + let haystacks = ["fooBar", "foo_bar"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + incr.match_list("f", &haystacks); + incr.reset(); + let m = incr.match_list("fo", &haystacks); + let expected = match_list("fo", &haystacks, &config); + assert_eq!(m, expected); + } + + #[test] + fn max_typos_none() { + let haystacks = ["fooBar", "fxoBxr", "completely_different"]; + let config = Config { + max_typos: None, + ..Config::default() + }; + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo", "fooB"] { + let expected = match_list(needle, &haystacks, &config); + let actual = incr.match_list(needle, &haystacks); + assert_eq!(actual, expected, "mismatch for needle {:?}", needle); + } + } + + #[test] + fn max_typos_one() { + let haystacks = ["fooBar", "fxoBar", "fxxBar", "completely_different"]; + let config = Config { + max_typos: Some(1), + ..Config::default() + }; + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo", "fooB"] { + let expected = match_list(needle, &haystacks, &config); + let actual = incr.match_list(needle, &haystacks); + assert_eq!(actual, expected, "mismatch for needle {:?}", needle); + } + } + + #[test] + fn high_selectivity() { + let mut haystacks: Vec = (0..1000).map(|i| format!("item_{}", i)).collect(); + haystacks.push("fooBar".to_string()); + haystacks.push("fooBaz".to_string()); + + let refs: Vec<&str> = haystacks.iter().map(|s| s.as_str()).collect(); + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo", "fooB", "fooBar"] { + let expected = match_list(needle, &refs, &config); + let actual = incr.match_list(needle, &refs); + assert_eq!(actual, expected, "mismatch for needle {:?}", needle); + } + } + + #[test] + fn low_selectivity() { + let haystacks: Vec = (0..100).map(|i| format!("foo_{}", i)).collect(); + let refs: Vec<&str> = haystacks.iter().map(|s| s.as_str()).collect(); + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo", "foo_"] { + let expected = match_list(needle, &refs, &config); + let actual = incr.match_list(needle, &refs); + assert_eq!(actual, expected, "mismatch for needle {:?}", needle); + } + } + + #[test] + fn match_list_indices_parity() { + let haystacks = ["fooBar", "foo_bar", "prelude", "println!"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo", "fooB"] { + let expected = crate::match_list_indices(needle, &haystacks, &config); + let actual = incr.match_list_indices(needle, &haystacks); + assert_eq!( + actual.len(), + expected.len(), + "length mismatch for needle {:?}", + needle + ); + for (a, e) in actual.iter().zip(expected.iter()) { + assert_eq!(a.index, e.index, "index mismatch for needle {:?}", needle); + assert_eq!(a.score, e.score, "score mismatch for needle {:?}", needle); + assert_eq!(a.exact, e.exact, "exact mismatch for needle {:?}", needle); + assert_eq!( + a.indices, e.indices, + "indices mismatch for needle {:?}", + needle + ); + } + } + } + + #[test] + fn parallel_parity() { + let haystacks = [ + "fooBar", "foo_bar", "prelude", "println!", "format!", "fizzBuzz", + ]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + for needle in ["f", "fo", "foo", "fooB"] { + let expected = match_list(needle, &haystacks, &config); + let actual = incr.match_list_parallel(needle, &haystacks, 2); + assert_eq!(actual, expected, "parallel mismatch for needle {:?}", needle); + } + } + + #[test] + fn same_needle_full_rescore() { + let haystacks = ["fooBar", "foo_bar"]; + let config = Config::default(); + let mut incr = IncrementalMatcher::new(&config); + + let first = incr.match_list("foo", &haystacks); + let second = incr.match_list("foo", &haystacks); + assert_eq!(first, second); + } +} diff --git a/src/lib.rs b/src/lib.rs index 1265728..fa19022 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -116,11 +116,13 @@ use std::cmp::Ordering; use serde::{Deserialize, Serialize}; mod r#const; +mod incremental; mod one_shot; pub mod prefilter; mod simd; pub mod smith_waterman; +pub use incremental::IncrementalMatcher; pub use one_shot::{Matcher, match_list, match_list_indices, match_list_parallel}; use r#const::*; diff --git a/src/smith_waterman/simd/alignment_iter.rs b/src/smith_waterman/simd/alignment_iter.rs index 13fd4c2..cadc9f1 100644 --- a/src/smith_waterman/simd/alignment_iter.rs +++ b/src/smith_waterman/simd/alignment_iter.rs @@ -116,20 +116,20 @@ impl<'a> Iterator for AlignmentPathIter<'a> { return None; } - if let Some(max_typos) = self.max_typos - && self.typo_count > max_typos - { - self.finished = true; - return Some(None); + if let Some(max_typos) = self.max_typos { + if self.typo_count > max_typos { + self.finished = true; + return Some(None); + } } // Must be moving up only (at left edge), or lost alignment if self.col_idx < 16 || self.score == 0 { - if let Some(max_typos) = self.max_typos - && (self.typo_count + self.row_idx as u16) > max_typos - { - self.finished = true; - return Some(None); + if let Some(max_typos) = self.max_typos { + if (self.typo_count + self.row_idx as u16) > max_typos { + self.finished = true; + return Some(None); + } } return None; }