diff --git a/.gitignore b/.gitignore index 01b6dd90..4a8f00b9 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,9 @@ codecov.json *.profraw benches/fixtures/*.txt + +# Profiling +profile.json.gz +perf.data.old +perf.data +flamegraph.svg diff --git a/Cargo.lock b/Cargo.lock index c3af2ae8..b597cf32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -810,7 +810,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -1189,9 +1189,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543" +checksum = "b3e3d65f018c6ae946ab16e80944b97096ed73c35b221d1c478a6c81d8f57940" dependencies = [ "jiff-static", "log", @@ -1202,9 +1202,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" +checksum = "a17c2b211d863c7fde02cbea8a3c1a439b98e109286554f2860bdded7ff83818" dependencies = [ "proc-macro2", "quote", @@ -1498,9 +1498,9 @@ dependencies = [ [[package]] name = "owo-colors" -version = "4.2.3" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" [[package]] name = "page_size" @@ -1727,9 +1727,9 @@ dependencies = [ [[package]] name = "pulldown-cmark" -version = "0.13.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6" dependencies = [ "bitflags 2.11.0", "memchr", @@ -1992,7 +1992,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2205,6 +2205,7 @@ dependencies = [ "interprocess", "kanal", "log", + "memchr", "nix 0.31.1", "portable-pty", "rand 0.10.0", @@ -2314,7 +2315,7 @@ dependencies = [ "getrandom 0.4.1", "once_cell", "rustix", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -2937,7 +2938,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index fcd5df29..a3ca9c66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,11 @@ strip = true [profile.dist] inherits = "release" +[profile.release-debug] +inherits = "release" +debug = true +strip = false + [lib] name = "skim" @@ -57,6 +62,7 @@ thiserror = "=2.0.18" tempfile = "=3.25.0" crossterm = { version = ">=0.0.0", features = ["event-stream", "use-dev-tty", "libc"] } thread_local = "=1.1.9" +memchr = "=2.8.0" clap_complete_nushell = "=4.5.10" interprocess = { version = "=2.4.0", features = ["tokio"] } serde = { version = "=1.0.228", features = ["derive"] } @@ -95,3 +101,7 @@ harness = false [[bench]] name = "partial" harness = false + +[[bench]] +name = "matcher_micro" +harness = false diff --git a/README.md b/README.md index e042e6ff..7281dc0d 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ Skim provides a single executable called `sk`. 
Think of it as a smarter alternat * [Interactive mode](#interactive-mode) + [How does it work?](#how-does-it-work) * [Executing external programs](#executing-external-programs) + * [Algorithms](#algorithms) * [Preview Window](#preview-window) + [How does it work?](#how-does-it-work-1) * [Fields support](#fields-support) @@ -84,7 +85,7 @@ The skim project contains several components: | macOS | MacPorts | `sudo port install skim` | | Alpine | apk | `apk add skim` | | Arch | pacman | `pacman -S skim` | -| Fedora | COPR | see below | +| Fedora | COPR | see below | | Gentoo | Portage | `emerge --ask app-misc/skim` | | Guix | guix | `guix install skim` | | Void | XBPS | `xbps-install -S skim` | @@ -452,6 +453,14 @@ You can configure key bindings to start external processes without leaving Skim sk --bind 'f1:execute(less -f {}),ctrl-y:execute-silent(echo {} | pbcopy)+abort' ``` +## Algorithms + +Skim offers multiple algorithms; check the help or manpage for an exhaustive list. Among them are: +- `skim_v2`, the default algorithm, loosely based on `fzf`'s algorithm +- `frizbee` ([crate](https://crates.io/crates/frizbee)), the typo-resistant algorithm used in the [blink.cmp](https://github.com/saghen/blink.cmp) neovim plugin +- `fzy`, based on [fzy](https://github.com/jhawthorn/fzy/)'s algorithm expanded for basic typo-resistance +- `arinae`, skim's newest algorithm, designed in-house with typo-resistance in mind, expanding on all the above to make typo-resistant matching feel more natural while keeping the per-item performance up to the best standards + ## Preview Window This is a great feature of fzf that skim borrows. 
For example, we use 'ag' to diff --git a/benches/filter.rs b/benches/filter.rs index cb7fd930..9b665b79 100644 --- a/benches/filter.rs +++ b/benches/filter.rs @@ -75,6 +75,27 @@ fn criterion_benchmark_10m(c: &mut Criterion) { Skim::run_with(opts, None) }); }); + c.bench_function("filter_10M_arinae", |b| { + b.iter(|| { + let opts = SkimOptionsBuilder::default() + .cmd("cat benches/fixtures/10M.txt") + .filter("test") + .algorithm(FuzzyAlgorithm::Arinae) + .build()?; + Skim::run_with(opts, None) + }); + }); + c.bench_function("filter_10M_arinae_typos", |b| { + b.iter(|| { + let opts = SkimOptionsBuilder::default() + .cmd("cat benches/fixtures/10M.txt") + .filter("test") + .typos(Typos::Smart) + .algorithm(FuzzyAlgorithm::Arinae) + .build()?; + Skim::run_with(opts, None) + }); + }); } fn criterion_benchmark_1m(c: &mut Criterion) { @@ -150,6 +171,27 @@ fn criterion_benchmark_1m(c: &mut Criterion) { Skim::run_with(opts, None) }); }); + c.bench_function("filter_1M_arinae", |b| { + b.iter(|| { + let opts = SkimOptionsBuilder::default() + .cmd("cat benches/fixtures/1M.txt") + .filter("test") + .algorithm(FuzzyAlgorithm::Arinae) + .build()?; + Skim::run_with(opts, None) + }); + }); + c.bench_function("filter_1M_arinae_typos", |b| { + b.iter(|| { + let opts = SkimOptionsBuilder::default() + .cmd("cat benches/fixtures/1M.txt") + .filter("test") + .typos(Typos::Smart) + .algorithm(FuzzyAlgorithm::Arinae) + .build()?; + Skim::run_with(opts, None) + }); + }); c.bench_function("filter_1M_andor", |b| { b.iter(|| { diff --git a/benches/matcher_micro.rs b/benches/matcher_micro.rs new file mode 100644 index 00000000..a7ea4917 --- /dev/null +++ b/benches/matcher_micro.rs @@ -0,0 +1,133 @@ +//! Microbenchmark that isolates the fuzzy matcher DP from all other overhead +//! (I/O, threading, sorting). 
+ +use std::fs; + +use criterion::{Criterion, criterion_group, criterion_main}; + +use skim::CaseMatching; +use skim::fuzzy_matcher::FuzzyMatcher; +use skim::fuzzy_matcher::arinae::ArinaeMatcher; +use skim::fuzzy_matcher::frizbee::FrizbeeMatcher; +use skim::prelude::SkimMatcherV2; + +fn load_lines() -> Vec { + let data = fs::read_to_string("benches/fixtures/1M.txt").expect("1M.txt missing"); + data.lines().map(|l| l.to_string()).collect() +} + +fn bench_matcher(c: &mut Criterion) { + let lines = load_lines(); + + c.bench_function("micro_skim_v2", |b| { + let m = SkimMatcherV2::default().smart_case(); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if m.fuzzy_indices(line, "test").is_some() { + count += 1; + } + } + count + }); + }); + c.bench_function("micro_frizbee", |b| { + let m = FrizbeeMatcher::default().case(CaseMatching::Smart).max_typos(Some(0)); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if m.fuzzy_indices(line, "test").is_some() { + count += 1; + } + } + count + }); + }); + c.bench_function("micro_typos_frizbee", |b| { + let m = FrizbeeMatcher::default().case(CaseMatching::Smart).max_typos(Some(1)); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if m.fuzzy_indices(line, "test").is_some() { + count += 1; + } + } + count + }); + }); + c.bench_function("micro_arinae", |b| { + let m = ArinaeMatcher::new(CaseMatching::Smart, false); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if m.fuzzy_indices(line, "test").is_some() { + count += 1; + } + } + count + }); + }); + c.bench_function("micro_arinae_range", |b| { + let m = ArinaeMatcher::new(CaseMatching::Smart, false); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if m.fuzzy_match_range(line, "test").is_some() { + count += 1; + } + } + count + }); + }); + c.bench_function("micro_arinae_score", |b| { + let m = ArinaeMatcher::new(CaseMatching::Smart, false); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if 
m.fuzzy_match(line, "test").is_some() { + count += 1; + } + } + count + }); + }); + c.bench_function("micro_typos_arinae", |b| { + let m = ArinaeMatcher::new(CaseMatching::Smart, true); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if m.fuzzy_indices(line, "test").is_some() { + count += 1; + } + } + count + }); + }); + c.bench_function("micro_typos_arinae_range", |b| { + let m = ArinaeMatcher::new(CaseMatching::Smart, true); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if m.fuzzy_match_range(line, "test").is_some() { + count += 1; + } + } + count + }); + }); + c.bench_function("micro_typos_arinae_score", |b| { + let m = ArinaeMatcher::new(CaseMatching::Smart, true); + b.iter(|| { + let mut count = 0u64; + for line in &lines { + if m.fuzzy_match(line, "test").is_some() { + count += 1; + } + } + count + }); + }); +} + +criterion_group!(benches, bench_matcher); +criterion_main!(benches); diff --git a/benches/read_and_match.rs b/benches/read_and_match.rs index 67bbcc35..c7a402a5 100644 --- a/benches/read_and_match.rs +++ b/benches/read_and_match.rs @@ -28,6 +28,60 @@ fn criterion_benchmark(c: &mut Criterion) { b.to_async(rt) .iter(async || wait_until_done(SkimOptionsBuilder::default().query("test").build().unwrap()).await); }); + c.bench_function("query_frizbee", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(rt).iter(async || { + wait_until_done( + SkimOptionsBuilder::default() + .query("test") + .algorithm(FuzzyAlgorithm::Frizbee) + .no_typos(true) + .build() + .unwrap(), + ) + .await + }); + }); + c.bench_function("query_ari", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(rt).iter(async || { + wait_until_done( + SkimOptionsBuilder::default() + .query("test") + .algorithm(FuzzyAlgorithm::Arinae) + .no_typos(true) + .build() + .unwrap(), + ) + .await + }); + }); + c.bench_function("query_frizbee_typos", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + 
b.to_async(rt).iter(async || { + wait_until_done( + SkimOptionsBuilder::default() + .query("test") + .algorithm(FuzzyAlgorithm::Frizbee) + .build() + .unwrap(), + ) + .await + }); + }); + c.bench_function("query_ari_typos", |b| { + let rt = tokio::runtime::Runtime::new().unwrap(); + b.to_async(rt).iter(async || { + wait_until_done( + SkimOptionsBuilder::default() + .query("test") + .algorithm(FuzzyAlgorithm::Arinae) + .build() + .unwrap(), + ) + .await + }); + }); c.bench_function("typing", |b| { let rt = tokio::runtime::Runtime::new().unwrap(); b.to_async(rt).iter(async || { @@ -46,7 +100,7 @@ fn criterion_benchmark(c: &mut Criterion) { } else { done_since = 1; } - if sent && done_since > 5 { + if sent && done_since > 50 { s.send(Event::Action(Action::Accept(None))).await?; } else if !sent { s.send(Event::Action(Action::AddChar('t'))).await?; diff --git a/man/man1/sk.1 b/man/man1/sk.1 index d94bb306..1d5ef768 100644 --- a/man/man1/sk.1 +++ b/man/man1/sk.1 @@ -100,7 +100,9 @@ clangd: Clangd fuzzy matching algorithm .IP \(bu 2 fzy: Fzy matching algorithm (https://github.com/jhawthorn/fzy) .IP \(bu 2 -frizbee: Frizbee matching algorithm, typo resistant Will fallback to SkimV2 if the feature is not enabled +frizbee: Frizbee matching algorithm, typo resistant +.IP \(bu 2 +arinae: Arinae: typo\-resistant & natural algorithm .RE .TP \fB\-\-case\fR \fI\fR [default: smart] diff --git a/shell/completion.bash b/shell/completion.bash index d1b3daff..7cb1e7f7 100644 --- a/shell/completion.bash +++ b/shell/completion.bash @@ -62,7 +62,7 @@ _sk() { return 0 ;; --algo) - COMPREPLY=($(compgen -W "skim_v1 skim_v2 clangd fzy frizbee" -- "${cur}")) + COMPREPLY=($(compgen -W "skim_v1 skim_v2 clangd fzy frizbee arinae" -- "${cur}")) return 0 ;; --case) @@ -250,7 +250,7 @@ _sk() { return 0 ;; --flags) - COMPREPLY=($(compgen -W "no-preview-pty" -- "${cur}")) + COMPREPLY=($(compgen -W "no-preview-pty show-score" -- "${cur}")) return 0 ;; --hscroll-off) diff --git a/shell/completion.fish 
b/shell/completion.fish index 3296067b..eed04374 100644 --- a/shell/completion.fish +++ b/shell/completion.fish @@ -16,7 +16,8 @@ complete -c sk -l algo -d 'Fuzzy matching algorithm' -r -f -a "skim_v1\t'Origina skim_v2\t'Improved skim fuzzy matching algorithm (v2, default)' clangd\t'Clangd fuzzy matching algorithm' fzy\t'Fzy matching algorithm (https://github.com/jhawthorn/fzy)' -frizbee\t'Frizbee matching algorithm, typo resistant Will fallback to SkimV2 if the feature is not enabled'" +frizbee\t'Frizbee matching algorithm, typo resistant' +arinae\t'Arinae: typo-resistant & natural algorithm'" complete -c sk -l case -d 'Case sensitivity' -r -f -a "respect\t'Case-sensitive matching' ignore\t'Case-insensitive matching' smart\t'Smart case: case-insensitive unless query contains uppercase'" @@ -80,7 +81,8 @@ complete -c sk -l listen -d 'Run an IPC socket with optional name (defaults to s complete -c sk -l remote -d 'Send commands to an IPC socket with optional name (defaults to sk)' -r complete -c sk -l tmux -d 'Run in a tmux popup' -r complete -c sk -l log-file -d 'Pipe log output to a file' -r -complete -c sk -l flags -d 'Feature flags' -r -f -a "no-preview-pty\t'Disable preview PTY on linux'" +complete -c sk -l flags -d 'Feature flags' -r -f -a "no-preview-pty\t'Disable preview PTY on linux' +show-score\t'Display the item\'s match score before its value in the item list (for matcher debugging)'" complete -c sk -l hscroll-off -r complete -c sk -l jump-labels -r complete -c sk -l scheme -r diff --git a/shell/completion.nu b/shell/completion.nu index 4f4302b3..a94c4af6 100644 --- a/shell/completion.nu +++ b/shell/completion.nu @@ -5,7 +5,7 @@ module completions { } def "nu-complete sk algorithm" [] { - [ "skim_v1" "skim_v2" "clangd" "fzy" "frizbee" ] + [ "skim_v1" "skim_v2" "clangd" "fzy" "frizbee" "arinae" ] } def "nu-complete sk case" [] { @@ -29,7 +29,7 @@ module completions { } def "nu-complete sk flags" [] { - [ "no-preview-pty" ] + [ "no-preview-pty" 
"show-score" ] } # Fuzzy Finder in rust! diff --git a/shell/completion.zsh b/shell/completion.zsh index 85618263..c7a8e670 100644 --- a/shell/completion.zsh +++ b/shell/completion.zsh @@ -27,7 +27,8 @@ _sk() { skim_v2\:"Improved skim fuzzy matching algorithm (v2, default)" clangd\:"Clangd fuzzy matching algorithm" fzy\:"Fzy matching algorithm (https\://github.com/jhawthorn/fzy)" -frizbee\:"Frizbee matching algorithm, typo resistant Will fallback to SkimV2 if the feature is not enabled"))' \ +frizbee\:"Frizbee matching algorithm, typo resistant" +arinae\:"Arinae\: typo-resistant & natural algorithm"))' \ '--case=[Case sensitivity]:CASE:((respect\:"Case-sensitive matching" ignore\:"Case-insensitive matching" smart\:"Smart case\: case-insensitive unless query contains uppercase"))' \ @@ -83,7 +84,8 @@ zsh\:"Zsh"))' \ '--remote=[Send commands to an IPC socket with optional name (defaults to sk)]::REMOTE:_default' \ '--tmux=[Run in a tmux popup]::TMUX:_default' \ '--log-file=[Pipe log output to a file]:LOG_FILE:_default' \ -'*--flags=[Feature flags]:FLAGS:((no-preview-pty\:"Disable preview PTY on linux"))' \ +'*--flags=[Feature flags]:FLAGS:((no-preview-pty\:"Disable preview PTY on linux" +show-score\:"Display the item'\''s match score before its value in the item list (for matcher debugging)"))' \ '--hscroll-off=[]:HSCROLL_OFF:_default' \ '--jump-labels=[]:JUMP_LABELS:_default' \ '--scheme=[]:SCHEME:_default' \ diff --git a/src/engine/andor.rs b/src/engine/andor.rs index 34d8c75b..bf6b06b1 100644 --- a/src/engine/andor.rs +++ b/src/engine/andor.rs @@ -1,5 +1,6 @@ use std::fmt::{Display, Error, Formatter}; +use crate::fuzzy_matcher::MatchIndices; use crate::{MatchEngine, MatchRange, MatchResult, SkimItem}; //------------------------------------------------------------------------------ @@ -70,7 +71,7 @@ impl AndEngine { } fn merge_matched_items(&self, items: Vec, text: &str) -> MatchResult { - let mut ranges = vec![]; + let mut ranges = MatchIndices::new(); let mut rank 
= items[0].rank; for item in items { match item.matched_range { @@ -78,7 +79,7 @@ impl AndEngine { ranges.extend(item.range_char_indices(text)); } MatchRange::Chars(vec) => { - ranges.extend(vec.iter()); + ranges.extend(vec.iter().copied()); } } rank.score = rank.score.max(item.rank.score); diff --git a/src/engine/factory.rs b/src/engine/factory.rs index 57e6087e..a6a0bdcb 100644 --- a/src/engine/factory.rs +++ b/src/engine/factory.rs @@ -19,6 +19,7 @@ pub struct ExactOrFuzzyEngineFactory { fuzzy_algorithm: FuzzyAlgorithm, rank_builder: Arc, typos: Typos, + filter_mode: bool, } impl ExactOrFuzzyEngineFactory { @@ -29,6 +30,7 @@ impl ExactOrFuzzyEngineFactory { fuzzy_algorithm: FuzzyAlgorithm::SkimV2, rank_builder: Default::default(), typos: Typos::Disabled, + filter_mode: false, } } @@ -60,6 +62,12 @@ impl ExactOrFuzzyEngineFactory { self } + /// Sets filter mode (skips per-character match indices for faster matching) + pub fn filter_mode(mut self, filter_mode: bool) -> Self { + self.filter_mode = filter_mode; + self + } + /// Builds the factory (currently a no-op, returns self) pub fn build(self) -> Self { self @@ -126,6 +134,7 @@ impl MatchEngineFactory for ExactOrFuzzyEngineFactory { .algorithm(self.fuzzy_algorithm) .case(case) .typos(self.typos) + .filter_mode(self.filter_mode) .rank_builder(self.rank_builder.clone()) .build(), ) diff --git a/src/engine/fuzzy.rs b/src/engine/fuzzy.rs index e8d364af..964b5be8 100644 --- a/src/engine/fuzzy.rs +++ b/src/engine/fuzzy.rs @@ -2,10 +2,10 @@ use std::cmp::min; use std::fmt::{Display, Error, Formatter}; use std::sync::Arc; +use crate::fuzzy_matcher::MatchIndices; +use crate::fuzzy_matcher::arinae::ArinaeMatcher; use crate::fuzzy_matcher::frizbee::FrizbeeMatcher; -use crate::fuzzy_matcher::{ - FuzzyMatcher, IndexType, ScoreType, clangd::ClangdMatcher, fzy::FzyMatcher, skim::SkimMatcherV2, -}; +use crate::fuzzy_matcher::{FuzzyMatcher, clangd::ClangdMatcher, fzy::FzyMatcher, skim::SkimMatcherV2}; use 
crate::item::RankBuilder; use crate::{CaseMatching, MatchEngine, Typos}; @@ -27,8 +27,10 @@ pub enum FuzzyAlgorithm { /// Fzy matching algorithm (https://github.com/jhawthorn/fzy) Fzy, /// Frizbee matching algorithm, typo resistant - /// Will fallback to SkimV2 if the feature is not enabled Frizbee, + /// Arinae: typo-resistant & natural algorithm + #[cfg_attr(feature = "cli", clap(alias = "ari"))] + Arinae, } const BYTES_1M: usize = 1024 * 1024 * 1024; @@ -46,6 +48,10 @@ pub struct FuzzyEngineBuilder { /// - `Typos::Smart`: adaptive (pattern_length / 4) /// - `Typos::Fixed(n)`: exactly n typos allowed typos: Typos, + /// When true, use `fuzzy_match_range` instead of `fuzzy_indices` to avoid + /// per-character index computation (useful in filter mode where highlighting + /// is not needed). + filter_mode: bool, } impl FuzzyEngineBuilder { @@ -74,6 +80,11 @@ impl FuzzyEngineBuilder { self } + pub fn filter_mode(mut self, filter_mode: bool) -> Self { + self.filter_mode = filter_mode; + self + } + /// Compute the effective max_typos for the given query. 
/// /// - `Typos::Disabled` → `None` (no typo tolerance) @@ -129,12 +140,20 @@ impl FuzzyEngineBuilder { debug!("Initialized Fzy algorithm (max_typos: {:?})", max_typos); Box::new(matcher) } + FuzzyAlgorithm::Arinae => { + let mut matcher = ArinaeMatcher::default(); + matcher.case = self.case; + matcher.allow_typos = !matches!(self.typos, Typos::Disabled); + debug!("Initialized Arinae algorithm"); + Box::new(matcher) + } }; FuzzyEngine { matcher, query: self.query, rank_builder: self.rank_builder, + filter_mode: self.filter_mode, } } } @@ -144,6 +163,7 @@ pub struct FuzzyEngine { query: String, matcher: Box, rank_builder: Arc, + filter_mode: bool, } impl FuzzyEngine { @@ -151,58 +171,92 @@ impl FuzzyEngine { pub fn builder() -> FuzzyEngineBuilder { FuzzyEngineBuilder::default() } - - fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { - if pattern.is_empty() { - return Some((0, Vec::new())); - } else if choice.is_empty() { - return None; - } - - self.matcher.fuzzy_indices(choice, pattern) - } } impl MatchEngine for FuzzyEngine { fn match_item(&self, item: &dyn SkimItem) -> Option { - // iterate over all matching fields: - let mut matched_result = None; let item_text = item.text(); let default_range = [(0, item_text.len())]; - for &(start, end) in item.get_matching_ranges().unwrap_or(&default_range) { - let start = min(start, item_text.len()); - let end = min(end, item_text.len()); - matched_result = self.fuzzy_match(&item_text[start..end], &self.query).map(|(s, vec)| { - if start != 0 { - let start_char = &item_text[..start].chars().count(); - (s, vec.iter().map(|x| x + start_char).collect()) + + if self.filter_mode { + // Fast path: use fuzzy_match_range to avoid per-character index computation + let mut best: Option<(i64, usize, usize)> = None; + for &(start, end) in item.get_matching_ranges().unwrap_or(&default_range) { + let start = min(start, item_text.len()); + let end = min(end, item_text.len()); + + let result = if 
self.query.is_empty() { + Some((0i64, 0, 0)) + } else if item_text[start..end].is_empty() { + None } else { - (s, vec) - } - }); + self.matcher + .fuzzy_match_range(&item_text[start..end], &self.query) + .map(|(s, b, e)| { + let offset = if start != 0 { + item_text[..start].chars().count() + } else { + 0 + }; + (s, b + offset, e + offset) + }) + }; - if matched_result.is_some() { - break; + if result.is_some() { + best = result; + break; + } } - } - let (score, matched_range) = matched_result?; + let (score, begin, end) = best?; + let item_len = item_text.len(); + Some(MatchResult { + rank: self + .rank_builder + .build_rank(score as i32, begin, end, item_len, item.get_index()), + matched_range: MatchRange::ByteRange(begin, end), + }) + } else { + let mut matched_result = None; + for &(start, end) in item.get_matching_ranges().unwrap_or(&default_range) { + let start = min(start, item_text.len()); + let end = min(end, item_text.len()); + + let result = if self.query.is_empty() { + Some((0i64, MatchIndices::new())) + } else if item_text[start..end].is_empty() { + None + } else { + self.matcher.fuzzy_indices(&item_text[start..end], &self.query) + }; - let begin = *matched_range.first().unwrap_or(&0); - let end = *matched_range.last().unwrap_or(&0); + matched_result = result.map(|(s, vec)| { + if start != 0 { + let start_char = item_text[..start].chars().count(); + (s, vec.iter().map(|x| x + start_char).collect::()) + } else { + (s, vec) + } + }); - let item_len = item_text.len(); + if matched_result.is_some() { + break; + } + } - // Use individual character indices for highlighting instead of byte range - // This allows each matched character to be highlighted individually - let matched_range = MatchRange::Chars(matched_range); + let (score, matched_indices) = matched_result?; + let begin = *matched_indices.first().unwrap_or(&0); + let end = *matched_indices.last().unwrap_or(&0); + let item_len = item_text.len(); + let matched_range = 
MatchRange::Chars(matched_indices); - Some(MatchResult { - rank: self - .rank_builder - .build_rank(score as i32, begin, end, item_len, item.get_index()), - matched_range, - }) + Some(MatchResult { + rank: self + .rank_builder + .build_rank(score as i32, begin, end, item_len, item.get_index()), + matched_range, + }) + } } } diff --git a/src/engine/split.rs b/src/engine/split.rs index 068bd679..cca745c5 100644 --- a/src/engine/split.rs +++ b/src/engine/split.rs @@ -3,6 +3,7 @@ //! This engine splits both the query and item text on a delimiter character, then matches //! the query parts against the corresponding item parts. +use crate::fuzzy_matcher::MatchIndices; use crate::{MatchEngine, MatchEngineFactory, MatchRange, MatchResult, SkimItem}; use std::fmt::{Display, Error, Formatter}; @@ -51,7 +52,7 @@ impl MatchEngine for SplitMatchEngine { // Combine the results - use rank from first result (like AndEngine does) let rank = before_result.rank; - let mut combined_indices: Vec = match before_result.matched_range { + let mut combined_indices: MatchIndices = match before_result.matched_range { MatchRange::Chars(indices) => indices, MatchRange::ByteRange(start, end) => { // Convert byte range to char indices for the before part @@ -67,7 +68,7 @@ impl MatchEngine for SplitMatchEngine { // Offset for the "after" part: delimiter_char_idx + 1 (to skip the delimiter) let offset = delimiter_char_idx + 1; - let after_indices: Vec = match after_result.matched_range { + let after_indices: MatchIndices = match after_result.matched_range { MatchRange::Chars(indices) => indices.into_iter().map(|i| i + offset).collect(), MatchRange::ByteRange(start, end) => { // Convert byte range to char indices for the after part diff --git a/src/engine/util.rs b/src/engine/util.rs index eb42f9b9..c2ca7d3c 100644 --- a/src/engine/util.rs +++ b/src/engine/util.rs @@ -1,3 +1,4 @@ +use crate::fuzzy_matcher::MatchIndices; use regex::Regex; use unicode_normalization::UnicodeNormalization; @@ -26,7 
+27,7 @@ pub fn normalize_with_char_mapping(s: &str) -> (String, Vec) { /// /// Given indices into a normalized string and the char mapping from normalize_with_char_mapping, /// returns the corresponding indices in the original string. -pub fn map_char_indices_to_original(normalized_indices: &[usize], char_mapping: &[usize]) -> Vec { +pub fn map_char_indices_to_original(normalized_indices: &[usize], char_mapping: &[usize]) -> MatchIndices { normalized_indices .iter() .filter_map(|&idx| char_mapping.get(idx).copied()) diff --git a/src/fuzzy_matcher/arinae/algo.rs b/src/fuzzy_matcher/arinae/algo.rs new file mode 100644 index 00000000..9df907a9 --- /dev/null +++ b/src/fuzzy_matcher/arinae/algo.rs @@ -0,0 +1,573 @@ +//! Arinae's algo itself + +use std::cell::RefCell; + +use thread_local::ThreadLocal; + +use crate::fuzzy_matcher::{IndexType, MatchIndices}; + +use super::banding::{compute_banding, typo_vband_row}; +use super::constants::*; +use super::{Atom, CELL_ZERO, Cell, Dir, SWMatrix, Score}; + +/// Core cell scoring kernel shared by both score-only and full DP. +/// +/// Computes the best score and direction for a single DP cell from its +/// three neighbours (diagonal, up, left). The caller is responsible for +/// fetching the neighbour values from whatever storage layout it uses. +/// +/// Returns `(best_score, direction)`. The direction is `Dir::None` when +/// `best_score <= 0`. +/// +/// This function is written in a branchless style: all scoring arithmetic +/// uses `bool as Score` multipliers and `max` instead of if/else, and the +/// final direction is selected via a branchless cascade of conditional moves. 
+#[inline(always)] +#[allow(clippy::too_many_arguments)] +fn compute_cell( + is_match: bool, + is_first: bool, + bonus_j: Score, + diag_score: Score, + diag_was_diag: bool, + up_score: Score, + left_score: Score, + left_was_diag: bool, +) -> (Score, Dir) { + // --- Bonus (branchless) --- + // consecutive bonus added when diag_was_diag, first-char multiplier doubles the bonus. + // `bool as Score` is 0 or 1 — no branch. + let bonus = (bonus_j + CONSECUTIVE_BONUS * (diag_was_diag as Score)) * (1 + is_first as Score); + + // --- DIAGONAL (branchless) --- + // Match path: diag_score + MATCH_BONUS + bonus, masked by is_match. + // Mismatch path (typos only): diag_score - MISMATCH_PENALTY, masked by !is_match. + let match_val = (diag_score + MATCH_BONUS + bonus) * (is_match as Score); + let mismatch_val = if ALLOW_TYPOS { + (diag_score - MISMATCH_PENALTY) * (!is_match as Score) + } else { + 0 + }; + let diag_val = match_val + mismatch_val; + + // --- UP (skip pattern char, typos only — const-generic elides entirely) --- + let up_val = if ALLOW_TYPOS { up_score - TYPO_PENALTY } else { 0 }; + + // --- LEFT (skip choice char, branchless gap penalty) --- + // GAP_OPEN when left_was_diag, GAP_EXTEND otherwise. + // pen = GAP_EXTEND + (GAP_OPEN - GAP_EXTEND) * left_was_diag + let left_val = left_score - (GAP_EXTEND + (GAP_OPEN - GAP_EXTEND) * (left_was_diag as Score)); + + // --- Best score (branchless max chain) --- + let best = diag_val.max(up_val).max(left_val); + + // --- Direction (branchless select) --- + // We encode direction as a u8 and build it without branches. + // Priority: Diag > Up > Left > None (when best <= 0). + // + // Start with Left (2), override with Up if up wins, override with Diag + // if diag wins, override with None if best <= 0. + // For exact mode (ALLOW_TYPOS=false), Diag is only valid when is_match. 
+ let diag_wins = if ALLOW_TYPOS { + diag_val >= up_val && diag_val >= left_val + } else { + is_match && diag_val >= left_val + }; + let up_wins = ALLOW_TYPOS && !diag_wins && up_val >= left_val; + + // Branchless cascade: select dir as integer. + // Dir encoding: None=0, Diag=1, Up=2, Left=3. + // Base is Left(3); subtract 1 if Up wins, subtract 2 if Diag wins. + let dir_bits: u8 = Dir::Left as u8 - (up_wins as u8) - (diag_wins as u8) * 2; + // If best <= 0, force Dir::None (0) — achieved by ANDing with all-zeros. + let positive = best > 0; + // When positive: dir_bits; when not: 0 (Dir::None). + let dir_val = dir_bits & (positive as u8).wrapping_neg(); + + // SAFETY: dir_val is in 0..=3 because of the construction above. + let dir: Dir = unsafe { std::mem::transmute(dir_val) }; + + (best, dir) +} + +// --------------------------------------------------------------------------- +// Full DP with traceback — packed Cell (u32 = score + dir) +// --------------------------------------------------------------------------- + +/// Full DP for byte slices using packed cells. +/// +/// Implements two pruning strategies: +/// +/// 1. **Row-range banding** – for each row `i` only compute columns +/// `j_lo..=j_hi` that can participate in a valid alignment. +/// - Exact mode: bounded by precomputed first/last match columns. +/// - Typo mode: bounded by diagonal ± bandwidth. +/// +/// 2. **Interpair max-score pruning** – after processing a row, if no +/// column produced a non-zero score, all active alignments for this +/// and subsequent rows are dead (since UP/LEFT can only propagate +/// existing scores). We track this and allow early termination. 
+pub(super) fn full_dp( + cho: &[C], + pat: &[C], + bonuses: &[Score], + respect_case: bool, + full_buf: &ThreadLocal>, + indices_buf: &ThreadLocal>, +) -> Option<(Score, MatchIndices)> { + let n = pat.len(); + let m = cho.len(); + + let banding = compute_banding::(pat, cho, respect_case)?; + let j_start = banding.j_first; // earliest match — skip columns before this + + // Column offset: the matrix stores only columns from j_start onward. + // Matrix column 0 is the left wall (all zeros); matrix column `jm` + // corresponds to original 1-indexed column `j = jm + j_start - 1`. + let col_off = j_start - 1; // subtract from original j to get matrix col + let mcols = m - col_off + 1; // matrix columns: 0 ..= (m - col_off) + + let mut buf = full_buf + .get_or(|| RefCell::new(SWMatrix::zero(n + 1, mcols))) + .borrow_mut(); + buf.resize(n + 1, mcols); + + // Hoist pointer and stride before initialization to use raw access. + let base_ptr = buf.data.as_mut_ptr(); + let cols = buf.cols; + + // Initialize row 0 to CELL_ZERO (all-zero bytes: score=0, dir=None=0). + // Column 0 of each subsequent row is also CELL_ZERO. + // SAFETY: base_ptr points to a valid allocation of (n+1)*cols Cells. + unsafe { + // Row 0: mcols contiguous Cells starting at base_ptr. + std::ptr::write_bytes(base_ptr, 0, mcols); + // Column 0 of rows 1..=n: one Cell per row, stride = cols. + for i in 1..=n { + *base_ptr.add(i * cols) = CELL_ZERO; + } + } + + // base_ptr and cols already set above + + // Pre-extract row bounds once (avoids repeated unwrap inside the loop). + // For exact mode we copy the arrays out; for typo mode these are unused. + let (row_lo_arr, row_hi_arr) = if !ALLOW_TYPOS { + let (lo, hi) = banding.row_bounds.as_ref().unwrap(); + (*lo, *hi) + } else { + ([0usize; MAX_PAT_LEN], [0usize; MAX_PAT_LEN]) + }; + + // Hoist invariant pointers outside the row loop. 
+ let cho_ptr = cho.as_ptr(); + let bonuses_ptr = bonuses.as_ptr(); + + for i in 1..=n { + let pi = pat[i - 1]; + let is_first = i == 1; + + // --- Compute column bounds for this row (original 1-indexed space) --- + let (j_lo, j_hi) = typo_vband_row(i, m, banding.bandwidth, banding.j_first); + + if j_lo > j_hi || j_lo > m { + // Entire row is outside the band. Only zero the cells the next + // row's Diag (reads [i][jm-1]) and Up (reads [i][jm]) will touch. + // Peek at the next row's bounds to limit work. + if i < n { + let (nj_lo, nj_hi) = if ALLOW_TYPOS { + typo_vband_row(i + 1, m, banding.bandwidth, banding.j_first) + } else { + (row_lo_arr[i], row_hi_arr[i]) + }; + let nj_lo = nj_lo.max(j_start); + if nj_lo <= nj_hi && nj_lo <= m { + let njm_lo = nj_lo - col_off; + let njm_hi = (nj_hi - col_off).min(mcols - 1); + // Diag reads jm-1, Up reads jm → need [njm_lo-1 .. njm_hi]. + let zero_lo = njm_lo.saturating_sub(1); + let zero_hi = njm_hi.min(mcols - 1); + // SAFETY: row i is within the allocated matrix. + unsafe { + let row_ptr = base_ptr.add(i * cols); + for k in zero_lo..=zero_hi { + *row_ptr.add(k) = CELL_ZERO; + } + } + } + } + continue; + } + + // Convert to matrix-local column indices (safe: j_lo >= j_start here). + let jm_lo = j_lo - col_off; + let jm_hi = j_hi - col_off; + let jm_max = mcols - 1; // last valid matrix column + + // Zero only the boundary cells that Diag/Left/Up moves will read: + // - Cell at jm_lo-1: read by Left at jm_lo and Diag from next row. + // - Cell at jm_hi+1: read by Up from next row at jm_hi+1 (if in next band). + // SAFETY: indices are within the row's allocation. + unsafe { + let row_ptr = base_ptr.add(i * cols); + if jm_lo > 1 { + *row_ptr.add(jm_lo - 1) = CELL_ZERO; + } + if jm_hi < jm_max { + *row_ptr.add(jm_hi + 1) = CELL_ZERO; + } + } + + // Get prev_row as immutable slice, cur_row as mutable slice. + // SAFETY: i >= 1 so rows i-1 and i are distinct; each row is + // cols-aligned inside the contiguous data vec. 
base_ptr/cols are + // hoisted outside the loop. + let (prev_row, cur_row) = unsafe { + let pr = std::slice::from_raw_parts(base_ptr.add((i - 1) * cols), cols); + let cr = std::slice::from_raw_parts_mut(base_ptr.add(i * cols), cols); + (pr, cr) + }; + + // Hoist raw pointers for unchecked access inside the hot loop. + let prev_ptr = prev_row.as_ptr(); + let cur_ptr = cur_row.as_mut_ptr(); + + for j in j_lo..=j_hi { + let jm = j - col_off; // matrix column + // SAFETY: j and jm are inside the band and within array bounds. + let cj = unsafe { *cho_ptr.add(j - 1) }; + let is_match = pi.eq(cj, respect_case); + + // Fetch neighbour values from the matrix. + let diag_cell = unsafe { *prev_ptr.add(jm - 1) }; + let up_score = if ALLOW_TYPOS { + let up_cell = unsafe { *prev_ptr.add(jm) }; + up_cell.score() + } else { + 0 + }; + let left_cell = unsafe { *cur_ptr.add(jm - 1) }; + + let (best, dir) = compute_cell::( + is_match, + is_first, + unsafe { *bonuses_ptr.add(j - 1) }, + diag_cell.score(), + diag_cell.is_diag(), + up_score, + left_cell.score(), + left_cell.is_diag(), + ); + + unsafe { + *cur_ptr.add(jm) = Cell::new(best, dir); + } + } + } + + // --- Find best score in the last row (row n) --- + // Moved out of the inner loop to eliminate the `i == n` branch per cell. + let mut best_score: Score = 0; + let mut best_j = 0usize; // stored in original 1-indexed space + { + let (last_j_lo, last_j_hi) = if ALLOW_TYPOS { + typo_vband_row(n, m, banding.bandwidth, banding.j_first) + } else { + (row_lo_arr[n - 1], row_hi_arr[n - 1]) + }; + let last_j_lo = last_j_lo.max(j_start); + if last_j_lo <= last_j_hi && last_j_lo <= m { + let last_row_ptr = unsafe { base_ptr.add(n * cols) }; + for j in last_j_lo..=last_j_hi { + let jm = j - col_off; + let s = unsafe { (*last_row_ptr.add(jm)).score() }; + // Branchless max: update best_score and best_j together. + let better = s > best_score; + // Use conditional moves instead of a branch. 
+ best_score = if better { s } else { best_score }; + best_j = if better { j } else { best_j }; + } + } + } + + if best_score <= 0 { + return None; + } + + if COMPUTE_INDICES { + // Traceback — j walks in original 1-indexed space, convert to matrix + // column for buf access; output indices in original 0-indexed space. + // Reuse a thread-local Vec to avoid per-call allocation. + let indices_ref_cell = indices_buf.get_or(|| RefCell::new(Vec::new())); + let mut indices_ref = indices_ref_cell.borrow_mut(); + indices_ref.clear(); + let mut i = n; + let mut j = best_j; + let mut true_matches = 0usize; + + while i > 0 && j >= j_start { + let jm = j - col_off; + // SAFETY: jm and i are within the matrix bounds established above. + let c = unsafe { *base_ptr.add(i * cols).add(jm) }; + match c.dir() { + Dir::Diag => { + if pat[i - 1].eq(cho[j - 1], respect_case) { + indices_ref.push((j - 1) as IndexType); + true_matches += 1; + } + i -= 1; + j -= 1; + } + Dir::Up => { + i -= 1; + } + Dir::Left => { + j -= 1; + } + Dir::None => break, + } + } + + if true_matches < banding.min_true_matches { + return None; + } + + // Traceback produces indices in reverse order; reverse is O(n) + // vs sort_unstable's O(n log n). + indices_ref.reverse(); + + // Move ownership out of the thread-local buffer by cloning the vec's + // contents into a fresh Vec (cheap since MatchIndices is Vec), + // but avoid an extra clone by using `to_vec()` which reallocates once. + let out = indices_ref.to_vec(); + Some((best_score, out)) + } else { + Some((best_score, Vec::default())) + } +} + +// --------------------------------------------------------------------------- +// Range DP — full matrix, minimal traceback (begin + end only) +// --------------------------------------------------------------------------- + +/// Full matrix DP followed by a traceback that only records the first and +/// last matched positions (not every index). 
Used by `fuzzy_match_range` to +/// avoid allocating and populating the full index vec when only the span is +/// needed. +pub(super) fn range_dp( + cho: &[C], + pat: &[C], + bonuses: &[Score], + respect_case: bool, + full_buf: &ThreadLocal>, +) -> Option<(Score, usize, usize)> { + let n = pat.len(); + let m = cho.len(); + + let banding = compute_banding::(pat, cho, respect_case)?; + let j_start = banding.j_first; + let col_off = j_start - 1; + let mcols = m - col_off + 1; + + let mut buf = full_buf + .get_or(|| RefCell::new(SWMatrix::zero(n + 1, mcols))) + .borrow_mut(); + buf.resize(n + 1, mcols); + + let base_ptr = buf.data.as_mut_ptr(); + let cols = buf.cols; + + // Initialize row 0 to CELL_ZERO (all-zero bytes: score=0, dir=None=0). + // Column 0 of each subsequent row is also CELL_ZERO. + // SAFETY: base_ptr points to a valid allocation of (n+1)*cols Cells. + unsafe { + std::ptr::write_bytes(base_ptr, 0, mcols); + for i in 1..=n { + *base_ptr.add(i * cols) = CELL_ZERO; + } + } + + let (row_lo_arr, row_hi_arr) = if !ALLOW_TYPOS { + let (lo, hi) = banding.row_bounds.as_ref().unwrap(); + (*lo, *hi) + } else { + ([0usize; MAX_PAT_LEN], [0usize; MAX_PAT_LEN]) + }; + + let cho_ptr = cho.as_ptr(); + let bonuses_ptr = bonuses.as_ptr(); + let mut dead_rows = 0u32; + + for i in 1..=n { + let pi = pat[i - 1]; + let is_first = i == 1; + + let (j_lo, j_hi) = if ALLOW_TYPOS { + typo_vband_row(i, m, banding.bandwidth, banding.j_first) + } else { + (row_lo_arr[i - 1], row_hi_arr[i - 1]) + }; + let j_lo = j_lo.max(j_start); + + if j_lo > j_hi || j_lo > m { + if i < n { + let (nj_lo, nj_hi) = if ALLOW_TYPOS { + typo_vband_row(i + 1, m, banding.bandwidth, banding.j_first) + } else { + (row_lo_arr[i], row_hi_arr[i]) + }; + let nj_lo = nj_lo.max(j_start); + if nj_lo <= nj_hi && nj_lo <= m { + let njm_lo = nj_lo - col_off; + let njm_hi = (nj_hi - col_off).min(mcols - 1); + let zero_lo = njm_lo.saturating_sub(1); + let zero_hi = njm_hi.min(mcols - 1); + unsafe { + let row_ptr = 
base_ptr.add(i * cols); + for k in zero_lo..=zero_hi { + *row_ptr.add(k) = CELL_ZERO; + } + } + } + } + dead_rows += 1; + if dead_rows >= 2 { + return None; + } + continue; + } + + let jm_lo = j_lo - col_off; + let jm_hi = j_hi - col_off; + let jm_max = mcols - 1; + + unsafe { + let row_ptr = base_ptr.add(i * cols); + if jm_lo > 1 { + *row_ptr.add(jm_lo - 1) = CELL_ZERO; + } + if jm_hi < jm_max { + *row_ptr.add(jm_hi + 1) = CELL_ZERO; + } + } + + let (prev_row, cur_row) = unsafe { + let pr = std::slice::from_raw_parts(base_ptr.add((i - 1) * cols), cols); + let cr = std::slice::from_raw_parts_mut(base_ptr.add(i * cols), cols); + (pr, cr) + }; + + let prev_ptr = prev_row.as_ptr(); + let cur_ptr = cur_row.as_mut_ptr(); + + let mut row_positive = false; + for j in j_lo..=j_hi { + let jm = j - col_off; + let cj = unsafe { *cho_ptr.add(j - 1) }; + let is_match = pi.eq(cj, respect_case); + + let diag_cell = unsafe { *prev_ptr.add(jm - 1) }; + let up_score = if ALLOW_TYPOS { + let up_cell = unsafe { *prev_ptr.add(jm) }; + up_cell.score() + } else { + 0 + }; + let left_cell = unsafe { *cur_ptr.add(jm - 1) }; + + let (best, dir) = compute_cell::( + is_match, + is_first, + unsafe { *bonuses_ptr.add(j - 1) }, + diag_cell.score(), + diag_cell.is_diag(), + up_score, + left_cell.score(), + left_cell.is_diag(), + ); + + row_positive |= best > 0; + unsafe { + *cur_ptr.add(jm) = Cell::new(best, dir); + } + } + + if row_positive { + dead_rows = 0; + } else { + dead_rows += 1; + if dead_rows >= 2 { + return None; + } + } + } + + // Find best score in the last row. 
+ let mut best_score: Score = 0; + let mut best_j = 0usize; + { + let (last_j_lo, last_j_hi) = if ALLOW_TYPOS { + typo_vband_row(n, m, banding.bandwidth, banding.j_first) + } else { + (row_lo_arr[n - 1], row_hi_arr[n - 1]) + }; + let last_j_lo = last_j_lo.max(j_start); + if last_j_lo <= last_j_hi && last_j_lo <= m { + let last_row_ptr = unsafe { base_ptr.add(n * cols) }; + for j in last_j_lo..=last_j_hi { + let jm = j - col_off; + let s = unsafe { (*last_row_ptr.add(jm)).score() }; + let better = s > best_score; + best_score = if better { s } else { best_score }; + best_j = if better { j } else { best_j }; + } + } + } + + if best_score <= 0 { + return None; + } + + // Minimal traceback: walk back until we can go no further, recording + // only the final j (which becomes `begin`). `end` is best_j - 1. + let end_0 = best_j - 1; // 0-indexed end + let mut i = n; + let mut j = best_j; + let mut true_matches = 0usize; + + while i > 0 && j >= j_start { + let jm = j - col_off; + let c = unsafe { *base_ptr.add(i * cols).add(jm) }; + match c.dir() { + Dir::Diag => { + if pat[i - 1].eq(cho[j - 1], respect_case) { + true_matches += 1; + } + i -= 1; + j -= 1; + } + Dir::Up => { + i -= 1; + } + Dir::Left => { + j -= 1; + } + Dir::None => break, + } + } + + if true_matches < banding.min_true_matches { + return None; + } + + // `j` after traceback is one step before the first matched column; + // the first match is at `j` (0-indexed: `j` since j is 1-indexed here + // but we stepped past it). We need the earliest index that was recorded. + // After the loop, j points to the column just before the alignment start, + // so begin = j (0-indexed) because the first Diag step decremented j before + // breaking. Re-scan the last row of the traceback to find begin precisely: + // We track the last diagonal j we visited. 
+ let begin_0 = j; // j is 1-indexed after the last decrement; 0-indexed = j + + Some((best_score, begin_0, end_0)) +} diff --git a/src/fuzzy_matcher/arinae/atom.rs b/src/fuzzy_matcher/arinae/atom.rs new file mode 100644 index 00000000..8a81ed90 --- /dev/null +++ b/src/fuzzy_matcher/arinae/atom.rs @@ -0,0 +1,90 @@ +//! Byte/Char helpers +use super::Score; +use super::constants::SEPARATOR_TABLE; +use memchr::memchr; + +pub(super) trait Atom: PartialEq + Into + Copy { + #[inline(always)] + fn eq(self, other: Self, respect_case: bool) -> bool + where + Self: PartialEq + Sized, + { + if respect_case { + self == other + } else { + self.eq_ignore_case(other) + } + } + fn eq_ignore_case(self, other: Self) -> bool; + fn is_lowercase(self) -> bool; + + /// Return the index of the first occurrence of `self` in `haystack`, + /// or `None` if not found. + /// + /// Implementations may override this with a SIMD-backed search (e.g. + /// `memchr` for `u8` in case-sensitive mode). + #[inline] + fn find_first_in(self, haystack: &[Self], respect_case: bool) -> Option { + haystack.iter().position(|&c| self.eq(c, respect_case)) + } + /// Return the word-separator bonus for this character, or `0` if it is not + /// a separator. Uses a table lookup — a single bounds check replaces + /// several branches and the returned value encodes both *whether* the + /// character is a separator and *how much* bonus it carries. + #[inline(always)] + fn separator_bonus(self) -> Score { + let ch = self.into() as usize; + // For ch < 128 we do a table lookup; for ch >= 128 we return 0. + // The `get` returns None for out-of-range, and `copied().unwrap_or(0)` is + // typically compiled as a conditional move (branchless). 
+ SEPARATOR_TABLE.get(ch).copied().unwrap_or(0) + } +} + +impl Atom for u8 { + #[inline(always)] + fn eq_ignore_case(self, b: Self) -> bool { + self.eq_ignore_ascii_case(&b) + } + #[inline(always)] + fn is_lowercase(self) -> bool { + self.is_ascii_lowercase() + } + + /// Case-sensitive search uses SIMD-backed `memchr`; case-insensitive + /// falls back to the generic scalar loop. + #[inline] + fn find_first_in(self, haystack: &[Self], respect_case: bool) -> Option { + if respect_case { + // SAFETY: `self` is a u8 and memchr searches for it in a byte slice. + memchr(self, haystack) + } else { + // Case-insensitive: compare lowercase. Also try the uppercase variant + // so a single `memchr` can be used for each case variant. + let lo = self.to_ascii_lowercase(); + let hi = self.to_ascii_uppercase(); + if lo == hi { + // No case distinction for this byte (digit, symbol, etc.). + memchr(lo, haystack) + } else { + // Check both variants and return the earliest occurrence. + let p_lo = memchr(lo, haystack); + let p_hi = memchr(hi, haystack); + match (p_lo, p_hi) { + (None, x) | (x, None) => x, + (Some(a), Some(b)) => Some(a.min(b)), + } + } + } + } +} +impl Atom for char { + #[inline(always)] + fn eq_ignore_case(self, b: Self) -> bool { + self.to_lowercase().eq(b.to_lowercase()) + } + #[inline(always)] + fn is_lowercase(self) -> bool { + self.is_ascii_lowercase() + } +} diff --git a/src/fuzzy_matcher/arinae/banding.rs b/src/fuzzy_matcher/arinae/banding.rs new file mode 100644 index 00000000..7bc655e2 --- /dev/null +++ b/src/fuzzy_matcher/arinae/banding.rs @@ -0,0 +1,92 @@ +//! Banding utils +//! Banding is the process of calculating the pertinent parts of the matrix to our specific +//! computation to avoid computing every cell + +use super::atom::Atom; +use super::constants::*; +use super::helpers::{compute_last_match_cols, compute_row_col_bounds, find_first_char}; + +/// Precomputed banding information shared by both score-only and full DP. 
+pub(super) struct BandingInfo { + /// Per-row column bounds (only present in exact mode). + pub(super) row_bounds: Option<([usize; MAX_PAT_LEN], [usize; MAX_PAT_LEN])>, + /// 1-indexed column of the first match of `pat[0]` in `cho`. + pub(super) j_first: usize, + /// Bandwidth for typo-mode diagonal banding (0 in exact mode). + pub(super) bandwidth: usize, + /// Minimum number of true (non-substitution) matches to accept. + pub(super) min_true_matches: usize, +} + +/// Compute banding information for the DP. Returns `None` if the pattern +/// cannot possibly match (e.g. a pattern character has no occurrence). +pub(super) fn compute_banding( + pat: &[C], + cho: &[C], + respect_case: bool, +) -> Option { + let n = pat.len(); + let m = cho.len(); + let row_bounds; + let j_first; + + if !ALLOW_TYPOS { + let fm = compute_first_match_cols(pat, cho, respect_case)?; + let lm = compute_last_match_cols(pat, cho, respect_case)?; + j_first = fm[0]; + row_bounds = Some(compute_row_col_bounds(n, m, &fm, &lm)); + } else { + j_first = find_first_char(pat, cho, respect_case)?; + row_bounds = None; + } + + let bandwidth = if ALLOW_TYPOS { n + TYPO_BAND_SLACK } else { 0 }; + let min_true_matches = if ALLOW_TYPOS { n.div_ceil(2) } else { 0 }; + + Some(BandingInfo { + row_bounds, + j_first, + bandwidth, + min_true_matches, + }) +} + +/// Row-major V-shaped band: compute column bounds at row `i`. +/// +/// The result is an upper triangle starting at the diagonal (j ~ i + j_first - 1) +#[inline(always)] +pub(super) fn typo_vband_row(i: usize, m: usize, bandwidth: usize, j_first: usize) -> (usize, usize) { + let j = i + j_first - 1; + let lo = j.saturating_sub(bandwidth).max(j_first); + + (lo, m) +} + +/// For exact (non-typo) mode, compute the earliest column (1-indexed) at which +/// each pattern character can first be matched. This tightens the diagonal +/// lower bound so we never compute cells that cannot participate in a valid +/// alignment. 
+/// +/// Returns `None` if any pattern character has no match in the choice (the +/// subsequence check should have caught this, but we guard anyway). +fn compute_first_match_cols(pat: &[C], cho: &[C], respect_case: bool) -> Option<[usize; MAX_PAT_LEN]> { + let n = pat.len(); + // Patterns longer than MAX_PAT_LEN cannot be handled by the stack-allocated + // banding arrays. Return None so the caller skips this choice gracefully. + if n > MAX_PAT_LEN { + return None; + } + let mut first = [0usize; MAX_PAT_LEN]; + let mut start = 0usize; // search from this choice index onward + for i in 0..n { + let found = cho[start..].iter().position(|&c| pat[i].eq(c, respect_case)); + match found { + Some(pos) => { + first[i] = start + pos + 1; // 1-indexed column + start = start + pos + 1; // next char must be strictly after + } + None => return None, + } + } + Some(first) +} diff --git a/src/fuzzy_matcher/arinae/constants.rs b/src/fuzzy_matcher/arinae/constants.rs new file mode 100644 index 00000000..6406658d --- /dev/null +++ b/src/fuzzy_matcher/arinae/constants.rs @@ -0,0 +1,55 @@ +// --------------------------------------------------------------------------- +// Scoring constants +// --------------------------------------------------------------------------- +use super::Score; + +/// Points awarded for each correctly matched character. +pub(super) const MATCH_BONUS: Score = 18; + +/// Extra bonus when the match is at position 0 of the choice string. +pub(super) const START_OF_STRING_BONUS: Score = 16; + +/// Extra bonus for a camelCase transition. +pub(super) const CAMEL_CASE_BONUS: Score = 6; + +/// Bonus for each additional consecutive matched character. +pub(super) const CONSECUTIVE_BONUS: Score = 11; + +/// Cost to open a gap (skip characters in choice). +pub(super) const GAP_OPEN: Score = 6; + +/// Cost to extend a gap by one more character. 
+pub(super) const GAP_EXTEND: Score = 2; + +pub(super) const TYPO_PENALTY: Score = 8; + +/// Penalty for aligning a pattern char to a different choice char (typos only). +pub(super) const MISMATCH_PENALTY: Score = 16; + +/// Maximum pattern length supported by the banding arrays (stack-allocated). +pub(super) const MAX_PAT_LEN: usize = 32; + +/// Bandwidth for typo-mode banding. In typo mode we allow diagonal moves +/// (match/mismatch) plus UP (skip pattern char) and LEFT (skip choice char), +/// so the optimal path can wander off the main diagonal. A bandwidth of +/// `n + TYPO_BAND_SLACK` columns around the diagonal is generous enough +/// to capture all viable alignments while still pruning far-off cells. +pub(super) const TYPO_BAND_SLACK: usize = 4; + +/// Per-separator bonus lookup table. Each entry holds the `Score` awarded when +/// a matched character immediately follows that ASCII codepoint. Non-separator +/// characters (and all non-ASCII codepoints) map to `0`. +/// +/// Different separators can carry different bonuses — for example, `/` and `\` +/// delimit path components (high bonus), while `_` or `-` delimit sub-words +/// (standard bonus). Entries that are `0` are not considered separators. +pub(super) const SEPARATOR_TABLE: [Score; 128] = { + let mut t = [0 as Score; 128]; + t[b' ' as usize] = 12; // space + t[b'-' as usize] = 10; // hyphen / kebab-case + t[b'.' as usize] = 12; // dot (file extensions, domain names) + t[b'/' as usize] = 16; // forward slash (path separator — higher bonus) + t[b'\\' as usize] = 16; // backslash (Windows path separator — higher bonus) + t[b'_' as usize] = 12; // underscore / snake_case + t +}; diff --git a/src/fuzzy_matcher/arinae/helpers.rs b/src/fuzzy_matcher/arinae/helpers.rs new file mode 100644 index 00000000..16d0bc0a --- /dev/null +++ b/src/fuzzy_matcher/arinae/helpers.rs @@ -0,0 +1,91 @@ +//! 
&[dyn Atom] manipulation helpers + +use super::Atom; +use super::constants::*; + +/// Find the 1-indexed column of the first occurrence of `pat[0]` in `cho`. +/// +/// Returns `None` if `pat[0]` is not found anywhere (caller should return +/// `None`). The position defines the start of the V-shaped banding envelope. +/// Uses SIMD-backed `find_first_in` for `u8` slices. +#[inline] +pub(super) fn find_first_char(pat: &[C], cho: &[C], respect_case: bool) -> Option { + pat[0].find_first_in(cho, respect_case).map(|idx| idx + 1) // 1-indexed +} + +/// Compute the last column (1-indexed) at which each pattern character can be +/// matched, scanning from the end. Used to tighten the diagonal upper bound. +pub(super) fn compute_last_match_cols( + pat: &[C], + cho: &[C], + respect_case: bool, +) -> Option<[usize; MAX_PAT_LEN]> { + let n = pat.len(); + // Patterns longer than MAX_PAT_LEN cannot be handled by the stack-allocated + // banding arrays. Return None so the caller skips this choice gracefully. + if n > MAX_PAT_LEN { + return None; + } + let m = cho.len(); + let mut last = [0usize; MAX_PAT_LEN]; + let mut end = m; // search up to this choice index (exclusive) + for i in (0..n).rev() { + let found = cho[..end].iter().rposition(|&c| pat[i].eq(c, respect_case)); + match found { + Some(pos) => { + last[i] = pos + 1; // 1-indexed column + end = pos; // previous char must be strictly before + } + None => return None, + } + } + Some(last) +} + +/// For the **row-major** full DP (outer loop over rows), compute per-row +/// column bounds `(j_lo, j_hi)` accounting for cross-row Diag reads. +/// +/// Row `i` (1-indexed) matches pattern char `i-1`. The Diag move at +/// `(i, j)` reads `buf[i-1][j-1]`, so row `i-1` must have computed +/// column `j-1`. We expand each row's upper bound to satisfy the next +/// row's lower-bound Diag dependency, and each row's lower bound to +/// satisfy the previous row's upper-bound Diag dependency. 
+pub(super) fn compute_row_col_bounds( + n: usize, + m: usize, + first_match: &[usize; MAX_PAT_LEN], + last_match: &[usize; MAX_PAT_LEN], +) -> ([usize; MAX_PAT_LEN], [usize; MAX_PAT_LEN]) { + let mut lo = [0usize; MAX_PAT_LEN]; + let mut hi = [0usize; MAX_PAT_LEN]; + + // Start with the raw first/last match bounds. + lo[..n].copy_from_slice(&first_match[..n]); + hi[..n].copy_from_slice(&last_match[..n]); + + // Forward pass: row i's upper bound must extend so that row i+1 can + // read Diag at (i+1, j_lo[i+1]) → needs buf[i][j_lo[i+1]-1]. + // Also, LEFT propagation within row i+1 starts at j_lo[i+1], but + // score flows from row i via Diag, so row i must reach j_lo[i+1]-1. + for i in 0..n.saturating_sub(1) { + let next_lo = lo[i + 1]; + if next_lo > 1 { + hi[i] = hi[i].max(next_lo - 1); + } + } + + // Backward pass: row i's lower bound can't be later than row i-1's + // upper bound + 1 (Diag from (i-1, hi[i-1]) can reach (i, hi[i-1]+1)). + // This is rarely binding but ensures consistency. + for i in 1..n { + lo[i] = lo[i].min(hi[i - 1] + 1); + } + + // Clamp to valid range. + for i in 0..n { + lo[i] = lo[i].max(1).min(m); + hi[i] = hi[i].max(lo[i]).min(m); + } + + (lo, hi) +} diff --git a/src/fuzzy_matcher/arinae/matrix.rs b/src/fuzzy_matcher/arinae/matrix.rs new file mode 100644 index 00000000..a7e0cbe0 --- /dev/null +++ b/src/fuzzy_matcher/arinae/matrix.rs @@ -0,0 +1,85 @@ +//! Base structs for the matching algorithm: Cell & SWMatrix + +use super::Score; + +/// Direction the optimal path took to reach a cell. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] +#[allow(dead_code)] // variants are constructed via transmute from bits +pub(super) enum Dir { + /// No valid path (score == 0). + /// + /// Assigned tag 0 so that `Cell::new(0, Dir::None)` encodes as all-zero + /// bits, allowing boundary rows/columns to be bulk-zeroed with + /// `write_bytes(0)` instead of a scalar loop. 
+ None = 0, + /// Diagonal: match or mismatch (came from [i-1][j-1]) + Diag = 1, + /// Up: gap in choice (came from [i-1][j], skip pattern char) + Up = 2, + /// Left: gap in pattern (came from [i][j-1], skip choice char) + Left = 3, +} + +/// Packed cell stored as a `u32`: bits [15:0] = score (as u16 bitcast from +/// i16), bits [17:16] = direction tag. This gives 4 bytes per cell with no +/// padding and enables branchless direction extraction via bitmask. +#[derive(Copy, Clone)] +pub(super) struct Cell(u32); + +pub(super) const CELL_ZERO: Cell = Cell::new(0, Dir::None); + +impl std::fmt::Debug for Cell { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Cell") + .field("score", &self.score()) + .field("dir", &self.dir()) + .finish() + } +} + +impl Cell { + #[inline(always)] + pub(super) const fn new(score: Score, dir: Dir) -> Cell { + // Store score as u16 bits in low 16 bits, dir in bits 16-17. + Cell((score as u16 as u32) | ((dir as u32) << 16)) + } + #[inline(always)] + pub(super) fn score(self) -> Score { + self.0 as u16 as i16 + } + #[inline(always)] + pub(super) fn dir(self) -> Dir { + // SAFETY: Dir has repr(u8) with values 0..=3 and we only ever store + // valid Dir values in bits 16-17. + unsafe { std::mem::transmute((self.0 >> 16) as u8 & 0x3) } + } + /// Branchless check: true when dir == Diag (tag 1). 
+ #[inline(always)] + pub(super) fn is_diag(self) -> bool { + (self.0 >> 16) & 0x3 == 1 + } +} + +#[derive(Default, Debug)] +pub(super) struct SWMatrix { + pub(super) data: Vec, + pub(super) cols: usize, + pub(super) rows: usize, +} + +impl SWMatrix { + pub fn zero(rows: usize, cols: usize) -> Self { + let mut res = SWMatrix::default(); + res.resize(rows, cols); + res + } + pub fn resize(&mut self, rows: usize, cols: usize) { + let needed = rows * cols; + if needed > self.data.len() { + self.data.resize(needed, CELL_ZERO); + } + self.rows = rows; + self.cols = cols; + } +} diff --git a/src/fuzzy_matcher/arinae/mod.rs b/src/fuzzy_matcher/arinae/mod.rs new file mode 100644 index 00000000..6483b522 --- /dev/null +++ b/src/fuzzy_matcher/arinae/mod.rs @@ -0,0 +1,260 @@ +//! Arinae fuzzy matching algorithm. +//! +//! Uses a Smith-Waterman local alignment approach with affine gap penalties +//! and context-sensitive bonuses. +//! +//! ## Key design choices +//! +//! - **Single score per cell** (u16 saturating) plus a 2-bit direction tag +//! for traceback. Gap open vs extend is tracked via the direction tag. +//! - **Semi-global alignment**: the pattern must be fully consumed, but +//! alignment can start/end at any position in the choice string. +//! +//! +//! ## Pruning strategies +//! +//! - **Row-range banding**: each DP cell is only computed when the row/column +//! pair falls within the feasible alignment band. In exact mode the band is +//! derived from precomputed first/last match columns for each pattern +//! character; in typo mode a diagonal ± bandwidth envelope is used. +//! - **Interpair max-score pruning**: after processing a column (score-only) +//! or row (full DP), if all cells are zero for several consecutive +//! iterations, the alignment is dead and we terminate early. 
+ +mod algo; +mod atom; +mod banding; +mod constants; +mod helpers; +mod matrix; +mod prefilter; +#[cfg(test)] +mod tests; + +use std::cell::RefCell; + +use thread_local::ThreadLocal; + +use self::algo::{full_dp, range_dp}; +use self::atom::Atom; +use self::constants::*; +use self::prefilter::cheap_typo_prefilter; + +use self::matrix::{CELL_ZERO, Cell, Dir, SWMatrix}; +use crate::{ + CaseMatching, + fuzzy_matcher::{FuzzyMatcher, MatchIndices, ScoreType}, +}; + +type Score = i16; + +fn precompute_bonuses(cho: &[C], buf: &mut Vec) { + // Reset length (O(1), no deallocation) then fill with fresh values. + buf.clear(); + // The first character always gets START_OF_STRING_BONUS. + // Subsequent characters get a bonus based on the previous character: + // - separator_bonus() when the previous char is a separator (the exact + // bonus depends on the separator — see SEPARATOR_TABLE in constants.rs), + // - CAMEL_CASE_BONUS when transitioning from lowercase to non-lowercase. + // Using a safe iterator lets the compiler auto-vectorise the loop. + let bonus_iter = std::iter::once(START_OF_STRING_BONUS).chain(cho.windows(2).map(|w| { + let prev = w[0]; + let cur = w[1]; + prev.separator_bonus() + CAMEL_CASE_BONUS * ((prev.is_lowercase() && !cur.is_lowercase()) as Score) + })); + buf.extend(bonus_iter); +} + +/// Arinae fuzzy matcher: Smith-Waterman local alignment with affine gap +/// penalties and context-sensitive bonuses. +#[derive(Debug, Default)] +pub struct ArinaeMatcher { + pub(crate) case: CaseMatching, + pub(crate) allow_typos: bool, + full_buf: ThreadLocal>, + indices_buf: ThreadLocal>, + #[allow(clippy::type_complexity)] + char_buf: ThreadLocal, Vec)>>, + bonus_buf: ThreadLocal>>, +} + +impl ArinaeMatcher { + /// Create a new `ArinaeMatcher` with the given settings. 
+ pub fn new(case: CaseMatching, allow_typos: bool) -> Self { + Self { + case, + allow_typos, + ..Default::default() + } + } + + #[inline] + fn respect_case(&self, pattern: &[C]) -> bool { + self.case == CaseMatching::Respect + || (self.case == CaseMatching::Smart && !pattern.iter().all(|b| b.is_lowercase())) + } + + /// Dispatch to `full_dp` with the appropriate const generics. + /// Assumes prefilters and bonuses have already been computed. + fn dispatch_dp( + &self, + cho: &[C], + pat: &[C], + bonuses: &[Score], + respect_case: bool, + compute_indices: bool, + ) -> Option<(ScoreType, MatchIndices)> { + let res = if self.allow_typos { + if compute_indices { + full_dp::(cho, pat, bonuses, respect_case, &self.full_buf, &self.indices_buf) + } else { + full_dp::(cho, pat, bonuses, respect_case, &self.full_buf, &self.indices_buf) + } + } else if compute_indices { + full_dp::(cho, pat, bonuses, respect_case, &self.full_buf, &self.indices_buf) + } else { + full_dp::(cho, pat, bonuses, respect_case, &self.full_buf, &self.indices_buf) + }; + res.map(|(s, idx)| (s as ScoreType, idx)) + } + + /// Generic helper: run full DP over slices of Atom. + /// If `compute_indices` is true, returns the matched indices; otherwise + /// returns a single-element vec containing the 1-indexed end column. + fn match_slices(&self, cho: &[C], pat: &[C], compute_indices: bool) -> Option<(ScoreType, MatchIndices)> { + if pat.is_empty() { + return Some((0, MatchIndices::new())); + } + if cho.is_empty() { + return None; + } + + let respect_case = self.respect_case(pat); + + // Prefilter for typo mode. + // In exact mode (non-typo) we skip is_subsequence here: compute_banding + // calls compute_first_match_cols which already validates the subsequence + // and returns None if any pattern character is absent — no redundant scan. 
+ if self.allow_typos && !cheap_typo_prefilter(pat, cho, respect_case) { + return None; + } + + // Prepare bonuses + let mut bonus_buf = self.bonus_buf.get_or(|| RefCell::new(Vec::new())).borrow_mut(); + precompute_bonuses(cho, &mut bonus_buf); + + self.dispatch_dp(cho, pat, &bonus_buf, respect_case, compute_indices) + } + + fn run(&self, choice: &str, pattern: &str, compute_indices: bool) -> Option<(ScoreType, MatchIndices)> { + if pattern.is_empty() { + return Some((0, MatchIndices::new())); + } + if choice.is_empty() { + return None; + } + + // Fast path for ASCII matching + if choice.is_ascii() && pattern.is_ascii() { + let cho = choice.as_bytes(); + let pat = pattern.as_bytes(); + return self.match_slices(cho, pat, compute_indices); + } + + let mut bufs = self + .char_buf + .get_or(|| RefCell::new((Vec::new(), Vec::new()))) + .borrow_mut(); + let (ref mut pat_buf, ref mut cho_buf) = *bufs; + pat_buf.clear(); + pat_buf.extend(pattern.chars()); + cho_buf.clear(); + cho_buf.extend(choice.chars()); + + let respect_case = self.respect_case(pat_buf); + + // Prefilter for typo mode only (see match_slices for rationale). + if self.allow_typos && !cheap_typo_prefilter(pat_buf, cho_buf, respect_case) { + return None; + } + + let mut bonus_buf = self.bonus_buf.get_or(|| RefCell::new(Vec::new())).borrow_mut(); + precompute_bonuses(cho_buf, &mut bonus_buf); + + // Call dispatch_dp directly to avoid double-borrowing bonus_buf. + self.dispatch_dp(cho_buf, pat_buf, &bonus_buf, respect_case, compute_indices) + } + + /// Run the DP and return `(score, begin, end)` without collecting all indices. + /// + /// Uses the full matrix (for traceback) but only records the first and last + /// matched columns instead of the full index list. Avoids the allocation and + /// work of `fuzzy_indices` when only the range is needed. 
+ fn run_range(&self, choice: &str, pattern: &str) -> Option<(ScoreType, usize, usize)> { + // Trivial cases mirror `run`: an empty pattern matches with score 0 and + // range (0, 0); an empty choice never matches. + if pattern.is_empty() { + return Some((0, 0, 0)); + } + if choice.is_empty() { + return None; + } + + let range = if choice.is_ascii() && pattern.is_ascii() { + // ASCII fast path: match directly on the UTF-8 bytes. + let cho = choice.as_bytes(); + let pat = pattern.as_bytes(); + let respect_case = self.respect_case(pat); + // Exact mode: compute_banding validates the subsequence implicitly. + if self.allow_typos && !cheap_typo_prefilter(pat, cho, respect_case) { + return None; + } + let mut bonus_buf = self.bonus_buf.get_or(|| RefCell::new(Vec::new())).borrow_mut(); + precompute_bonuses(cho, &mut bonus_buf); + if self.allow_typos { + range_dp::(cho, pat, &bonus_buf, respect_case, &self.full_buf) + } else { + range_dp::(cho, pat, &bonus_buf, respect_case, &self.full_buf) + } + } else { + // Non-ASCII: decode both strings into the reusable `char_buf` scratch + // vectors and match per `char`. + let mut bufs = self + .char_buf + .get_or(|| RefCell::new((Vec::new(), Vec::new()))) + .borrow_mut(); + let (ref mut pat_buf, ref mut cho_buf) = *bufs; + pat_buf.clear(); + pat_buf.extend(pattern.chars()); + cho_buf.clear(); + cho_buf.extend(choice.chars()); + let respect_case = self.respect_case(pat_buf); + // Exact mode: compute_banding validates the subsequence implicitly.
+ if self.allow_typos && !cheap_typo_prefilter(pat_buf, cho_buf, respect_case) { + return None; + } + let mut bonus_buf = self.bonus_buf.get_or(|| RefCell::new(Vec::new())).borrow_mut(); + precompute_bonuses(cho_buf, &mut bonus_buf); + if self.allow_typos { + range_dp::(cho_buf, pat_buf, &bonus_buf, respect_case, &self.full_buf) + } else { + range_dp::(cho_buf, pat_buf, &bonus_buf, respect_case, &self.full_buf) + } + }; + // Convert the DP's score to the public `ScoreType`; begin/end pass through. + range.map(|(s, b, e)| (s as ScoreType, b, e)) + } +} + +// --------------------------------------------------------------------------- +// FuzzyMatcher trait implementation +// --------------------------------------------------------------------------- + +impl FuzzyMatcher for ArinaeMatcher { + /// Score only: calls `run` with `compute_indices = false` and keeps just the score. + fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { + let result = self.run(choice, pattern, false); + result.map(|x| x.0) + } + + /// Score plus the first and last matched positions, via `run_range`. + fn fuzzy_match_range(&self, choice: &str, pattern: &str) -> Option<(ScoreType, usize, usize)> { + self.run_range(choice, pattern) + } + + /// Score plus the full list of matched indices (`run` with `compute_indices = true`). + fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> { + self.run(choice, pattern, true) + } +} diff --git a/src/fuzzy_matcher/arinae/prefilter.rs b/src/fuzzy_matcher/arinae/prefilter.rs new file mode 100644 index 00000000..8783bebb --- /dev/null +++ b/src/fuzzy_matcher/arinae/prefilter.rs @@ -0,0 +1,130 @@ +//! Prefilters running before the algo to optimize performance on unmatchable items + +use super::Atom; +use super::constants::MAX_PAT_LEN; + +/// Cheap prefilter for typo-tolerant matching. +/// +/// Rejects choices that clearly cannot produce a positive score in the DP. 
+/// The prefilter is intentionally lenient — false positives are fine (the DP +/// will reject them), but false negatives lose valid matches. +/// +/// Strategy: +/// 1. The first pattern character must appear somewhere in the choice at +/// position `j_first` (anchoring the alignment). +/// 2.
Of the remaining `n - 1` pattern characters, at least +/// `floor((n - 1) / 2)` must also appear (unordered, as a multiset) in +/// `choice[j_first..]` — the window the DP actually examines. +/// +/// Scoping the tail check to `choice[j_first..]` is strictly correct: the +/// typo-mode DP band starts at `j_first` for every row (bandwidth = n + 4 +/// always exceeds n - 1, so the left clamp always hits `j_first`). Any tail +/// character that only exists before `j_first` can never contribute a true +/// diagonal match in the DP; counting it would be a false positive. +/// +/// We use a multiset frequency check rather than an ordered greedy scan. +/// An ordered scan causes false negatives when a greedily-consumed character +/// advances the cursor past positions where later characters could still match. +/// +/// Both the `u8` and the `char` path go through `tail_freq_check`: a small +/// linear-search table seeded from the tail characters is filled by a single +/// sequential pass over the window, then consumed while walking the tail. +/// +/// An empty pattern is accepted rather than indexed into. +pub(super) fn cheap_typo_prefilter(pattern: &[C], choice: &[C], respect_case: bool) -> bool { + let n = pattern.len(); + let m = choice.len(); + + // An empty pattern matches everything. Callers are expected to handle it + // before getting here, but never index into an empty slice. + let Some(&first) = pattern.first() else { + return true; + }; + + // A pattern much longer than the choice cannot match. + if n > m + 2 { + return false; + } + + // The first pattern character must be present in the choice. + // Use the SIMD-backed find_first_in (memchr for u8, scalar for char). + // j_first is 0-indexed; the DP window is choice[j_first..]. + let Some(j_first) = first.find_first_in(choice, respect_case) else { + return false; + }; + + if n == 1 { + return true; + } + + // Half of the tail (rounded down) must be present in the window. 
+ let min_tail = (n - 1) / 2; + if min_tail == 0 { + return true; + } + + // Tail frequency check scoped to choice[j_first..].
+ // Build a frequency table over the window in one pass, then consume + // entries as we walk the tail pattern characters. + let window = &choice[j_first..]; + tail_freq_check(pattern, window, respect_case, min_tail) +} + +/// Multiset frequency check: count how many of `pattern[1..]` can be +/// satisfied (one-for-one) by characters in `window`, and return `true` +/// as soon as `min_tail` matches are reached. +/// +/// Only the first `MAX_PAT_LEN - 1` tail characters are examined. Tail +/// characters beyond that are credited as potential matches, so a pattern +/// longer than `MAX_PAT_LEN` is never rejected merely because the table is +/// too small to hold it (the prefilter must not produce false negatives). +/// +/// Requires `pattern.len() >= 2`. +/// +/// Reads `window` once, sequentially; each window character is looked up in +/// a table of at most `MAX_PAT_LEN - 1` entries, so the cost is O(m) with a +/// small constant plus O(n) for the tail walk. +#[inline] +fn tail_freq_check(pattern: &[C], window: &[C], respect_case: bool, min_tail: usize) -> bool { + // We need a per-character frequency table for the window. + // Use a small stack-allocated array of (char_value, count) pairs keyed on + // the PATTERN tail characters (at most MAX_PAT_LEN - 1 = 15 entries). + // The work is done in three passes: + // Pass 1 (O(n)): collect distinct tail chars into the table with count=0. + // Pass 2 (O(m)): scan window and increment matching table entries. + // Pass 3 (O(n)): walk the tail, decrement table entries, count matches. + + const MAX_TAIL: usize = MAX_PAT_LEN - 1; + let tail = &pattern[1..]; + let tail_len = tail.len().min(MAX_TAIL); + + // Tail characters beyond `tail_len` are never inspected. Credit them as + // potential matches so that truncation can only make the filter more + // lenient, never reject a choice the full check would have accepted. 
+ let needed = min_tail.saturating_sub(tail.len() - tail_len); + if needed == 0 { + return true; + } + + // Table of (pattern_char, available_count). At most MAX_TAIL distinct chars. + // Seed every slot with the first tail char and count=0 so the array is fully + // initialised; only entries 0..table_len are ever consulted. + let placeholder = tail[0]; + let mut table: [(C, u8); MAX_TAIL] = [(placeholder, 0); MAX_TAIL]; + let mut table_len = 0usize; + + // Pass 1: populate table with distinct tail chars (count = 0). + for &pi in tail[..tail_len].iter() { + if !table[..table_len].iter().any(|&(c, _)| pi.eq(c, respect_case)) { + table[table_len] = (pi, 0); + table_len += 1; + } + } + + // Pass 2: scan window, increment table counts (saturate at 255).
+ for &c in window { + if let Some(entry) = table[..table_len] + .iter_mut() + .find(|(tc, _)| Atom::eq(*tc, c, respect_case)) + { + entry.1 = entry.1.saturating_add(1); + } + } + + // Pass 3: walk the tail, consume from table, count matches. + let mut matched = 0usize; + for &pi in tail[..tail_len].iter() { + if let Some(entry) = table[..table_len] + .iter_mut() + .find(|(tc, _)| Atom::eq(pi, *tc, respect_case)) + && entry.1 > 0 + { + entry.1 -= 1; + matched += 1; + if matched >= needed { + return true; + } + } + } + + false +} diff --git a/src/fuzzy_matcher/arinae/tests.rs b/src/fuzzy_matcher/arinae/tests.rs new file mode 100644 index 00000000..1e7028e5 --- /dev/null +++ b/src/fuzzy_matcher/arinae/tests.rs @@ -0,0 +1,393 @@ +use super::*; +use crate::fuzzy_matcher::FuzzyMatcher; + +fn matcher() -> ArinaeMatcher { + ArinaeMatcher::default() +} + +fn matcher_typos() -> ArinaeMatcher { + ArinaeMatcher { + allow_typos: true, + ..Default::default() + } +} + +fn score(choice: &str, pattern: &str) -> Option { + matcher().fuzzy_match(choice, pattern) +} + +fn score_typos(choice: &str, pattern: &str) -> Option { + matcher_typos().fuzzy_match(choice, pattern) +} + +fn indices(choice: &str, pattern: &str) -> Option { + matcher().fuzzy_indices(choice, pattern).map(|(_, v)| v) +} + +// ----- Basic matching ----- + +#[test] +fn empty_pattern_always_matches() { + assert_eq!(score("anything", ""), Some(0)); + assert_eq!(score("", ""), Some(0)); +} + +#[test] +fn empty_choice_never_matches() { + assert!(score("", "a").is_none()); +} + +#[test] +fn exact_match_scores_positive() { + assert!(score("hello", "hello").unwrap() > 0); +} + +#[test] +fn no_match_returns_none() { + assert!(score("abc", "xyz").is_none()); +} + +#[test] +fn subsequence_match() { + assert!(score("axbycz", "abc").is_some()); + let idx = indices("axbycz", "abc").unwrap(); + assert_eq!(idx.as_slice(), &[0, 2, 4]); +} + +// ----- Scoring quality ----- + +#[test] +fn contiguous_beats_scattered() { + let
contiguous = score("ab", "ab").unwrap(); + let scattered = score("axb", "ab").unwrap(); + assert!( + contiguous > scattered, + "contiguous={contiguous} should beat scattered={scattered}" + ); +} + +#[test] +fn fewer_gaps_beats_more_gaps() { + let one_gap = score("abxc", "abc").unwrap(); + let two_gaps = score("axbxc", "abc").unwrap(); + assert!(one_gap > two_gaps, "one_gap={one_gap} should beat two_gaps={two_gaps}"); +} + +#[test] +fn word_start_bonus() { + let boundary = score("src/reader.rs", "reader").unwrap(); + let stitched = score("src/tui/header.rs", "reader").unwrap(); + assert!( + boundary > stitched, + "word-boundary={boundary} should beat stitched={stitched}" + ); +} + +#[test] +fn start_of_string_bonus() { + let at_start = score("abc", "a").unwrap(); + let at_mid = score("xabc", "a").unwrap(); + assert!(at_start > at_mid, "start={at_start} should beat mid={at_mid}"); +} + +#[test] +fn consecutive_match_preferred() { + let consecutive = score("foobar", "oob").unwrap(); + let spread = score("oxoxb", "oob").unwrap(); + assert!( + consecutive > spread, + "consecutive={consecutive} should beat spread={spread}" + ); +} + +#[test] +fn camel_case_bonus() { + let camel = score("FooBar", "fb").unwrap(); + let flat = score("foobar", "fb").unwrap(); + assert!(camel > flat, "camel={camel} should beat flat={flat}"); +} + +// ----- Case sensitivity ----- + +#[test] +fn smart_case_insensitive_lowercase_pattern() { + let m = ArinaeMatcher { + case: CaseMatching::Smart, + allow_typos: false, + ..Default::default() + }; + assert!(m.fuzzy_match("FooBar", "foobar").is_some()); +} + +#[test] +fn smart_case_sensitive_uppercase_pattern() { + let m = ArinaeMatcher { + case: CaseMatching::Smart, + allow_typos: false, + ..Default::default() + }; + assert!(m.fuzzy_match("foobar", "FooBar").is_none()); + assert!(m.fuzzy_match("FooBar", "FooBar").is_some()); +} + +#[test] +fn respect_case() { + let m = ArinaeMatcher { + case: CaseMatching::Respect, + allow_typos: false, + 
..Default::default() + }; + assert!(m.fuzzy_match("abc", "ABC").is_none()); + assert!(m.fuzzy_match("ABC", "ABC").is_some()); +} + +#[test] +fn ignore_case() { + let m = ArinaeMatcher { + case: CaseMatching::Ignore, + allow_typos: false, + ..Default::default() + }; + assert!(m.fuzzy_match("abc", "ABC").is_some()); +} + +// ----- Typo tolerance ----- + +#[test] +fn no_typos_rejects_mismatch() { + assert!(score("hxllo", "hello").is_none()); +} + +#[test] +fn typos_accepts_mismatch() { + assert!(score_typos("hxllo", "hello").is_some()); +} + +#[test] +fn no_typos_rejects_transposition() { + assert!(score("hlelo", "hello").is_none()); +} + +#[test] +fn typos_accepts_transposition() { + assert!(score_typos("hlelo", "hello").is_some()); +} + +#[test] +fn exact_match_same_with_and_without_typos() { + let with = score_typos("hello", "hello").unwrap(); + let without = score("hello", "hello").unwrap(); + assert_eq!( + with, without, + "exact match score should be identical regardless of typo flag" + ); +} + +#[test] +fn typo_match_scores_less_than_exact() { + let exact = score_typos("hello", "hello").unwrap(); + let typo = score_typos("hxllo", "hello").unwrap(); + assert!(exact > typo, "exact={exact} should beat typo={typo}"); +} + +// ----- Traceback correctness ----- + +#[test] +fn indices_exact_match() { + let idx = indices("hello", "hello").unwrap(); + assert_eq!(idx.as_slice(), &[0, 1, 2, 3, 4]); +} + +#[test] +fn transposition_matches() { + let result = matcher_typos().fuzzy_indices("abdc", "abcd"); + assert!(result.is_some(), "transposed input should match with typos"); + let (score_trans, _) = result.unwrap(); + + let (score_exact, _) = matcher_typos().fuzzy_indices("abcd", "abcd").unwrap(); + assert!( + score_exact > score_trans, + "exact={score_exact} should beat transposed={score_trans}" + ); +} + +// ----- Reader ranking regression ----- + +#[test] +fn reader_ranking() { + let pattern = "reader"; + let dense = score("src/reader.rs", pattern).unwrap(); + let 
sparse = score( + "tests/snapshots/normalize__insta_normalize_accented_item_unaccented_query.snap", + pattern, + ) + .unwrap_or(0); + assert!(dense > sparse, "dense={dense} should beat sparse={sparse}"); +} + +// ----- Ordering sanity ----- + +#[test] +fn ordering_ab() { + use crate::fuzzy_matcher::util::assert_order; + let m = ArinaeMatcher { + case: CaseMatching::Ignore, + allow_typos: false, + ..Default::default() + }; + assert_order(&m, "ab", &["ab", "aoo_boo", "acb"]); +} + +#[test] +fn ordering_print() { + use crate::fuzzy_matcher::util::assert_order; + let m = ArinaeMatcher { + case: CaseMatching::Ignore, + allow_typos: false, + ..Default::default() + }; + assert_order(&m, "print", &["printf", "sprintf"]); +} + +// ----- Score-only vs full DP consistency ----- + +#[test] +fn score_only_matches_full_dp() { + let m = ArinaeMatcher { + case: CaseMatching::Ignore, + allow_typos: true, + ..Default::default() + }; + let cases = [ + ("hello world", "hlo"), + ("src/reader.rs", "reader"), + ("FooBar", "fb"), + ("axbycz", "abc"), + ("hxllo", "hello"), + ]; + for (choice, pattern) in &cases { + let score_only = m.fuzzy_match(choice, pattern); + let full = m.fuzzy_indices(choice, pattern).map(|(s, _)| s); + assert_eq!( + score_only, full, + "score mismatch for ({choice}, {pattern}): score_only={score_only:?} full={full:?}" + ); + } +} + +// ----- Non-ASCII fallback ----- + +#[test] +fn non_ascii_matching() { + let m = matcher(); + assert!(m.fuzzy_match("café", "café").is_some()); + assert!(m.fuzzy_match("naïve", "naive").is_none()); +} + +// Regression test: all valid subsequences must be returned in --no-typos mode. +// grep '.*t.*e.*s.*t' should give the same results as arinae with pattern 'test'. +#[test] +fn all_subsequences_must_match() { + let m = matcher(); + let cases = [ + // Bug 1: full_dp tracked best_j across all rows instead of only the + // last row, so traceback started at the wrong cell. 
+ "audio/audio/bin/temp/usr/uploads/mnt/cache/media_3445258", + "audio/audio/audio/docs/cache/temp/downloads/backup/shared/data_9591740", + // Bug 2: min_true_matches was enforced in exact mode, but the true-count + // bookkeeping is corrupted by tiebreaking when a character coincidentally + // matches at a column where diag_score=0 (fresh local alignment start). + // In exact mode every row increment requires a true match, so score > 0 + // at row n already guarantees n true matches; the threshold is not needed. + "audio/audio/audio/opt/media/sys/sys/backup/etc_744357", + "audio/audio/audio/temp/shared/uploads/downloads/config/home/mnt_9037278", + "audio/audio/opt/cache/usr/usr/var/temp_1579492", + ]; + for choice in &cases { + assert!( + m.fuzzy_match(choice, "test").is_some(), + "fuzzy_match should match subsequence 'test' in {:?}", + choice + ); + assert!( + m.fuzzy_indices(choice, "test").is_some(), + "fuzzy_indices should match subsequence 'test' in {:?}", + choice + ); + } +} +#[test] +fn score_and_full_dp_same() { + let cases = [("dist-workspace.toml", "tst")]; + let m = matcher_typos(); + for (choice, pat) in cases { + assert_eq!( + m.fuzzy_indices(choice, pat).map(|(s, _)| s), + m.fuzzy_match_range(choice, pat).map(|(s, _, _)| s) + ) + } +} + +// Verify that fuzzy_match_range returns scores consistent with fuzzy_indices +// and that begin/end are within the span of the full index list. 
+#[test] +fn range_consistent_with_indices() { + let cases = [ + ("hello", "hello"), + ("axbycz", "abc"), + ("src/reader.rs", "reader"), + ("FooBar", "fb"), + ("dist-workspace.toml", "tst"), + ]; + let matchers = [matcher(), matcher_typos()]; + for m in &matchers { + for &(choice, pattern) in &cases { + let range = m.fuzzy_match_range(choice, pattern); + let full = m.fuzzy_indices(choice, pattern); + match (range, full) { + (None, None) => {} + (Some((rs, rb, re)), Some((fs, fidx))) => { + assert_eq!(rs, fs, "score mismatch for ({choice}, {pattern})"); + let fbegin = fidx.first().copied().unwrap_or_default(); + let fend = fidx.last().copied().unwrap_or_default(); + assert_eq!( + rb, fbegin, + "begin mismatch for ({choice}, {pattern}): range={rb} indices={fbegin}" + ); + assert_eq!( + re, fend, + "end mismatch for ({choice}, {pattern}): range={re} indices={fend}" + ); + } + _ => panic!("range/indices disagreement for ({choice}, {pattern})"), + } + } + } +} + +// ----- Prefilter regression tests ----- + +/// Extending a typo-tolerant match with an additional character must not cause +/// the candidate to be incorrectly rejected. +/// +/// "fobara" matches "src/fuzzy_matcher/arinae/algo.rs" via the typo-tolerant +/// DP (score 91). Typing one more character to form "fobaral" should continue +/// to match — the `a`, `r`, `a` subsequence exists in the choice string and +/// satisfies the prefilter threshold (min_tail = 3). +/// +/// The old prefilter used a greedy ordered scan that consumed `o` at position 28, +/// locking the cursor past all four `a` occurrences (at positions 11, 18, 22, 25), +/// causing a false negative. The correct approach is an unordered frequency check. +#[test] +fn typo_prefilter_no_false_negative_on_extension() { + let choice = "src/fuzzy_matcher/arinae/algo.rs"; + // Both the shorter and the extended pattern must match. 
+ assert!( + score_typos(choice, "fobara").is_some(), + "\"fobara\" should match \"{choice}\"" + ); + assert!( + score_typos(choice, "fobaral").is_some(), + "\"fobaral\" should match \"{choice}\" (regression: greedy prefilter scan false negative)" + ); +} diff --git a/src/fuzzy_matcher/clangd.rs b/src/fuzzy_matcher/clangd.rs index 59754ddd..16005d9c 100644 --- a/src/fuzzy_matcher/clangd.rs +++ b/src/fuzzy_matcher/clangd.rs @@ -21,7 +21,7 @@ //! https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp //! Also check: https://github.com/lewang/flx/issues/98 use crate::fuzzy_matcher::util::*; -use crate::fuzzy_matcher::{FuzzyMatcher, IndexType, ScoreType}; +use crate::fuzzy_matcher::{FuzzyMatcher, IndexType, MatchIndices, ScoreType}; use std::cell::RefCell; use std::cmp::max; use thread_local::ThreadLocal; @@ -100,7 +100,7 @@ impl ClangdMatcher { } impl FuzzyMatcher for ClangdMatcher { - fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { + fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> { let case_sensitive = self.is_case_sensitive(pattern); let mut choice_chars = self.c_cache.get_or(|| RefCell::new(Vec::new())).borrow_mut(); @@ -159,7 +159,10 @@ impl FuzzyMatcher for ClangdMatcher { } indices_reverse.reverse(); - Some((adjust_score(score, num_choice_chars), indices_reverse)) + Some(( + adjust_score(score, num_choice_chars), + MatchIndices::from(indices_reverse), + )) } fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { @@ -199,7 +202,7 @@ impl FuzzyMatcher for ClangdMatcher { } /// fuzzy match `line` with `pattern`, returning the score and indices of matches -pub fn fuzzy_indices(line: &str, pattern: &str) -> Option<(ScoreType, Vec)> { +pub fn fuzzy_indices(line: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> { ClangdMatcher::default().ignore_case().fuzzy_indices(line, pattern) } diff --git a/src/fuzzy_matcher/frizbee.rs 
b/src/fuzzy_matcher/frizbee.rs index 562510a2..bed38f0b 100644 --- a/src/fuzzy_matcher/frizbee.rs +++ b/src/fuzzy_matcher/frizbee.rs @@ -3,7 +3,7 @@ use frizbee::{Scoring, smith_waterman::SmithWatermanMatcher}; use crate::{ CaseMatching, - fuzzy_matcher::{FuzzyMatcher, IndexType, ScoreType}, + fuzzy_matcher::{FuzzyMatcher, MatchIndices, ScoreType}, }; const RESPECT_CASE_BONUS: u16 = 10000; @@ -31,7 +31,7 @@ impl FrizbeeMatcher { } impl FuzzyMatcher for FrizbeeMatcher { - fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { + fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> { let scoring = Scoring { matching_case_bonus: match self.case { CaseMatching::Respect => RESPECT_CASE_BONUS, @@ -49,7 +49,7 @@ impl FuzzyMatcher for FrizbeeMatcher { let mut matcher = SmithWatermanMatcher::new(pattern.as_bytes(), &scoring); matcher .match_haystack_indices(choice.as_bytes(), 0, self.max_typos) - .and_then(|(m, indices)| { + .and_then(|(m, mut indices)| { debug!("{choice}: {m} ({})", scoring.matching_case_bonus); if m > scoring.matching_case_bonus.saturating_mul( pattern @@ -59,10 +59,31 @@ impl FuzzyMatcher for FrizbeeMatcher { .try_into() .unwrap(), ) { - Some((m.into(), indices)) + indices.reverse(); + Some((m.into(), MatchIndices::from(indices))) } else { None } }) } + /// Score-only variant: builds a `Scoring` from `self.case` and calls + /// `match_haystack`, so no match indices are collected. + /// + /// NOTE(review): `fuzzy_indices` additionally rejects matches whose score + /// does not exceed a multiple of `matching_case_bonus`; no such check is + /// applied here, so the two methods can disagree on whether a choice + /// matches when the bonus is non-zero — confirm this is intended. 
+ fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { + let scoring = Scoring { + matching_case_bonus: match self.case { + CaseMatching::Respect => RESPECT_CASE_BONUS, + CaseMatching::Ignore => 0, + CaseMatching::Smart => { + if pattern.chars().any(|c| c.is_uppercase()) { + RESPECT_CASE_BONUS + } else { + 0 + } + } + }, + ..Default::default() + }; + let mut matcher = SmithWatermanMatcher::new(pattern.as_bytes(), &scoring); + matcher + .match_haystack(choice.as_bytes(), self.max_typos) + .map(|x| x as ScoreType) + } } diff --git a/src/fuzzy_matcher/fzy.rs b/src/fuzzy_matcher/fzy.rs index 5ad40c9a..61d28586 100644 ---
a/src/fuzzy_matcher/fzy.rs +++ b/src/fuzzy_matcher/fzy.rs @@ -41,7 +41,7 @@ use std::cell::RefCell; use thread_local::ThreadLocal; use crate::fuzzy_matcher::util::cheap_matches; -use crate::fuzzy_matcher::{FuzzyMatcher, IndexType, ScoreType}; +use crate::fuzzy_matcher::{FuzzyMatcher, IndexType, MatchIndices, ScoreType}; // --------------------------------------------------------------------------- // Score constants (from fzy's config.def.h, scaled ×200 to integer) @@ -777,7 +777,7 @@ impl FzyMatcher { } impl FuzzyMatcher for FzyMatcher { - fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { + fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> { let case_sensitive = self.is_case_sensitive(pattern); let mut choice_chars = self.c_cache.get_or(|| RefCell::new(Vec::new())).borrow_mut(); @@ -793,14 +793,14 @@ impl FuzzyMatcher for FzyMatcher { cheap_matches(&choice_chars, &pattern_chars, case_sensitive)?; let mut positions = Vec::with_capacity(pattern_chars.len()); let s = fzy_score(&pattern_chars, &choice_chars, case_sensitive, Some(&mut positions))?; - Some((internal_to_skim_score(s), positions)) + Some((internal_to_skim_score(s), MatchIndices::from(positions))) } Some(max_t) => { // Fast path: try exact subsequence match first if cheap_matches(&choice_chars, &pattern_chars, case_sensitive).is_some() { let mut positions = Vec::with_capacity(pattern_chars.len()); if let Some(s) = fzy_score(&pattern_chars, &choice_chars, case_sensitive, Some(&mut positions)) { - return Some((internal_to_skim_score(s), positions)); + return Some((internal_to_skim_score(s), MatchIndices::from(positions))); } } @@ -853,7 +853,7 @@ impl FuzzyMatcher for FzyMatcher { self.lp_cache.get().map(|cell| cell.replace(vec![])); } - Some((internal_to_skim_score(s), positions)) + Some((internal_to_skim_score(s), MatchIndices::from(positions))) } } } @@ -942,7 +942,7 @@ impl FuzzyMatcher for FzyMatcher { /// Fuzzy match `choice` 
against `pattern` using the fzy algorithm, returning /// the score and matched character indices. -pub fn fuzzy_indices(choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { +pub fn fuzzy_indices(choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> { FzyMatcher::default().ignore_case().fuzzy_indices(choice, pattern) } @@ -1052,7 +1052,7 @@ mod tests { let result = matcher.fuzzy_indices("Hello, 世界", "H世"); assert!(result.is_some()); let (_, indices) = result.unwrap(); - assert_eq!(indices, vec![0, 7]); + assert_eq!(indices.as_slice(), &[0, 7]); } #[test] @@ -1142,7 +1142,7 @@ mod tests { let result = matcher.fuzzy_indices("abx", "abc"); assert!(result.is_some()); let (_, indices) = result.unwrap(); - assert_eq!(indices, vec![0, 1, 2]); + assert_eq!(indices.as_slice(), &[0, 1, 2]); } #[test] @@ -1152,7 +1152,7 @@ mod tests { assert!(result.is_some()); let (_, indices) = result.unwrap(); // 'a'→0, 'b'→1, 'c' deleted (no index), 'd'→2 - assert_eq!(indices, vec![0, 1, 2]); + assert_eq!(indices.as_slice(), &[0, 1, 2]); } #[test] diff --git a/src/fuzzy_matcher/mod.rs b/src/fuzzy_matcher/mod.rs index 83a818af..e9fb1bc1 100644 --- a/src/fuzzy_matcher/mod.rs +++ b/src/fuzzy_matcher/mod.rs @@ -3,6 +3,8 @@ //! This module provides different fuzzy matching algorithms including //! skim's own algorithm and clangd's algorithm for matching text patterns. 
+/// Arinae fuzzy matching algorithm (Smith-Waterman with affine gaps) +pub mod arinae; /// Clangd fuzzy matching algorithm pub mod clangd; pub mod frizbee; @@ -15,13 +17,31 @@ mod util; pub(crate) type IndexType = usize; pub(crate) type ScoreType = i64; +pub(crate) type MatchIndices = Vec; + /// Trait for fuzzy matching text patterns against choices pub trait FuzzyMatcher: Send + Sync { /// fuzzy match choice with pattern, and return the score & matched indices of characters - fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(i64, Vec)>; + fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(i64, MatchIndices)>; /// fuzzy match choice with pattern, and return the score of matching fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { self.fuzzy_indices(choice, pattern).map(|(score, _)| score) } + + /// Fuzzy match and return (score, begin_char_index, end_char_index) without + /// computing per-character match indices. This avoids the Vec allocation and + /// traceback that `fuzzy_indices` requires, making it much faster for ranking. + /// + /// `begin` is the character index of the first matched pattern character, + /// `end` is the character index of the last matched pattern character. + /// + /// Default implementation falls back to `fuzzy_indices`. 
+ fn fuzzy_match_range(&self, choice: &str, pattern: &str) -> Option<(i64, usize, usize)> { + self.fuzzy_indices(choice, pattern).map(|(score, indices)| { + let begin = indices.first().copied().unwrap_or(0); + let end = indices.last().copied().unwrap_or(0); + (score, begin, end) + }) + } } diff --git a/src/fuzzy_matcher/skim.rs b/src/fuzzy_matcher/skim.rs index 6081aff5..67f1dadf 100644 --- a/src/fuzzy_matcher/skim.rs +++ b/src/fuzzy_matcher/skim.rs @@ -24,7 +24,7 @@ use thread_local::ThreadLocal; use super::skim::Movement::{Match, Skip}; use super::util::{char_equal, cheap_matches}; -use super::{FuzzyMatcher, IndexType, ScoreType}; +use super::{FuzzyMatcher, IndexType, MatchIndices, ScoreType}; const BONUS_MATCHED: ScoreType = 4; const BONUS_CASE_MATCH: ScoreType = 4; @@ -49,8 +49,8 @@ pub struct SkimMatcher {} /// /// V1 algorithm is deprecated, checkout `FuzzyMatcherV2` impl FuzzyMatcher for SkimMatcher { - fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { - fuzzy_indices(choice, pattern) + fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> { + fuzzy_indices(choice, pattern).map(|(s, v)| (s, MatchIndices::from(v))) } fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { @@ -1033,8 +1033,9 @@ impl SkimMatcherV2 { } impl FuzzyMatcher for SkimMatcherV2 { - fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { + fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> { self.fuzzy(choice, pattern, true) + .map(|(s, v)| (s, MatchIndices::from(v))) } fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { diff --git a/src/helper/item_reader.rs b/src/helper/item_reader.rs index 8e6eadc4..9ea44465 100644 --- a/src/helper/item_reader.rs +++ b/src/helper/item_reader.rs @@ -17,7 +17,7 @@ use crate::{SkimItem, SkimItemReceiver, SkimItemSender, SkimOptions}; const DELIMITER_STR: &str = r"[\t\n ]+"; const READ_BUFFER_SIZE: usize 
= 1024; -const ITEMS_BUFFER_SIZE: usize = 128; +const ITEMS_BUFFER_SIZE: usize = 1024; const SEND_TIMEOUT_MS: u64 = 100; // Send items if we haven't sent anything in 100ms pub enum CollectorInput { diff --git a/src/item.rs b/src/item.rs index 52bb6809..a2e41875 100644 --- a/src/item.rs +++ b/src/item.rs @@ -16,6 +16,7 @@ use clap::builder::PossibleValue; use crate::spinlock::{SpinLock, SpinLockGuard}; use crate::{MatchRange, Rank, SkimItem}; +use tokio::sync::Notify; //------------------------------------------------------------------------------ @@ -143,20 +144,91 @@ impl MatchedItem { return existing; } + // Fast path: if all existing <= all incoming, we can append without merging. + if existing.last().unwrap() <= incoming.first().unwrap() { + let mut out = existing; + out.extend(incoming); + return out; + } + + // Fast path: if all incoming <= all existing, prepend without complex merge. + if incoming.last().unwrap() <= existing.first().unwrap() { + let mut out = incoming; + out.extend(existing); + return out; + } + + // Merge using direct next values to avoid Peekable overhead. 
let mut merged = Vec::with_capacity(existing.len() + incoming.len()); - let mut a = existing.into_iter().peekable(); - let mut b = incoming.into_iter().peekable(); - while a.peek().is_some() && b.peek().is_some() { - if a.peek().unwrap() <= b.peek().unwrap() { - merged.push(a.next().unwrap()); - } else { - merged.push(b.next().unwrap()); + let mut a = existing.into_iter(); + let mut b = incoming.into_iter(); + let mut a_next = a.next(); + let mut b_next = b.next(); + + loop { + match (&a_next, &b_next) { + (Some(av), Some(bv)) => { + if av <= bv { + // take a_next + merged.push(a_next.take().unwrap()); + a_next = a.next(); + } else { + merged.push(b_next.take().unwrap()); + b_next = b.next(); + } + } + (Some(_), None) => { + merged.push(a_next.take().unwrap()); + merged.extend(a); + break; + } + (None, Some(_)) => { + merged.push(b_next.take().unwrap()); + merged.extend(b); + break; + } + (None, None) => break, } } - merged.extend(a); - merged.extend(b); + merged } + + /// Merge `incoming` into an already-sorted `existing` vector in-place. + /// + /// This function chooses between two strategies: + /// - If `incoming` is small (few items), insert them one-by-one using binary + /// search to find the insertion point. This costs O(m log n) comparisons + /// for m incoming items, plus up to O(n) element moves per insertion, and + /// avoids reallocating the whole list when m << n. + /// - Otherwise, fall back to the linear two-way merge which is O(n+m). + /// + /// On the small-batch path an item that compares equal to existing entries + /// is placed after them, matching the tie order of the two-way merge loop + /// in `sorted_merge`. + /// + /// `existing` must be sorted according to the same ordering used by + /// `MatchedItem::cmp`. `incoming` must be sorted the same way whenever it + /// is larger than the small-batch threshold, because `sorted_merge` + /// assumes both of its inputs are sorted. 
+ pub fn merge_into_sorted(existing: &mut Vec, incoming: Vec) { + if incoming.is_empty() { + return; + } + + // Heuristic threshold: for small incoming batches, prefer binary-insert. + // This avoids allocating a new vector and copying the entire existing + // list when we only need to insert a few new items. + const SMALL_INSERT_THRESHOLD: usize = 256; + + if incoming.len() <= SMALL_INSERT_THRESHOLD { + // Insert each incoming item into the existing sorted vector.
+ // For small m this is typically faster than allocating a new + // buffer and performing a full linear merge. + for item in incoming { + // First index whose element sorts strictly after `item`: inserting + // there keeps `existing` sorted and puts `item` behind any equal + // entries, instead of at whichever equal element a binary search + // happens to land on. + let pos = existing.partition_point(|e| e.cmp(&item).is_le()); + existing.insert(pos, item); + } + } else { + // For larger incoming batches, perform the linear two-way merge + // which is O(n+m) and avoids the O(n*m) cost of repeated inserts. + let old = std::mem::take(existing); + *existing = MatchedItem::sorted_merge(old, incoming); + } + } } use std::cmp::Ordering as CmpOrd; @@ -200,6 +272,13 @@ pub struct ItemPool { lines_to_reserve: usize, /// Reverse the order of items (--tac flag) tac: bool, + + /// Notified whenever new items are appended to the pool (async path). + /// + /// Listeners (e.g. the TUI event loop) can `await` this to wake up + /// immediately when items arrive instead of waiting for the next + /// periodic tick. + pub items_available: Arc, } impl Default for ItemPool { @@ -211,6 +290,7 @@ impl Default for ItemPool { reserved_items: SpinLock::new(Vec::new()), lines_to_reserve: 0, tac: false, + items_available: Arc::new(Notify::new()), } } } @@ -230,6 +310,7 @@ impl ItemPool { reserved_items: SpinLock::new(Vec::new()), lines_to_reserve: options.header_lines, tac: options.tac, + items_available: Arc::new(Notify::new()), } } @@ -305,7 +386,15 @@ impl ItemPool { } self.length.store(pool.len(), Ordering::SeqCst); trace!("item pool, done append {len} items, total: {}", pool.len()); - pool.len() + let new_len = pool.len(); + drop(pool); + drop(header_items); + // Wake any listener that is waiting for new items (e.g. the event loop + // or the filter-mode loop) so it can restart the matcher immediately + // instead of waiting for the next periodic tick.
+ self.items_available.notify_one(); + + new_len } /// Takes items from the pool, copying new items since last take and releasing lock immediately diff --git a/src/lib.rs b/src/lib.rs index bdfd29ee..b2b215e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,7 @@ use std::borrow::Cow; use std::fmt::Display; use std::sync::Arc; +use crate::fuzzy_matcher::MatchIndices; use ratatui::{ style::Style, text::{Line, Span}, @@ -97,7 +98,7 @@ pub enum Matches { #[default] None, /// Matches at specific character indices - CharIndices(Vec), + CharIndices(MatchIndices), /// Matches in a character range (start, end) CharRange(usize, usize), /// Matches in a byte range (start, end) @@ -281,7 +282,7 @@ pub enum MatchRange { /// Range of bytes (start, end) ByteRange(usize, usize), /// Individual character indices that matched - Chars(Vec), + Chars(MatchIndices), } /// Rank stores the raw match measurements used for sorting results. @@ -316,7 +317,7 @@ pub struct MatchResult { impl MatchResult { #[must_use] /// Converts the match range to character indices - pub fn range_char_indices(&self, text: &str) -> Vec { + pub fn range_char_indices(&self, text: &str) -> MatchIndices { match &self.matched_range { &MatchRange::ByteRange(start, end) => { let first = text[..start].chars().count(); diff --git a/src/matcher.rs b/src/matcher.rs index fc6d9803..534b1634 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -126,6 +126,7 @@ impl Matcher { .fuzzy_algorithm(options.algorithm) .exact_mode(options.exact) .typos(options.typos) + .filter_mode(options.filter.is_some()) .rank_builder(rank_builder.clone()) .build(); @@ -192,43 +193,80 @@ impl Matcher { // could call kill() + reset() before the old closure runs, causing the old // closure to re-take items that should belong to the new matcher. 
let items = item_pool.take(); - trace!("matcher start, total: {}", items.len()); + let total = items.len(); + trace!("matcher start, total: {}", total); thread_pool.spawn(move || { + // Process items in parallel using chunk-based accounting to minimize + // atomic contention. Each rayon work unit processes a chunk of items, + // updating the shared `processed` and `matched` counters only once per + // chunk instead of once per item. The interrupt flag is also checked + // only once per chunk to amortize the atomic load. + // + // `with_min_len` ensures rayon doesn't split work into chunks smaller + // than CHUNK_SIZE, keeping the overhead of the parallel iterator low + // relative to the actual matching work. + const CHUNK_SIZE: usize = 512; + let matched_items: Vec = items .into_par_iter() - .chunks(8196) - .take_any_while(|_chunk| { - if interrupt.load(Ordering::Relaxed) { - return false; - } + .with_min_len(CHUNK_SIZE) + .fold( + || (Vec::new(), 0usize, 0usize), // (local_matches, local_processed, local_matched) + |(mut local_matches, mut local_processed, mut local_matched), item| { + // Check interrupt once at the start of each chunk boundary. + // The fold processes items sequentially within each rayon work unit, + // so checking every CHUNK_SIZE items amortizes the atomic load. 
+ if local_processed % CHUNK_SIZE == 0 && interrupt.load(Ordering::Relaxed) { + return (local_matches, local_processed, local_matched); + } - true - }) - .map(|chunk| { - processed.fetch_add(chunk.len(), Ordering::Relaxed); - - let matched_chunk: Vec = chunk - .into_iter() - .filter_map(|item| { - matcher_engine.match_item(item.as_ref()).map(|match_result| { - // item is Arc but we get &Arc from iterator, so one clone is needed - MatchedItem { - item, - rank: match_result.rank, - rank_builder: rank_builder.clone(), - matched_range: Some(match_result.matched_range), - } - }) - }) - .collect(); - - matched.fetch_add(matched_chunk.len(), Ordering::Relaxed); - - matched_chunk + local_processed += 1; + + if let Some(match_result) = matcher_engine.match_item(item.as_ref()) { + local_matched += 1; + local_matches.push(MatchedItem { + item, + rank: match_result.rank, + rank_builder: rank_builder.clone(), + matched_range: Some(match_result.matched_range), + }); + } + + // Flush counters periodically so the UI sees progress updates. + if local_processed % CHUNK_SIZE == 0 { + processed.fetch_add(CHUNK_SIZE, Ordering::Relaxed); + if local_matched > 0 { + matched.fetch_add(local_matched, Ordering::Relaxed); + local_matched = 0; + } + } + + (local_matches, local_processed, local_matched) + }, + ) + .map(|(local_matches, local_processed, local_matched)| { + // Flush any remaining counts that didn't hit a chunk boundary. + let remainder = local_processed % CHUNK_SIZE; + if remainder > 0 { + processed.fetch_add(remainder, Ordering::Relaxed); + } + if local_matched > 0 { + matched.fetch_add(local_matched, Ordering::Relaxed); + } + local_matches }) - .flatten_iter() - .collect(); + .reduce(Vec::new, |mut a, mut b| { + // Merge per-thread result vectors. Always extend the larger one + // to avoid unnecessary reallocations. 
+ if a.len() >= b.len() { + a.extend(b); + a + } else { + b.extend(a); + b + } + }); if !interrupt.load(Ordering::SeqCst) { trace!("matcher stop, total matched: {}", matched_items.len()); diff --git a/src/options.rs b/src/options.rs index 51cb36d1..cf9b6bb0 100644 --- a/src/options.rs +++ b/src/options.rs @@ -1147,6 +1147,12 @@ impl SkimOptions { self.typos = Typos::Disabled; } + if let Some(ref filter_query) = self.filter + && self.query.is_none() + { + self.query = Some(filter_query.clone()); + } + self } /// Initializes history from configured history files @@ -1229,6 +1235,8 @@ impl SkimOptions { pub enum FeatureFlag { /// Disable preview PTY on linux NoPreviewPty, + /// Display the item's match score before its value in the item list (for matcher debugging) + ShowScore, } #[allow(unused_macros)] diff --git a/src/skim.rs b/src/skim.rs index 6f116edc..dc6257f5 100644 --- a/src/skim.rs +++ b/src/skim.rs @@ -49,14 +49,8 @@ impl Skim { /// # Panics /// /// Panics if the tui fails to initilize - pub fn run_with(mut options: SkimOptions, source: Option) -> Result { + pub fn run_with(options: SkimOptions, source: Option) -> Result { trace!("running skim"); - // In filter mode, use the filter string as the query for matching - if let Some(ref filter_query) = options.filter - && options.query.is_none() - { - options.query = Some(filter_query.clone()); - } let mut skim = Self::init(options, source)?; skim.start(); @@ -329,7 +323,7 @@ where && (!app.matcher_control.stopped() || !reader_control.is_done()) { trace!("still waiting"); - std::thread::sleep(Duration::from_millis(10)); + std::thread::sleep(Duration::from_millis(1)); app.restart_matcher(false); } trace!( @@ -429,6 +423,7 @@ where /// ``` pub async fn tick(&mut self) -> Result { let matcher_interval = &mut self.matcher_interval; + let items_available = self.app.item_pool.items_available.clone(); select! 
{ event = self.tui.as_mut().expect("TUI should be initialized before the event loop can start").next() => { let evt = event.ok_or_eyre("Could not acquire next event")?; @@ -458,6 +453,11 @@ where } => { self.app.restart_matcher(false); } + // Wake immediately when new items arrive in the pool so the matcher + // can pick them up without waiting for the next periodic interval. + _ = items_available.notified() => { + self.app.restart_matcher(false); + } Ok(stream) = async { match &self.listener { Some(l) => interprocess::local_socket::traits::tokio::Listener::accept(l).await, @@ -490,7 +490,7 @@ where /// until the user accepts or aborts. Use `tick()` directly if you need /// to interleave your own logic between iterations. pub async fn run(&mut self) -> Result<()> { - self.matcher_interval = Some(tokio::time::interval(Duration::from_millis(100))); + self.matcher_interval = Some(tokio::time::interval(Duration::from_millis(10))); trace!("Starting event loop"); loop { if self.tick().await? { diff --git a/src/tui/app.rs b/src/tui/app.rs index fd425dc1..2dece17f 100644 --- a/src/tui/app.rs +++ b/src/tui/app.rs @@ -1271,8 +1271,8 @@ impl App { if no_sort { existing.items.extend(matches); } else { - let old = std::mem::take(&mut existing.items); - existing.items = MatchedItem::sorted_merge(old, matches); + // Merge incoming matches into existing sorted list in-place. 
+ MatchedItem::merge_into_sorted(&mut existing.items, matches); } } else { *guard = Some(ProcessedItems { diff --git a/src/tui/item_list.rs b/src/tui/item_list.rs index 19343f8a..89ce160d 100644 --- a/src/tui/item_list.rs +++ b/src/tui/item_list.rs @@ -74,6 +74,8 @@ pub struct ItemList { wrap: bool, /// Border type, if borders are enabled pub border: Option, + /// When true, prepend each item's match score to its display text + print_score: bool, } impl Default for ItemList { @@ -107,6 +109,7 @@ impl Default for ItemList { cycle: false, wrap: false, border: None, + print_score: false, } } } @@ -568,6 +571,7 @@ impl SkimWidget for ItemList { cycle: options.cycle, wrap: options.wrap_items, border: options.border, + print_score: options.flags.contains(&crate::options::FeatureFlag::ShowScore), } } @@ -720,7 +724,7 @@ impl SkimWidget for ItemList { // Prepend cursor indicators // Pre-allocate capacity to avoid reallocation - let mut spans: Vec = Vec::with_capacity(2 + display_line.spans.len()); + let mut spans: Vec = Vec::with_capacity(3 + display_line.spans.len()); spans.push(Span::styled( if is_current { selector_icon.to_owned() @@ -737,6 +741,14 @@ impl SkimWidget for ItemList { }, theme.selected, )); + // Optionally prepend the match score for debugging + if this.print_score { + let score = item.rank.score; + spans.push(Span::styled( + format!("[{score}] "), + if is_current { theme.current } else { theme.normal }, + )); + } spans.extend(display_line.spans); if *wrap { diff --git a/tests/matcher.rs b/tests/matcher.rs index b3d1e182..285ce3a3 100644 --- a/tests/matcher.rs +++ b/tests/matcher.rs @@ -79,3 +79,9 @@ insta_test!(matcher_fzy, INPUT_ITEMS, &["-q", "stum", "--algo", "fzy", "--no-typ insta_test!(matcher_fzy_typos, INPUT_ITEMS, &["-q", "stum", "--algo", "fzy"], { @snap; }); +insta_test!(matcher_arinae, INPUT_ITEMS, &["-q", "stum", "--algo", "arinae", "--no-typos"], { + @snap; +}); +insta_test!(matcher_arinae_typos, INPUT_ITEMS, &["-q", "stum", "--algo", 
"arinae"], { + @snap; +}); diff --git a/tests/snapshots/matcher__matcher_arinae.snap b/tests/snapshots/matcher__matcher_arinae.snap new file mode 100644 index 00000000..b1262148 --- /dev/null +++ b/tests/snapshots/matcher__matcher_arinae.snap @@ -0,0 +1,29 @@ +--- +source: tests/matcher.rs +expression: buf + & cursor_pos +--- +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" src/tui/item_list.rs " +"> src/tui/mod.rs " +" 2/49 0/0" +"> stum " +cursor: (24, 7) diff --git a/tests/snapshots/matcher__matcher_arinae_typos.snap b/tests/snapshots/matcher__matcher_arinae_typos.snap new file mode 100644 index 00000000..378135f3 --- /dev/null +++ b/tests/snapshots/matcher__matcher_arinae_typos.snap @@ -0,0 +1,29 @@ +--- +source: tests/matcher.rs +expression: buf + & cursor_pos +--- +" src/manpage.rs " +" src/util.rs " +" src/item.rs " +" src/fuzzy_matcher/clangd.rs " +" src/fuzzy_matcher/frizbee.rs " +" src/fuzzy_matcher/skim.rs " +" src/fuzzy_matcher/util.rs " +" src/fuzzy_matcher/mod.rs " +" src/theme.rs " +" src/tmux.rs " +" src/tui/header.rs " +" src/tui/statusline.rs " +" src/tui/input.rs " +" src/tui/app.rs " +" src/tui/backend.rs " +" src/tui/event.rs " +" src/tui/preview.rs " +" src/tui/widget.rs " +" src/tui/options.rs " +" src/tui/util.rs " +" src/tui/item_list.rs " +"> src/tui/mod.rs " +" 38/49 0/0" +"> stum " +cursor: (24, 7) diff --git a/tests/snapshots/matcher__matcher_skim_v3.snap b/tests/snapshots/matcher__matcher_skim_v3.snap new file mode 100644 index 00000000..b1262148 --- /dev/null +++ b/tests/snapshots/matcher__matcher_skim_v3.snap @@ -0,0 +1,29 @@ +--- +source: tests/matcher.rs +expression: buf + & cursor_pos +--- +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" " +" src/tui/item_list.rs " +"> src/tui/mod.rs " +" 2/49 0/0" +"> stum " +cursor: (24, 7) diff --git a/tests/snapshots/matcher__matcher_skim_v3_typos.snap 
b/tests/snapshots/matcher__matcher_skim_v3_typos.snap new file mode 100644 index 00000000..9aefc072 --- /dev/null +++ b/tests/snapshots/matcher__matcher_skim_v3_typos.snap @@ -0,0 +1,29 @@ +--- +source: tests/matcher.rs +expression: buf + & cursor_pos +--- +" src/manpage.rs " +" src/util.rs " +" src/item.rs " +" src/fuzzy_matcher/clangd.rs " +" src/fuzzy_matcher/frizbee.rs " +" src/fuzzy_matcher/skim.rs " +" src/fuzzy_matcher/util.rs " +" src/fuzzy_matcher/mod.rs " +" src/theme.rs " +" src/tmux.rs " +" src/tui/header.rs " +" src/tui/statusline.rs " +" src/tui/input.rs " +" src/tui/app.rs " +" src/tui/backend.rs " +" src/tui/event.rs " +" src/tui/preview.rs " +" src/tui/widget.rs " +" src/tui/options.rs " +" src/tui/util.rs " +" src/tui/item_list.rs " +"> src/tui/mod.rs " +" 37/49 0/0" +"> stum " +cursor: (24, 7)