diff --git a/.gitignore b/.gitignore
index 01b6dd90..4a8f00b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,9 @@ codecov.json
 *.profraw
 
 benches/fixtures/*.txt
+
+# Profiling
+profile.json.gz
+perf.data.old
+perf.data
+flamegraph.svg
diff --git a/Cargo.lock b/Cargo.lock
index c3af2ae8..b597cf32 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -810,7 +810,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -1189,9 +1189,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
 
 [[package]]
 name = "jiff"
-version = "0.2.20"
+version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543"
+checksum = "b3e3d65f018c6ae946ab16e80944b97096ed73c35b221d1c478a6c81d8f57940"
 dependencies = [
  "jiff-static",
  "log",
@@ -1202,9 +1202,9 @@ dependencies = [
 
 [[package]]
 name = "jiff-static"
-version = "0.2.20"
+version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5"
+checksum = "a17c2b211d863c7fde02cbea8a3c1a439b98e109286554f2860bdded7ff83818"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1498,9 +1498,9 @@ dependencies = [
 
 [[package]]
 name = "owo-colors"
-version = "4.2.3"
+version = "4.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52"
+checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d"
 
 [[package]]
 name = "page_size"
@@ -1727,9 +1727,9 @@ dependencies = [
 
 [[package]]
 name = "pulldown-cmark"
-version = "0.13.0"
+version = "0.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0"
+checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6"
 dependencies = [
  "bitflags 2.11.0",
  "memchr",
@@ -1992,7 +1992,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -2205,6 +2205,7 @@ dependencies = [
  "interprocess",
  "kanal",
  "log",
+ "memchr",
  "nix 0.31.1",
  "portable-pty",
  "rand 0.10.0",
@@ -2314,7 +2315,7 @@ dependencies = [
  "getrandom 0.4.1",
  "once_cell",
  "rustix",
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -2937,7 +2938,7 @@ version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index fcd5df29..a3ca9c66 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,6 +21,11 @@ strip = true
 [profile.dist]
 inherits = "release"
 
+[profile.release-debug]
+inherits = "release"
+debug = true
+strip = false
+
 
 [lib]
 name = "skim"
@@ -57,6 +62,7 @@ thiserror = "=2.0.18"
 tempfile = "=3.25.0"
 crossterm = { version = ">=0.0.0", features = ["event-stream", "use-dev-tty", "libc"] }
 thread_local = "=1.1.9"
+memchr = "=2.8.0"
 clap_complete_nushell = "=4.5.10"
 interprocess = { version = "=2.4.0", features = ["tokio"] }
 serde = { version = "=1.0.228", features = ["derive"] }
@@ -95,3 +101,7 @@ harness = false
 [[bench]]
 name = "partial"
 harness = false
+
+[[bench]]
+name = "matcher_micro"
+harness = false
diff --git a/README.md b/README.md
index e042e6ff..7281dc0d 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,7 @@ Skim provides a single executable called `sk`. Think of it as a smarter alternat
    * [Interactive mode](#interactive-mode)
       + [How does it work?](#how-does-it-work)
    * [Executing external programs](#executing-external-programs)
+   * [Algorithms](#algorithms)
    * [Preview Window](#preview-window)
       + [How does it work?](#how-does-it-work-1)
    * [Fields support](#fields-support)
@@ -84,7 +85,7 @@ The skim project contains several components:
 | macOS          | MacPorts          | `sudo port install skim`     |
 | Alpine         | apk               | `apk add skim`               |
 | Arch           | pacman            | `pacman -S skim`             |
-| Fedora         | COPR              |   see below                  | 
+| Fedora         | COPR              |   see below                  |
 | Gentoo         | Portage           | `emerge --ask app-misc/skim` |
 | Guix           | guix              | `guix install skim`          |
 | Void           | XBPS              | `xbps-install -S skim`       |
@@ -452,6 +453,14 @@ You can configure key bindings to start external processes without leaving Skim
 sk --bind 'f1:execute(less -f {}),ctrl-y:execute-silent(echo {} | pbcopy)+abort'
 ```
 
+## Algorithms
+
+Skim offers multiple matching algorithms; check the help or manpage for an exhaustive list. Among them are:
+- `skim_v2`, the default algorithm, loosely based on `fzf`'s algorithm
+- `frizbee` ([crate](https://crates.io/crates/frizbee)), the typo-resistant algorithm used in the [blink.cmp](https://github.com/saghen/blink.cmp) neovim plugin
+- `fzy`, based on [fzy](https://github.com/jhawthorn/fzy/)'s algorithm and extended by skim for basic typo-resistance
+- `arinae`, skim's newest algorithm, designed in-house with typo-resistance in mind, expanding on all the above to make typo-resistant matching feel more natural while keeping the per-item performance up to the best standards
+
 ## Preview Window
 
 This is a great feature of fzf that skim borrows. For example, we use 'ag' to
diff --git a/benches/filter.rs b/benches/filter.rs
index cb7fd930..9b665b79 100644
--- a/benches/filter.rs
+++ b/benches/filter.rs
@@ -75,6 +75,27 @@ fn criterion_benchmark_10m(c: &mut Criterion) {
             Skim::run_with(opts, None)
         });
     });
+    c.bench_function("filter_10M_arinae", |b| {
+        b.iter(|| {
+            let opts = SkimOptionsBuilder::default()
+                .cmd("cat benches/fixtures/10M.txt")
+                .filter("test")
+                .algorithm(FuzzyAlgorithm::Arinae)
+                .build()?;
+            Skim::run_with(opts, None)
+        });
+    });
+    c.bench_function("filter_10M_arinae_typos", |b| {
+        b.iter(|| {
+            let opts = SkimOptionsBuilder::default()
+                .cmd("cat benches/fixtures/10M.txt")
+                .filter("test")
+                .typos(Typos::Smart)
+                .algorithm(FuzzyAlgorithm::Arinae)
+                .build()?;
+            Skim::run_with(opts, None)
+        });
+    });
 }
 
 fn criterion_benchmark_1m(c: &mut Criterion) {
@@ -150,6 +171,27 @@ fn criterion_benchmark_1m(c: &mut Criterion) {
             Skim::run_with(opts, None)
         });
     });
+    c.bench_function("filter_1M_arinae", |b| {
+        b.iter(|| {
+            let opts = SkimOptionsBuilder::default()
+                .cmd("cat benches/fixtures/1M.txt")
+                .filter("test")
+                .algorithm(FuzzyAlgorithm::Arinae)
+                .build()?;
+            Skim::run_with(opts, None)
+        });
+    });
+    c.bench_function("filter_1M_arinae_typos", |b| {
+        b.iter(|| {
+            let opts = SkimOptionsBuilder::default()
+                .cmd("cat benches/fixtures/1M.txt")
+                .filter("test")
+                .typos(Typos::Smart)
+                .algorithm(FuzzyAlgorithm::Arinae)
+                .build()?;
+            Skim::run_with(opts, None)
+        });
+    });
 
     c.bench_function("filter_1M_andor", |b| {
         b.iter(|| {
diff --git a/benches/matcher_micro.rs b/benches/matcher_micro.rs
new file mode 100644
index 00000000..a7ea4917
--- /dev/null
+++ b/benches/matcher_micro.rs
@@ -0,0 +1,133 @@
+//! Microbenchmark that isolates the fuzzy matcher DP from all other overhead
+//! (I/O, threading, sorting).
+
+use std::fs;
+
+use criterion::{Criterion, criterion_group, criterion_main};
+
+use skim::CaseMatching;
+use skim::fuzzy_matcher::FuzzyMatcher;
+use skim::fuzzy_matcher::arinae::ArinaeMatcher;
+use skim::fuzzy_matcher::frizbee::FrizbeeMatcher;
+use skim::prelude::SkimMatcherV2;
+
+fn load_lines() -> Vec<String> {
+    let data = fs::read_to_string("benches/fixtures/1M.txt").expect("1M.txt missing");
+    data.lines().map(|l| l.to_string()).collect()
+}
+
+fn bench_matcher(c: &mut Criterion) {
+    let lines = load_lines();
+
+    c.bench_function("micro_skim_v2", |b| {
+        let m = SkimMatcherV2::default().smart_case();
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_indices(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+    c.bench_function("micro_frizbee", |b| {
+        let m = FrizbeeMatcher::default().case(CaseMatching::Smart).max_typos(Some(0));
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_indices(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+    c.bench_function("micro_typos_frizbee", |b| {
+        let m = FrizbeeMatcher::default().case(CaseMatching::Smart).max_typos(Some(1));
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_indices(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+    c.bench_function("micro_arinae", |b| {
+        let m = ArinaeMatcher::new(CaseMatching::Smart, false);
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_indices(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+    c.bench_function("micro_arinae_range", |b| {
+        let m = ArinaeMatcher::new(CaseMatching::Smart, false);
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_match_range(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+    c.bench_function("micro_arinae_score", |b| {
+        let m = ArinaeMatcher::new(CaseMatching::Smart, false);
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_match(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+    c.bench_function("micro_typos_arinae", |b| {
+        let m = ArinaeMatcher::new(CaseMatching::Smart, true);
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_indices(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+    c.bench_function("micro_typos_arinae_range", |b| {
+        let m = ArinaeMatcher::new(CaseMatching::Smart, true);
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_match_range(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+    c.bench_function("micro_typos_arinae_score", |b| {
+        let m = ArinaeMatcher::new(CaseMatching::Smart, true);
+        b.iter(|| {
+            let mut count = 0u64;
+            for line in &lines {
+                if m.fuzzy_match(line, "test").is_some() {
+                    count += 1;
+                }
+            }
+            count
+        });
+    });
+}
+
+criterion_group!(benches, bench_matcher);
+criterion_main!(benches);
diff --git a/benches/read_and_match.rs b/benches/read_and_match.rs
index 67bbcc35..c7a402a5 100644
--- a/benches/read_and_match.rs
+++ b/benches/read_and_match.rs
@@ -28,6 +28,60 @@ fn criterion_benchmark(c: &mut Criterion) {
         b.to_async(rt)
             .iter(async || wait_until_done(SkimOptionsBuilder::default().query("test").build().unwrap()).await);
     });
+    c.bench_function("query_frizbee", |b| {
+        let rt = tokio::runtime::Runtime::new().unwrap();
+        b.to_async(rt).iter(async || {
+            wait_until_done(
+                SkimOptionsBuilder::default()
+                    .query("test")
+                    .algorithm(FuzzyAlgorithm::Frizbee)
+                    .no_typos(true)
+                    .build()
+                    .unwrap(),
+            )
+            .await
+        });
+    });
+    c.bench_function("query_ari", |b| {
+        let rt = tokio::runtime::Runtime::new().unwrap();
+        b.to_async(rt).iter(async || {
+            wait_until_done(
+                SkimOptionsBuilder::default()
+                    .query("test")
+                    .algorithm(FuzzyAlgorithm::Arinae)
+                    .no_typos(true)
+                    .build()
+                    .unwrap(),
+            )
+            .await
+        });
+    });
+    c.bench_function("query_frizbee_typos", |b| {
+        let rt = tokio::runtime::Runtime::new().unwrap();
+        b.to_async(rt).iter(async || {
+            wait_until_done(
+                SkimOptionsBuilder::default()
+                    .query("test")
+                    .algorithm(FuzzyAlgorithm::Frizbee)
+                    .build()
+                    .unwrap(),
+            )
+            .await
+        });
+    });
+    c.bench_function("query_ari_typos", |b| {
+        let rt = tokio::runtime::Runtime::new().unwrap();
+        b.to_async(rt).iter(async || {
+            wait_until_done(
+                SkimOptionsBuilder::default()
+                    .query("test")
+                    .algorithm(FuzzyAlgorithm::Arinae)
+                    .build()
+                    .unwrap(),
+            )
+            .await
+        });
+    });
     c.bench_function("typing", |b| {
         let rt = tokio::runtime::Runtime::new().unwrap();
         b.to_async(rt).iter(async || {
@@ -46,7 +100,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     } else {
                         done_since = 1;
                     }
-                    if sent && done_since > 5 {
+                    if sent && done_since > 50 {
                         s.send(Event::Action(Action::Accept(None))).await?;
                     } else if !sent {
                         s.send(Event::Action(Action::AddChar('t'))).await?;
diff --git a/man/man1/sk.1 b/man/man1/sk.1
index d94bb306..1d5ef768 100644
--- a/man/man1/sk.1
+++ b/man/man1/sk.1
@@ -100,7 +100,9 @@ clangd: Clangd fuzzy matching algorithm
 .IP \(bu 2
 fzy: Fzy matching algorithm (https://github.com/jhawthorn/fzy)
 .IP \(bu 2
-frizbee: Frizbee matching algorithm, typo resistant Will fallback to SkimV2 if the feature is not enabled
+frizbee: Frizbee matching algorithm, typo resistant
+.IP \(bu 2
+arinae: Arinae: typo\-resistant & natural algorithm
 .RE
 .TP
 \fB\-\-case\fR \fI<CASE>\fR [default: smart]
diff --git a/shell/completion.bash b/shell/completion.bash
index d1b3daff..7cb1e7f7 100644
--- a/shell/completion.bash
+++ b/shell/completion.bash
@@ -62,7 +62,7 @@ _sk() {
                     return 0
                     ;;
                 --algo)
-                    COMPREPLY=($(compgen -W "skim_v1 skim_v2 clangd fzy frizbee" -- "${cur}"))
+                    COMPREPLY=($(compgen -W "skim_v1 skim_v2 clangd fzy frizbee arinae" -- "${cur}"))
                     return 0
                     ;;
                 --case)
@@ -250,7 +250,7 @@ _sk() {
                     return 0
                     ;;
                 --flags)
-                    COMPREPLY=($(compgen -W "no-preview-pty" -- "${cur}"))
+                    COMPREPLY=($(compgen -W "no-preview-pty show-score" -- "${cur}"))
                     return 0
                     ;;
                 --hscroll-off)
diff --git a/shell/completion.fish b/shell/completion.fish
index 3296067b..eed04374 100644
--- a/shell/completion.fish
+++ b/shell/completion.fish
@@ -16,7 +16,8 @@ complete -c sk -l algo -d 'Fuzzy matching algorithm' -r -f -a "skim_v1\t'Origina
 skim_v2\t'Improved skim fuzzy matching algorithm (v2, default)'
 clangd\t'Clangd fuzzy matching algorithm'
 fzy\t'Fzy matching algorithm (https://github.com/jhawthorn/fzy)'
-frizbee\t'Frizbee matching algorithm, typo resistant Will fallback to SkimV2 if the feature is not enabled'"
+frizbee\t'Frizbee matching algorithm, typo resistant'
+arinae\t'Arinae: typo-resistant & natural algorithm'"
 complete -c sk -l case -d 'Case sensitivity' -r -f -a "respect\t'Case-sensitive matching'
 ignore\t'Case-insensitive matching'
 smart\t'Smart case: case-insensitive unless query contains uppercase'"
@@ -80,7 +81,8 @@ complete -c sk -l listen -d 'Run an IPC socket with optional name (defaults to s
 complete -c sk -l remote -d 'Send commands to an IPC socket with optional name (defaults to sk)' -r
 complete -c sk -l tmux -d 'Run in a tmux popup' -r
 complete -c sk -l log-file -d 'Pipe log output to a file' -r
-complete -c sk -l flags -d 'Feature flags' -r -f -a "no-preview-pty\t'Disable preview PTY on linux'"
+complete -c sk -l flags -d 'Feature flags' -r -f -a "no-preview-pty\t'Disable preview PTY on linux'
+show-score\t'Display the item\'s match score before its value in the item list (for matcher debugging)'"
 complete -c sk -l hscroll-off -r
 complete -c sk -l jump-labels -r
 complete -c sk -l scheme -r
diff --git a/shell/completion.nu b/shell/completion.nu
index 4f4302b3..a94c4af6 100644
--- a/shell/completion.nu
+++ b/shell/completion.nu
@@ -5,7 +5,7 @@ module completions {
   }
 
   def "nu-complete sk algorithm" [] {
-    [ "skim_v1" "skim_v2" "clangd" "fzy" "frizbee" ]
+    [ "skim_v1" "skim_v2" "clangd" "fzy" "frizbee" "arinae" ]
   }
 
   def "nu-complete sk case" [] {
@@ -29,7 +29,7 @@ module completions {
   }
 
   def "nu-complete sk flags" [] {
-    [ "no-preview-pty" ]
+    [ "no-preview-pty" "show-score" ]
   }
 
   # Fuzzy Finder in rust!
diff --git a/shell/completion.zsh b/shell/completion.zsh
index 85618263..c7a8e670 100644
--- a/shell/completion.zsh
+++ b/shell/completion.zsh
@@ -27,7 +27,8 @@ _sk() {
 skim_v2\:"Improved skim fuzzy matching algorithm (v2, default)"
 clangd\:"Clangd fuzzy matching algorithm"
 fzy\:"Fzy matching algorithm (https\://github.com/jhawthorn/fzy)"
-frizbee\:"Frizbee matching algorithm, typo resistant Will fallback to SkimV2 if the feature is not enabled"))' \
+frizbee\:"Frizbee matching algorithm, typo resistant"
+arinae\:"Arinae\: typo-resistant & natural algorithm"))' \
 '--case=[Case sensitivity]:CASE:((respect\:"Case-sensitive matching"
 ignore\:"Case-insensitive matching"
 smart\:"Smart case\: case-insensitive unless query contains uppercase"))' \
@@ -83,7 +84,8 @@ zsh\:"Zsh"))' \
 '--remote=[Send commands to an IPC socket with optional name (defaults to sk)]::REMOTE:_default' \
 '--tmux=[Run in a tmux popup]::TMUX:_default' \
 '--log-file=[Pipe log output to a file]:LOG_FILE:_default' \
-'*--flags=[Feature flags]:FLAGS:((no-preview-pty\:"Disable preview PTY on linux"))' \
+'*--flags=[Feature flags]:FLAGS:((no-preview-pty\:"Disable preview PTY on linux"
+show-score\:"Display the item'\''s match score before its value in the item list (for matcher debugging)"))' \
 '--hscroll-off=[]:HSCROLL_OFF:_default' \
 '--jump-labels=[]:JUMP_LABELS:_default' \
 '--scheme=[]:SCHEME:_default' \
diff --git a/src/engine/andor.rs b/src/engine/andor.rs
index 34d8c75b..bf6b06b1 100644
--- a/src/engine/andor.rs
+++ b/src/engine/andor.rs
@@ -1,5 +1,6 @@
 use std::fmt::{Display, Error, Formatter};
 
+use crate::fuzzy_matcher::MatchIndices;
 use crate::{MatchEngine, MatchRange, MatchResult, SkimItem};
 
 //------------------------------------------------------------------------------
@@ -70,7 +71,7 @@ impl AndEngine {
     }
 
     fn merge_matched_items(&self, items: Vec<MatchResult>, text: &str) -> MatchResult {
-        let mut ranges = vec![];
+        let mut ranges = MatchIndices::new();
         let mut rank = items[0].rank;
         for item in items {
             match item.matched_range {
@@ -78,7 +79,7 @@ impl AndEngine {
                     ranges.extend(item.range_char_indices(text));
                 }
                 MatchRange::Chars(vec) => {
-                    ranges.extend(vec.iter());
+                    ranges.extend(vec.iter().copied());
                 }
             }
             rank.score = rank.score.max(item.rank.score);
diff --git a/src/engine/factory.rs b/src/engine/factory.rs
index 57e6087e..a6a0bdcb 100644
--- a/src/engine/factory.rs
+++ b/src/engine/factory.rs
@@ -19,6 +19,7 @@ pub struct ExactOrFuzzyEngineFactory {
     fuzzy_algorithm: FuzzyAlgorithm,
     rank_builder: Arc<RankBuilder>,
     typos: Typos,
+    filter_mode: bool,
 }
 
 impl ExactOrFuzzyEngineFactory {
@@ -29,6 +30,7 @@ impl ExactOrFuzzyEngineFactory {
             fuzzy_algorithm: FuzzyAlgorithm::SkimV2,
             rank_builder: Default::default(),
             typos: Typos::Disabled,
+            filter_mode: false,
         }
     }
 
@@ -60,6 +62,12 @@ impl ExactOrFuzzyEngineFactory {
         self
     }
 
+    /// Sets filter mode (skips per-character match indices for faster matching)
+    pub fn filter_mode(mut self, filter_mode: bool) -> Self {
+        self.filter_mode = filter_mode;
+        self
+    }
+
     /// Builds the factory (currently a no-op, returns self)
     pub fn build(self) -> Self {
         self
@@ -126,6 +134,7 @@ impl MatchEngineFactory for ExactOrFuzzyEngineFactory {
                     .algorithm(self.fuzzy_algorithm)
                     .case(case)
                     .typos(self.typos)
+                    .filter_mode(self.filter_mode)
                     .rank_builder(self.rank_builder.clone())
                     .build(),
             )
diff --git a/src/engine/fuzzy.rs b/src/engine/fuzzy.rs
index e8d364af..964b5be8 100644
--- a/src/engine/fuzzy.rs
+++ b/src/engine/fuzzy.rs
@@ -2,10 +2,10 @@ use std::cmp::min;
 use std::fmt::{Display, Error, Formatter};
 use std::sync::Arc;
 
+use crate::fuzzy_matcher::MatchIndices;
+use crate::fuzzy_matcher::arinae::ArinaeMatcher;
 use crate::fuzzy_matcher::frizbee::FrizbeeMatcher;
-use crate::fuzzy_matcher::{
-    FuzzyMatcher, IndexType, ScoreType, clangd::ClangdMatcher, fzy::FzyMatcher, skim::SkimMatcherV2,
-};
+use crate::fuzzy_matcher::{FuzzyMatcher, clangd::ClangdMatcher, fzy::FzyMatcher, skim::SkimMatcherV2};
 
 use crate::item::RankBuilder;
 use crate::{CaseMatching, MatchEngine, Typos};
@@ -27,8 +27,10 @@ pub enum FuzzyAlgorithm {
     /// Fzy matching algorithm (https://github.com/jhawthorn/fzy)
     Fzy,
     /// Frizbee matching algorithm, typo resistant
-    /// Will fallback to SkimV2 if the feature is not enabled
     Frizbee,
+    /// Arinae: typo-resistant & natural algorithm
+    #[cfg_attr(feature = "cli", clap(alias = "ari"))]
+    Arinae,
 }
 
 const BYTES_1M: usize = 1024 * 1024 * 1024;
@@ -46,6 +48,10 @@ pub struct FuzzyEngineBuilder {
     /// - `Typos::Smart`: adaptive (pattern_length / 4)
     /// - `Typos::Fixed(n)`: exactly n typos allowed
     typos: Typos,
+    /// When true, use `fuzzy_match_range` instead of `fuzzy_indices` to avoid
+    /// per-character index computation (useful in filter mode where highlighting
+    /// is not needed).
+    filter_mode: bool,
 }
 
 impl FuzzyEngineBuilder {
@@ -74,6 +80,11 @@ impl FuzzyEngineBuilder {
         self
     }
 
+    pub fn filter_mode(mut self, filter_mode: bool) -> Self {
+        self.filter_mode = filter_mode;
+        self
+    }
+
     /// Compute the effective max_typos for the given query.
     ///
     /// - `Typos::Disabled` → `None` (no typo tolerance)
@@ -129,12 +140,20 @@ impl FuzzyEngineBuilder {
                 debug!("Initialized Fzy algorithm (max_typos: {:?})", max_typos);
                 Box::new(matcher)
             }
+            FuzzyAlgorithm::Arinae => {
+                let mut matcher = ArinaeMatcher::default();
+                matcher.case = self.case;
+                matcher.allow_typos = !matches!(self.typos, Typos::Disabled);
+                debug!("Initialized Arinae algorithm");
+                Box::new(matcher)
+            }
         };
 
         FuzzyEngine {
             matcher,
             query: self.query,
             rank_builder: self.rank_builder,
+            filter_mode: self.filter_mode,
         }
     }
 }
@@ -144,6 +163,7 @@ pub struct FuzzyEngine {
     query: String,
     matcher: Box<dyn FuzzyMatcher>,
     rank_builder: Arc<RankBuilder>,
+    filter_mode: bool,
 }
 
 impl FuzzyEngine {
@@ -151,58 +171,92 @@ impl FuzzyEngine {
     pub fn builder() -> FuzzyEngineBuilder {
         FuzzyEngineBuilder::default()
     }
-
-    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
-        if pattern.is_empty() {
-            return Some((0, Vec::new()));
-        } else if choice.is_empty() {
-            return None;
-        }
-
-        self.matcher.fuzzy_indices(choice, pattern)
-    }
 }
 
 impl MatchEngine for FuzzyEngine {
     fn match_item(&self, item: &dyn SkimItem) -> Option<MatchResult> {
-        // iterate over all matching fields:
-        let mut matched_result = None;
         let item_text = item.text();
         let default_range = [(0, item_text.len())];
-        for &(start, end) in item.get_matching_ranges().unwrap_or(&default_range) {
-            let start = min(start, item_text.len());
-            let end = min(end, item_text.len());
-            matched_result = self.fuzzy_match(&item_text[start..end], &self.query).map(|(s, vec)| {
-                if start != 0 {
-                    let start_char = &item_text[..start].chars().count();
-                    (s, vec.iter().map(|x| x + start_char).collect())
+
+        if self.filter_mode {
+            // Fast path: use fuzzy_match_range to avoid per-character index computation
+            let mut best: Option<(i64, usize, usize)> = None;
+            for &(start, end) in item.get_matching_ranges().unwrap_or(&default_range) {
+                let start = min(start, item_text.len());
+                let end = min(end, item_text.len());
+
+                let result = if self.query.is_empty() {
+                    Some((0i64, 0, 0))
+                } else if item_text[start..end].is_empty() {
+                    None
                 } else {
-                    (s, vec)
-                }
-            });
+                    self.matcher
+                        .fuzzy_match_range(&item_text[start..end], &self.query)
+                        .map(|(s, b, e)| {
+                            let offset = if start != 0 {
+                                item_text[..start].chars().count()
+                            } else {
+                                0
+                            };
+                            (s, b + offset, e + offset)
+                        })
+                };
 
-            if matched_result.is_some() {
-                break;
+                if result.is_some() {
+                    best = result;
+                    break;
+                }
             }
-        }
 
-        let (score, matched_range) = matched_result?;
+            let (score, begin, end) = best?;
+            let item_len = item_text.len();
+            Some(MatchResult {
+                rank: self
+                    .rank_builder
+                    .build_rank(score as i32, begin, end, item_len, item.get_index()),
+                matched_range: MatchRange::ByteRange(begin, end),
+            })
+        } else {
+            let mut matched_result = None;
+            for &(start, end) in item.get_matching_ranges().unwrap_or(&default_range) {
+                let start = min(start, item_text.len());
+                let end = min(end, item_text.len());
+
+                let result = if self.query.is_empty() {
+                    Some((0i64, MatchIndices::new()))
+                } else if item_text[start..end].is_empty() {
+                    None
+                } else {
+                    self.matcher.fuzzy_indices(&item_text[start..end], &self.query)
+                };
 
-        let begin = *matched_range.first().unwrap_or(&0);
-        let end = *matched_range.last().unwrap_or(&0);
+                matched_result = result.map(|(s, vec)| {
+                    if start != 0 {
+                        let start_char = item_text[..start].chars().count();
+                        (s, vec.iter().map(|x| x + start_char).collect::<MatchIndices>())
+                    } else {
+                        (s, vec)
+                    }
+                });
 
-        let item_len = item_text.len();
+                if matched_result.is_some() {
+                    break;
+                }
+            }
 
-        // Use individual character indices for highlighting instead of byte range
-        // This allows each matched character to be highlighted individually
-        let matched_range = MatchRange::Chars(matched_range);
+            let (score, matched_indices) = matched_result?;
+            let begin = *matched_indices.first().unwrap_or(&0);
+            let end = *matched_indices.last().unwrap_or(&0);
+            let item_len = item_text.len();
+            let matched_range = MatchRange::Chars(matched_indices);
 
-        Some(MatchResult {
-            rank: self
-                .rank_builder
-                .build_rank(score as i32, begin, end, item_len, item.get_index()),
-            matched_range,
-        })
+            Some(MatchResult {
+                rank: self
+                    .rank_builder
+                    .build_rank(score as i32, begin, end, item_len, item.get_index()),
+                matched_range,
+            })
+        }
     }
 }
 
diff --git a/src/engine/split.rs b/src/engine/split.rs
index 068bd679..cca745c5 100644
--- a/src/engine/split.rs
+++ b/src/engine/split.rs
@@ -3,6 +3,7 @@
 //! This engine splits both the query and item text on a delimiter character, then matches
 //! the query parts against the corresponding item parts.
 
+use crate::fuzzy_matcher::MatchIndices;
 use crate::{MatchEngine, MatchEngineFactory, MatchRange, MatchResult, SkimItem};
 use std::fmt::{Display, Error, Formatter};
 
@@ -51,7 +52,7 @@ impl MatchEngine for SplitMatchEngine {
         // Combine the results - use rank from first result (like AndEngine does)
         let rank = before_result.rank;
 
-        let mut combined_indices: Vec<usize> = match before_result.matched_range {
+        let mut combined_indices: MatchIndices = match before_result.matched_range {
             MatchRange::Chars(indices) => indices,
             MatchRange::ByteRange(start, end) => {
                 // Convert byte range to char indices for the before part
@@ -67,7 +68,7 @@ impl MatchEngine for SplitMatchEngine {
         // Offset for the "after" part: delimiter_char_idx + 1 (to skip the delimiter)
         let offset = delimiter_char_idx + 1;
 
-        let after_indices: Vec<usize> = match after_result.matched_range {
+        let after_indices: MatchIndices = match after_result.matched_range {
             MatchRange::Chars(indices) => indices.into_iter().map(|i| i + offset).collect(),
             MatchRange::ByteRange(start, end) => {
                 // Convert byte range to char indices for the after part
diff --git a/src/engine/util.rs b/src/engine/util.rs
index eb42f9b9..c2ca7d3c 100644
--- a/src/engine/util.rs
+++ b/src/engine/util.rs
@@ -1,3 +1,4 @@
+use crate::fuzzy_matcher::MatchIndices;
 use regex::Regex;
 use unicode_normalization::UnicodeNormalization;
 
@@ -26,7 +27,7 @@ pub fn normalize_with_char_mapping(s: &str) -> (String, Vec<usize>) {
 ///
 /// Given indices into a normalized string and the char mapping from normalize_with_char_mapping,
 /// returns the corresponding indices in the original string.
-pub fn map_char_indices_to_original(normalized_indices: &[usize], char_mapping: &[usize]) -> Vec<usize> {
+pub fn map_char_indices_to_original(normalized_indices: &[usize], char_mapping: &[usize]) -> MatchIndices {
     normalized_indices
         .iter()
         .filter_map(|&idx| char_mapping.get(idx).copied())
diff --git a/src/fuzzy_matcher/arinae/algo.rs b/src/fuzzy_matcher/arinae/algo.rs
new file mode 100644
index 00000000..9df907a9
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/algo.rs
@@ -0,0 +1,573 @@
+//! Core of the Arinae algorithm: the cell-scoring kernel and the DP routines (`full_dp`, `range_dp`) built on it
+
+use std::cell::RefCell;
+
+use thread_local::ThreadLocal;
+
+use crate::fuzzy_matcher::{IndexType, MatchIndices};
+
+use super::banding::{compute_banding, typo_vband_row};
+use super::constants::*;
+use super::{Atom, CELL_ZERO, Cell, Dir, SWMatrix, Score};
+
+/// Core cell scoring kernel shared by both score-only and full DP.
+///
+/// Computes the best score and direction for a single DP cell from its
+/// three neighbours (diagonal, up, left). The caller is responsible for
+/// fetching the neighbour values from whatever storage layout it uses.
+///
+/// Returns `(best_score, direction)`. The direction is `Dir::None` when
+/// `best_score <= 0`.
+///
+/// `ALLOW_TYPOS` decides at compile time whether the mismatch and Up moves are
+/// scored at all. The scoring arithmetic itself is branchless (`bool as Score`
+/// multipliers and `max`), and so is the final direction selection.
+#[inline(always)]
+#[allow(clippy::too_many_arguments)]
+fn compute_cell<const ALLOW_TYPOS: bool>(
+    is_match: bool,
+    is_first: bool,
+    bonus_j: Score,
+    diag_score: Score,
+    diag_was_diag: bool,
+    up_score: Score,
+    left_score: Score,
+    left_was_diag: bool,
+) -> (Score, Dir) {
+    // --- Bonus (branchless) ---
+    // consecutive bonus added when diag_was_diag, first-char multiplier doubles the bonus.
+    // `bool as Score` is 0 or 1 — no branch.
+    let bonus = (bonus_j + CONSECUTIVE_BONUS * (diag_was_diag as Score)) * (1 + is_first as Score);
+
+    // --- DIAGONAL (branchless) ---
+    // Match path: diag_score + MATCH_BONUS + bonus, masked by is_match.
+    // Mismatch path (typos only): diag_score - MISMATCH_PENALTY, masked by !is_match.
+    let match_val = (diag_score + MATCH_BONUS + bonus) * (is_match as Score);
+    let mismatch_val = if ALLOW_TYPOS {
+        (diag_score - MISMATCH_PENALTY) * (!is_match as Score)
+    } else {
+        0
+    };
+    let diag_val = match_val + mismatch_val;
+
+    // --- UP (skip pattern char, typos only — const-generic elides entirely) ---
+    let up_val = if ALLOW_TYPOS { up_score - TYPO_PENALTY } else { 0 };
+
+    // --- LEFT (skip choice char, branchless gap penalty) ---
+    // GAP_OPEN when left_was_diag, GAP_EXTEND otherwise.
+    // pen = GAP_EXTEND + (GAP_OPEN - GAP_EXTEND) * left_was_diag
+    let left_val = left_score - (GAP_EXTEND + (GAP_OPEN - GAP_EXTEND) * (left_was_diag as Score));
+
+    // --- Best score (branchless max chain) ---
+    let best = diag_val.max(up_val).max(left_val);
+
+    // --- Direction (branchless select) ---
+    // We encode direction as a u8 and build it without branches.
+    // Priority: Diag > Up > Left > None (when best <= 0).
+    //
+    // Start with Left (3), override with Up if up wins, override with Diag
+    // if diag wins, override with None if best <= 0.
+    // For exact mode (ALLOW_TYPOS=false), Diag is only valid when is_match.
+    let diag_wins = if ALLOW_TYPOS {
+        diag_val >= up_val && diag_val >= left_val
+    } else {
+        is_match && diag_val >= left_val
+    };
+    let up_wins = ALLOW_TYPOS && !diag_wins && up_val >= left_val;
+
+    // Branchless cascade: select dir as integer.
+    // Dir encoding: None=0, Diag=1, Up=2, Left=3.
+    // Base is Left(3); subtract 1 if Up wins, subtract 2 if Diag wins.
+    let dir_bits: u8 = Dir::Left as u8 - (up_wins as u8) - (diag_wins as u8) * 2;
+    // If best <= 0, force Dir::None (0) — achieved by ANDing with all-zeros.
+    let positive = best > 0;
+    // When positive: dir_bits; when not: 0 (Dir::None).
+    let dir_val = dir_bits & (positive as u8).wrapping_neg();
+
+    // SAFETY: up_wins excludes diag_wins, so dir_bits is in 1..=3 and dir_val in 0..=3 (see encoding above).
+    let dir: Dir = unsafe { std::mem::transmute(dir_val) };
+
+    (best, dir)
+}
+
+// ---------------------------------------------------------------------------
+// Full DP with traceback — packed Cell (u32 = score + dir)
+// ---------------------------------------------------------------------------
+
+/// Full DP with traceback for `Atom` slices (`u8` or `char`), using packed cells.
+///
+/// Returns the best last-row score plus, when `COMPUTE_INDICES` is set, the matched
+/// 0-indexed positions in `cho` (otherwise an empty vec). Returns `None` when banding
+/// rules the pattern out, when no last-row cell scores above zero, or (with
+/// `COMPUTE_INDICES`) when the traceback finds fewer than `min_true_matches` true matches.
+///
+/// Row-range banding: for each row `i` only columns `j_lo..=j_hi` are computed.
+/// Both bounds come from `typo_vband_row`, and the matrix only stores columns
+/// from the first match of `pat[0]` onward.
+///
+/// NOTE(review): the exact-mode per-row bounds are only used for the next-row peek and
+/// the last-row scan, and there is no dead-row early exit here; `range_dp` has both. Confirm.
+pub(super) fn full_dp<const ALLOW_TYPOS: bool, const COMPUTE_INDICES: bool, C: Atom>(
+    cho: &[C],
+    pat: &[C],
+    bonuses: &[Score],
+    respect_case: bool,
+    full_buf: &ThreadLocal<RefCell<SWMatrix>>,
+    indices_buf: &ThreadLocal<RefCell<MatchIndices>>,
+) -> Option<(Score, MatchIndices)> {
+    let n = pat.len();
+    let m = cho.len();
+
+    let banding = compute_banding::<ALLOW_TYPOS, C>(pat, cho, respect_case)?;
+    let j_start = banding.j_first; // earliest match — skip columns before this
+
+    // Column offset: the matrix stores only columns from j_start onward.
+    // Matrix column 0 is the left wall (all zeros); matrix column `jm`
+    // corresponds to original 1-indexed column `j = jm + j_start - 1`.
+    let col_off = j_start - 1; // subtract from original j to get matrix col
+    let mcols = m - col_off + 1; // matrix columns: 0 ..= (m - col_off)
+
+    let mut buf = full_buf
+        .get_or(|| RefCell::new(SWMatrix::zero(n + 1, mcols)))
+        .borrow_mut();
+    buf.resize(n + 1, mcols);
+
+    // Hoist pointer and stride before initialization to use raw access.
+    let base_ptr = buf.data.as_mut_ptr();
+    let cols = buf.cols;
+
+    // Initialize row 0 to CELL_ZERO (all-zero bytes: score=0, dir=None=0).
+    // Column 0 of each subsequent row is also CELL_ZERO.
+    // SAFETY: base_ptr points to a valid allocation of (n+1)*cols Cells.
+    unsafe {
+        // Row 0: mcols contiguous Cells starting at base_ptr.
+        std::ptr::write_bytes(base_ptr, 0, mcols);
+        // Column 0 of rows 1..=n: one Cell per row, stride = cols.
+        for i in 1..=n {
+            *base_ptr.add(i * cols) = CELL_ZERO;
+        }
+    }
+
+    // Every access below goes through base_ptr/cols, so `buf` must not be resized again.
+
+    // Pre-extract row bounds once (avoids repeated unwrap inside the loop).
+    // For exact mode we copy the arrays out; for typo mode these are unused.
+    let (row_lo_arr, row_hi_arr) = if !ALLOW_TYPOS {
+        let (lo, hi) = banding.row_bounds.as_ref().unwrap();
+        (*lo, *hi)
+    } else {
+        ([0usize; MAX_PAT_LEN], [0usize; MAX_PAT_LEN])
+    };
+
+    // Hoist invariant pointers outside the row loop.
+    let cho_ptr = cho.as_ptr();
+    let bonuses_ptr = bonuses.as_ptr();
+
+    for i in 1..=n {
+        let pi = pat[i - 1];
+        let is_first = i == 1;
+
+        // --- Compute column bounds for this row (original 1-indexed space) ---
+        let (j_lo, j_hi) = typo_vband_row(i, m, banding.bandwidth, banding.j_first);
+
+        if j_lo > j_hi || j_lo > m {
+            // Entire row is outside the band. Only zero the cells the next
+            // row's Diag (reads [i][jm-1]) and Up (reads [i][jm]) will touch.
+            // Peek at the next row's bounds to limit work.
+            if i < n {
+                let (nj_lo, nj_hi) = if ALLOW_TYPOS {
+                    typo_vband_row(i + 1, m, banding.bandwidth, banding.j_first)
+                } else {
+                    (row_lo_arr[i], row_hi_arr[i])
+                };
+                let nj_lo = nj_lo.max(j_start);
+                if nj_lo <= nj_hi && nj_lo <= m {
+                    let njm_lo = nj_lo - col_off;
+                    let njm_hi = (nj_hi - col_off).min(mcols - 1);
+                    // Diag reads jm-1, Up reads jm → need [njm_lo-1 .. njm_hi].
+                    let zero_lo = njm_lo.saturating_sub(1);
+                    let zero_hi = njm_hi.min(mcols - 1);
+                    // SAFETY: row i is within the allocated matrix.
+                    unsafe {
+                        let row_ptr = base_ptr.add(i * cols);
+                        for k in zero_lo..=zero_hi {
+                            *row_ptr.add(k) = CELL_ZERO;
+                        }
+                    }
+                }
+            }
+            continue;
+        }
+
+        // Convert to matrix-local column indices (safe: j_lo >= j_start here).
+        let jm_lo = j_lo - col_off;
+        let jm_hi = j_hi - col_off;
+        let jm_max = mcols - 1; // last valid matrix column
+
+        // Zero only the boundary cells that Diag/Left/Up moves will read:
+        // - Cell at jm_lo-1: read by Left at jm_lo and Diag from next row.
+        // - Cell at jm_hi+1: read by Up from next row at jm_hi+1 (if in next band).
+        // SAFETY: indices are within the row's allocation.
+        unsafe {
+            let row_ptr = base_ptr.add(i * cols);
+            if jm_lo > 1 {
+                *row_ptr.add(jm_lo - 1) = CELL_ZERO;
+            }
+            if jm_hi < jm_max {
+                *row_ptr.add(jm_hi + 1) = CELL_ZERO;
+            }
+        }
+
+        // Get prev_row as immutable slice, cur_row as mutable slice.
+        // SAFETY: i >= 1 so rows i-1 and i are distinct; each row is
+        // cols-aligned inside the contiguous data vec. base_ptr/cols are
+        // hoisted outside the loop.
+        let (prev_row, cur_row) = unsafe {
+            let pr = std::slice::from_raw_parts(base_ptr.add((i - 1) * cols), cols);
+            let cr = std::slice::from_raw_parts_mut(base_ptr.add(i * cols), cols);
+            (pr, cr)
+        };
+
+        // Hoist raw pointers for unchecked access inside the hot loop.
+        let prev_ptr = prev_row.as_ptr();
+        let cur_ptr = cur_row.as_mut_ptr();
+
+        for j in j_lo..=j_hi {
+            let jm = j - col_off; // matrix column
+            // SAFETY: j and jm are inside the band and within array bounds.
+            let cj = unsafe { *cho_ptr.add(j - 1) };
+            let is_match = pi.eq(cj, respect_case);
+
+            // Fetch neighbour values from the matrix.
+            let diag_cell = unsafe { *prev_ptr.add(jm - 1) };
+            let up_score = if ALLOW_TYPOS {
+                let up_cell = unsafe { *prev_ptr.add(jm) };
+                up_cell.score()
+            } else {
+                0
+            };
+            let left_cell = unsafe { *cur_ptr.add(jm - 1) };
+
+            let (best, dir) = compute_cell::<ALLOW_TYPOS>(
+                is_match,
+                is_first,
+                unsafe { *bonuses_ptr.add(j - 1) },
+                diag_cell.score(),
+                diag_cell.is_diag(),
+                up_score,
+                left_cell.score(),
+                left_cell.is_diag(),
+            );
+
+            unsafe {
+                *cur_ptr.add(jm) = Cell::new(best, dir);
+            }
+        }
+    }
+
+    // --- Find best score in the last row (row n) ---
+    // Moved out of the inner loop to eliminate the `i == n` branch per cell.
+    let mut best_score: Score = 0;
+    let mut best_j = 0usize; // stored in original 1-indexed space
+    {
+        let (last_j_lo, last_j_hi) = if ALLOW_TYPOS {
+            typo_vband_row(n, m, banding.bandwidth, banding.j_first)
+        } else {
+            (row_lo_arr[n - 1], row_hi_arr[n - 1])
+        };
+        let last_j_lo = last_j_lo.max(j_start);
+        if last_j_lo <= last_j_hi && last_j_lo <= m {
+            let last_row_ptr = unsafe { base_ptr.add(n * cols) };
+            for j in last_j_lo..=last_j_hi {
+                let jm = j - col_off;
+                let s = unsafe { (*last_row_ptr.add(jm)).score() };
+                // Branchless max: update best_score and best_j together.
+                let better = s > best_score;
+                // Use conditional moves instead of a branch.
+                best_score = if better { s } else { best_score };
+                best_j = if better { j } else { best_j };
+            }
+        }
+    }
+
+    if best_score <= 0 {
+        return None;
+    }
+
+    if COMPUTE_INDICES {
+        // Traceback — j walks in original 1-indexed space, convert to matrix
+        // column for buf access; output indices in original 0-indexed space.
+        // Reuse a thread-local Vec to avoid per-call allocation.
+        let indices_ref_cell = indices_buf.get_or(|| RefCell::new(Vec::new()));
+        let mut indices_ref = indices_ref_cell.borrow_mut();
+        indices_ref.clear();
+        let mut i = n;
+        let mut j = best_j;
+        let mut true_matches = 0usize;
+
+        while i > 0 && j >= j_start {
+            let jm = j - col_off;
+            // SAFETY: jm and i are within the matrix bounds established above.
+            let c = unsafe { *base_ptr.add(i * cols).add(jm) };
+            match c.dir() {
+                Dir::Diag => {
+                    if pat[i - 1].eq(cho[j - 1], respect_case) {
+                        indices_ref.push((j - 1) as IndexType);
+                        true_matches += 1;
+                    }
+                    i -= 1;
+                    j -= 1;
+                }
+                Dir::Up => {
+                    i -= 1;
+                }
+                Dir::Left => {
+                    j -= 1;
+                }
+                Dir::None => break,
+            }
+        }
+
+        if true_matches < banding.min_true_matches {
+            return None;
+        }
+
+        // Traceback produces indices in reverse order; reverse is O(n)
+        // vs sort_unstable's O(n log n).
+        indices_ref.reverse();
+
+        // Copy the indices out of the thread-local buffer: the buffer keeps
+        // its capacity for the next call, while `to_vec()` makes a single
+        // allocation sized to the result.
+        let out = indices_ref.to_vec();
+        Some((best_score, out))
+    } else {
+        Some((best_score, Vec::default()))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Range DP — full matrix, minimal traceback (begin + end only)
+// ---------------------------------------------------------------------------
+
+/// Full matrix DP followed by a traceback that only records the first and
+/// last matched positions (not every index): returns `(score, begin, end)` with
+/// 0-indexed positions in `cho`. Used by `fuzzy_match_range` to avoid building the
+/// full index vec. Gives up (`None`) after two consecutive rows without a positive score.
+pub(super) fn range_dp<const ALLOW_TYPOS: bool, C: Atom>(
+    cho: &[C],
+    pat: &[C],
+    bonuses: &[Score],
+    respect_case: bool,
+    full_buf: &ThreadLocal<RefCell<SWMatrix>>,
+) -> Option<(Score, usize, usize)> {
+    let n = pat.len();
+    let m = cho.len();
+
+    let banding = compute_banding::<ALLOW_TYPOS, C>(pat, cho, respect_case)?;
+    let j_start = banding.j_first;
+    let col_off = j_start - 1;
+    let mcols = m - col_off + 1;
+
+    let mut buf = full_buf
+        .get_or(|| RefCell::new(SWMatrix::zero(n + 1, mcols)))
+        .borrow_mut();
+    buf.resize(n + 1, mcols);
+
+    let base_ptr = buf.data.as_mut_ptr();
+    let cols = buf.cols;
+
+    // Initialize row 0 to CELL_ZERO (all-zero bytes: score=0, dir=None=0).
+    // Column 0 of each subsequent row is also CELL_ZERO.
+    // SAFETY: base_ptr points to a valid allocation of (n+1)*cols Cells.
+    unsafe {
+        std::ptr::write_bytes(base_ptr, 0, mcols);
+        for i in 1..=n {
+            *base_ptr.add(i * cols) = CELL_ZERO;
+        }
+    }
+
+    let (row_lo_arr, row_hi_arr) = if !ALLOW_TYPOS {
+        let (lo, hi) = banding.row_bounds.as_ref().unwrap();
+        (*lo, *hi)
+    } else {
+        ([0usize; MAX_PAT_LEN], [0usize; MAX_PAT_LEN])
+    };
+
+    let cho_ptr = cho.as_ptr();
+    let bonuses_ptr = bonuses.as_ptr();
+    let mut dead_rows = 0u32;
+
+    for i in 1..=n {
+        let pi = pat[i - 1];
+        let is_first = i == 1;
+
+        let (j_lo, j_hi) = if ALLOW_TYPOS {
+            typo_vband_row(i, m, banding.bandwidth, banding.j_first)
+        } else {
+            (row_lo_arr[i - 1], row_hi_arr[i - 1])
+        };
+        let j_lo = j_lo.max(j_start);
+
+        if j_lo > j_hi || j_lo > m {
+            if i < n {
+                let (nj_lo, nj_hi) = if ALLOW_TYPOS {
+                    typo_vband_row(i + 1, m, banding.bandwidth, banding.j_first)
+                } else {
+                    (row_lo_arr[i], row_hi_arr[i])
+                };
+                let nj_lo = nj_lo.max(j_start);
+                if nj_lo <= nj_hi && nj_lo <= m {
+                    let njm_lo = nj_lo - col_off;
+                    let njm_hi = (nj_hi - col_off).min(mcols - 1);
+                    let zero_lo = njm_lo.saturating_sub(1);
+                    let zero_hi = njm_hi.min(mcols - 1);
+                    unsafe {
+                        let row_ptr = base_ptr.add(i * cols);
+                        for k in zero_lo..=zero_hi {
+                            *row_ptr.add(k) = CELL_ZERO;
+                        }
+                    }
+                }
+            }
+            dead_rows += 1;
+            if dead_rows >= 2 {
+                return None;
+            }
+            continue;
+        }
+
+        let jm_lo = j_lo - col_off;
+        let jm_hi = j_hi - col_off;
+        let jm_max = mcols - 1;
+
+        unsafe {
+            let row_ptr = base_ptr.add(i * cols);
+            if jm_lo > 1 {
+                *row_ptr.add(jm_lo - 1) = CELL_ZERO;
+            }
+            if jm_hi < jm_max {
+                *row_ptr.add(jm_hi + 1) = CELL_ZERO;
+            }
+        }
+
+        let (prev_row, cur_row) = unsafe {
+            let pr = std::slice::from_raw_parts(base_ptr.add((i - 1) * cols), cols);
+            let cr = std::slice::from_raw_parts_mut(base_ptr.add(i * cols), cols);
+            (pr, cr)
+        };
+
+        let prev_ptr = prev_row.as_ptr();
+        let cur_ptr = cur_row.as_mut_ptr();
+
+        let mut row_positive = false;
+        for j in j_lo..=j_hi {
+            let jm = j - col_off;
+            let cj = unsafe { *cho_ptr.add(j - 1) };
+            let is_match = pi.eq(cj, respect_case);
+
+            let diag_cell = unsafe { *prev_ptr.add(jm - 1) };
+            let up_score = if ALLOW_TYPOS {
+                let up_cell = unsafe { *prev_ptr.add(jm) };
+                up_cell.score()
+            } else {
+                0
+            };
+            let left_cell = unsafe { *cur_ptr.add(jm - 1) };
+
+            let (best, dir) = compute_cell::<ALLOW_TYPOS>(
+                is_match,
+                is_first,
+                unsafe { *bonuses_ptr.add(j - 1) },
+                diag_cell.score(),
+                diag_cell.is_diag(),
+                up_score,
+                left_cell.score(),
+                left_cell.is_diag(),
+            );
+
+            row_positive |= best > 0;
+            unsafe {
+                *cur_ptr.add(jm) = Cell::new(best, dir);
+            }
+        }
+
+        if row_positive {
+            dead_rows = 0;
+        } else {
+            dead_rows += 1;
+            if dead_rows >= 2 {
+                return None;
+            }
+        }
+    }
+
+    // Find best score in the last row.
+    let mut best_score: Score = 0;
+    let mut best_j = 0usize;
+    {
+        let (last_j_lo, last_j_hi) = if ALLOW_TYPOS {
+            typo_vband_row(n, m, banding.bandwidth, banding.j_first)
+        } else {
+            (row_lo_arr[n - 1], row_hi_arr[n - 1])
+        };
+        let last_j_lo = last_j_lo.max(j_start);
+        if last_j_lo <= last_j_hi && last_j_lo <= m {
+            let last_row_ptr = unsafe { base_ptr.add(n * cols) };
+            for j in last_j_lo..=last_j_hi {
+                let jm = j - col_off;
+                let s = unsafe { (*last_row_ptr.add(jm)).score() };
+                let better = s > best_score;
+                best_score = if better { s } else { best_score };
+                best_j = if better { j } else { best_j };
+            }
+        }
+    }
+
+    if best_score <= 0 {
+        return None;
+    }
+
+    // Minimal traceback: walk back until we can go no further, recording
+    // only the final j (which becomes `begin`). `end` is best_j - 1.
+    let end_0 = best_j - 1; // 0-indexed end
+    let mut i = n;
+    let mut j = best_j;
+    let mut true_matches = 0usize;
+
+    while i > 0 && j >= j_start {
+        let jm = j - col_off;
+        let c = unsafe { *base_ptr.add(i * cols).add(jm) };
+        match c.dir() {
+            Dir::Diag => {
+                if pat[i - 1].eq(cho[j - 1], respect_case) {
+                    true_matches += 1;
+                }
+                i -= 1;
+                j -= 1;
+            }
+            Dir::Up => {
+                i -= 1;
+            }
+            Dir::Left => {
+                j -= 1;
+            }
+            Dir::None => break,
+        }
+    }
+
+    if true_matches < banding.min_true_matches {
+        return None;
+    }
+
+    // `j` is a 1-indexed column. Diag and Left steps decrement it after
+    // consuming a column, so once the walk stops, the last consumed column
+    // is `j + 1` (1-indexed), whose 0-indexed position is `j` itself.
+    // That position is reported as the start of the matched span.
+    // NOTE(review): Up steps (typo mode) leave `j` untouched, and a Left
+    // step consumes a column without matching it, so `begin` is the leftmost
+    // column consumed by the alignment — confirm that is the intended meaning.
+    let begin_0 = j; // 1-indexed `j` equals the 0-indexed position of column `j + 1`
+
+    Some((best_score, begin_0, end_0))
+}
diff --git a/src/fuzzy_matcher/arinae/atom.rs b/src/fuzzy_matcher/arinae/atom.rs
new file mode 100644
index 00000000..8a81ed90
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/atom.rs
@@ -0,0 +1,90 @@
+//! Byte/Char helpers
+use super::Score;
+use super::constants::SEPARATOR_TABLE;
+use memchr::memchr;
+
+/// A single comparable unit of text the matcher works on (implemented for `u8` and `char`).
+pub(super) trait Atom: PartialEq + Into<char> + Copy {
+    /// Compare two atoms: exact equality when `respect_case`, otherwise via `eq_ignore_case`.
+    #[inline(always)]
+    fn eq(self, other: Self, respect_case: bool) -> bool {
+        if respect_case {
+            self == other
+        } else {
+            self.eq_ignore_case(other)
+        }
+    }
+    /// Case-insensitive equality; what "case" means is up to the implementation.
+    fn eq_ignore_case(self, other: Self) -> bool;
+    /// Whether this atom counts as a lowercase letter for the implementation.
+    fn is_lowercase(self) -> bool;
+
+    /// Return the index of the first occurrence of `self` in `haystack`,
+    /// or `None` if not found.
+    ///
+    /// Implementations may override this with a SIMD-backed search (e.g.
+    /// the `memchr` crate for `u8`).
+    #[inline]
+    fn find_first_in(self, haystack: &[Self], respect_case: bool) -> Option<usize> {
+        haystack.iter().position(|&c| self.eq(c, respect_case))
+    }
+    /// Return the word-separator bonus for this character, or `0` if it is not
+    /// a separator.  Uses a table lookup — a single bounds check replaces
+    /// several branches and the returned value encodes both *whether* the
+    /// character is a separator and *how much* bonus it carries.
+    #[inline(always)]
+    fn separator_bonus(self) -> Score {
+        let ch = self.into() as usize;
+        // Code points >= 128 fall outside the table, so `get` yields `None` and the
+        // bonus defaults to 0 (non-ASCII characters are never separators).
+        SEPARATOR_TABLE.get(ch).copied().unwrap_or(0)
+    }
+}
+
+impl Atom for u8 {
+    /// ASCII case-insensitive byte comparison.
+    #[inline(always)]
+    fn eq_ignore_case(self, b: Self) -> bool {
+        self.eq_ignore_ascii_case(&b)
+    }
+    /// `true` for the ASCII bytes `b'a'..=b'z'`.
+    #[inline(always)]
+    fn is_lowercase(self) -> bool {
+        self.is_ascii_lowercase()
+    }
+
+    /// Return the index of the first byte of `haystack` equal to `self`.
+    ///
+    /// Both modes delegate to the `memchr` crate, so each call makes a
+    /// single pass that stops at the first hit:
+    /// - case-sensitive: `memchr(self, haystack)`;
+    /// - case-insensitive: `memchr2(lower, upper, haystack)`, which reports the
+    ///   earliest occurrence of either ASCII case variant.
+    ///
+    /// Searching for both variants in one pass matters when only one of
+    /// them occurs: two separate `memchr` calls would scan the entire
+    /// haystack for the missing variant even if the other one sits at
+    /// index 0.
+    #[inline]
+    fn find_first_in(self, haystack: &[Self], respect_case: bool) -> Option<usize> {
+        if respect_case {
+            memchr(self, haystack)
+        } else {
+            // For bytes without a case distinction (digits, symbols, …) both
+            // needles are identical, which `memchr2` handles correctly.
+            let lower = self.to_ascii_lowercase();
+            let upper = self.to_ascii_uppercase();
+            memchr::memchr2(lower, upper, haystack)
+        }
+    }
+}
+impl Atom for char {
+    #[inline(always)]
+    fn eq_ignore_case(self, b: Self) -> bool {
+        self.to_lowercase().eq(b.to_lowercase()) // compares the full Unicode lowercase expansions
+    }
+    #[inline(always)]
+    fn is_lowercase(self) -> bool {
+        self.is_ascii_lowercase() // NOTE(review): ASCII-only, unlike `eq_ignore_case` above — confirm intended
+    }
+}
diff --git a/src/fuzzy_matcher/arinae/banding.rs b/src/fuzzy_matcher/arinae/banding.rs
new file mode 100644
index 00000000..7bc655e2
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/banding.rs
@@ -0,0 +1,92 @@
+//! Banding utils
+//! Banding is the process of calculating the pertinent parts of the matrix to our specific
+//! computation to avoid computing every cell
+
+use super::atom::Atom;
+use super::constants::*;
+use super::helpers::{compute_last_match_cols, compute_row_col_bounds, find_first_char};
+
+/// Precomputed banding information consumed by the DP routines.
+pub(super) struct BandingInfo {
+    /// Per-row `(lo, hi)` column-bound arrays, indexed by `row - 1`; `Some` only in exact mode.
+    pub(super) row_bounds: Option<([usize; MAX_PAT_LEN], [usize; MAX_PAT_LEN])>,
+    /// 1-indexed column of the first match of `pat[0]` in `cho`.
+    pub(super) j_first: usize,
+    /// Bandwidth for typo-mode diagonal banding (0 in exact mode).
+    pub(super) bandwidth: usize,
+    /// Minimum number of true (non-substitution) matches to accept (0 in exact mode).
+    pub(super) min_true_matches: usize,
+}
+
+/// Build the [`BandingInfo`] for matching `pat` against `cho`, or `None` when the pattern
+/// cannot possibly match. NOTE(review): presumably callers never pass an empty `pat` — confirm.
+pub(super) fn compute_banding<const ALLOW_TYPOS: bool, C: Atom>(
+    pat: &[C],
+    cho: &[C],
+    respect_case: bool,
+) -> Option<BandingInfo> {
+    let pat_len = pat.len();
+    // Typo mode only anchors on the first occurrence of `pat[0]`; exact mode derives
+    // per-row bounds from the first/last match column of every pattern character.
+    let (row_bounds, j_first) = if ALLOW_TYPOS {
+        (None, find_first_char(pat, cho, respect_case)?)
+    } else {
+        let first_cols = compute_first_match_cols(pat, cho, respect_case)?;
+        let last_cols = compute_last_match_cols(pat, cho, respect_case)?;
+        let bounds = compute_row_col_bounds(pat_len, cho.len(), &first_cols, &last_cols);
+        (Some(bounds), first_cols[0])
+    };
+
+    let (bandwidth, min_true_matches) = if ALLOW_TYPOS {
+        (pat_len + TYPO_BAND_SLACK, pat_len.div_ceil(2))
+    } else {
+        (0, 0)
+    };
+
+    Some(BandingInfo {
+        row_bounds,
+        j_first,
+        bandwidth,
+        min_true_matches,
+    })
+}
+
+/// Column bounds `(lo, hi)` of the band at row `i` (all values 1-indexed).
+///
+/// `lo` is the diagonal `i + j_first - 1` shifted left by `bandwidth`, clamped to `j_first`; `hi` is `m`.
+#[inline(always)]
+pub(super) fn typo_vband_row(i: usize, m: usize, bandwidth: usize, j_first: usize) -> (usize, usize) {
+    let diagonal = i + j_first - 1;
+    let lower = j_first.max(diagonal.saturating_sub(bandwidth));
+
+    (lower, m)
+}
+
+/// For exact (non-typo) mode, find the earliest 1-indexed column at which each
+/// pattern character can be matched when the pattern is consumed greedily from
+/// the left: `first[i]` is the first occurrence of `pat[i]` strictly after
+/// `first[i - 1]`. Entries beyond `pat.len()` stay `0`.
+///
+/// Returns `None` if the pattern is longer than `MAX_PAT_LEN` or is not a
+/// subsequence of `cho` (some character has no occurrence left to match).
+fn compute_first_match_cols<C: Atom>(pat: &[C], cho: &[C], respect_case: bool) -> Option<[usize; MAX_PAT_LEN]> {
+    // The result lives in a fixed-size stack array, so longer patterns cannot
+    // be represented; `None` lets the caller skip this choice gracefully.
+    if pat.len() > MAX_PAT_LEN {
+        return None;
+    }
+
+    let mut first = [0usize; MAX_PAT_LEN];
+    // Number of choice characters already consumed; this is also the
+    // 1-indexed column of the most recent match.
+    let mut consumed = 0usize;
+
+    // `zip` stops at the end of `pat`, leaving the unused tail of `first` zeroed.
+    for (slot, &p) in first.iter_mut().zip(pat) {
+        // `?` bails out as soon as one pattern character cannot be placed.
+        let offset = cho[consumed..].iter().position(|&c| p.eq(c, respect_case))?;
+        consumed += offset + 1;
+        *slot = consumed;
+    }
+    Some(first)
+}
diff --git a/src/fuzzy_matcher/arinae/constants.rs b/src/fuzzy_matcher/arinae/constants.rs
new file mode 100644
index 00000000..6406658d
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/constants.rs
@@ -0,0 +1,55 @@
+// ---------------------------------------------------------------------------
+// Scoring constants
+// ---------------------------------------------------------------------------
+use super::Score;
+
+/// Points awarded for each correctly matched character.
+pub(super) const MATCH_BONUS: Score = 18;
+
+/// Extra bonus when the match is at position 0 of the choice string.
+pub(super) const START_OF_STRING_BONUS: Score = 16;
+
+/// Extra bonus for a camelCase transition.
+pub(super) const CAMEL_CASE_BONUS: Score = 6;
+
+/// Bonus for each additional consecutive matched character.
+pub(super) const CONSECUTIVE_BONUS: Score = 11;
+
+/// Cost to open a gap (skip characters in choice).
+pub(super) const GAP_OPEN: Score = 6;
+
+/// Cost to extend a gap by one more character.
+pub(super) const GAP_EXTEND: Score = 2;
+
+pub(super) const TYPO_PENALTY: Score = 8; // cost of skipping a pattern character (Up move), typo mode only
+
+/// Penalty for aligning a pattern char to a different choice char (typos only).
+pub(super) const MISMATCH_PENALTY: Score = 16;
+
+/// Maximum pattern length supported by the banding arrays (stack-allocated).
+pub(super) const MAX_PAT_LEN: usize = 32;
+
+/// Bandwidth for typo-mode banding. In typo mode we allow diagonal moves
+/// (match/mismatch) plus UP (skip pattern char) and LEFT (skip choice char),
+/// so the optimal path can wander off the main diagonal. A bandwidth of
+/// `n + TYPO_BAND_SLACK` columns around the diagonal is generous enough
+/// to capture all viable alignments while still pruning far-off cells.
+pub(super) const TYPO_BAND_SLACK: usize = 4;
+
+/// Per-separator bonus lookup table. Each entry holds the `Score` awarded when
+/// a matched character immediately follows that ASCII codepoint. Non-separator
+/// characters (and all non-ASCII codepoints) map to `0`.
+///
+/// Different separators carry different bonuses — `/` and `\` delimit path
+/// components (16), space, `.` and `_` delimit words (12), and `-` gets the
+/// lowest bonus (10).  Entries that are `0` are not considered separators.
+pub(super) const SEPARATOR_TABLE: [Score; 128] = {
+    let mut t = [0 as Score; 128];
+    t[b' ' as usize] = 12; // space
+    t[b'-' as usize] = 10; // hyphen / kebab-case
+    t[b'.' as usize] = 12; // dot (file extensions, domain names)
+    t[b'/' as usize] = 16; // forward slash (path separator — higher bonus)
+    t[b'\\' as usize] = 16; // backslash (Windows path separator — higher bonus)
+    t[b'_' as usize] = 12; // underscore / snake_case
+    t
+};
diff --git a/src/fuzzy_matcher/arinae/helpers.rs b/src/fuzzy_matcher/arinae/helpers.rs
new file mode 100644
index 00000000..16d0bc0a
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/helpers.rs
@@ -0,0 +1,91 @@
+//! Helpers operating on generic `&[C]` slices where `C: Atom`
+
+use super::Atom;
+use super::constants::*;
+
+/// Find the 1-indexed column of the first occurrence of `pat[0]` in `cho`.
+///
+/// Returns `None` if `pat[0]` is not found anywhere (caller should return
+/// `None`). The position defines the start of the V-shaped banding envelope.
+/// Uses SIMD-backed `find_first_in` for `u8` slices. Panics if `pat` is empty.
+#[inline]
+pub(super) fn find_first_char<C: Atom>(pat: &[C], cho: &[C], respect_case: bool) -> Option<usize> {
+    pat[0].find_first_in(cho, respect_case).map(|idx| idx + 1) // found index + 1 => 1-indexed column
+}
+
+/// Compute the last column (1-indexed) at which each pattern character can be matched, scanning
+/// from the end (tightens the diagonal upper bound). `None`: `pat` too long or not a subsequence.
+pub(super) fn compute_last_match_cols<C: Atom>(
+    pat: &[C],
+    cho: &[C],
+    respect_case: bool,
+) -> Option<[usize; MAX_PAT_LEN]> {
+    let n = pat.len();
+    // Patterns longer than MAX_PAT_LEN cannot be handled by the stack-allocated
+    // banding arrays.  Return None so the caller skips this choice gracefully.
+    if n > MAX_PAT_LEN {
+        return None;
+    }
+    let m = cho.len();
+    let mut last = [0usize; MAX_PAT_LEN]; // entries at index >= n stay 0
+    let mut end = m; // search up to this choice index (exclusive)
+    for i in (0..n).rev() { // last pattern char first
+        let found = cho[..end].iter().rposition(|&c| pat[i].eq(c, respect_case));
+        match found {
+            Some(pos) => {
+                last[i] = pos + 1; // 1-indexed column
+                end = pos; // previous char must be strictly before
+            }
+            None => return None, // pat[i] cannot be placed before the later chars
+        }
+    }
+    Some(last)
+}
+
+/// For the **row-major** full DP (outer loop over rows), compute per-row
+/// column bounds `(j_lo, j_hi)` accounting for cross-row Diag reads.
+///
+/// Row `i` (1-indexed) matches pattern char `i-1`. The Diag move at
+/// `(i, j)` reads `buf[i-1][j-1]`, so row `i-1` must have computed
+/// column `j-1`. We expand each row's upper bound to satisfy the next
+/// row's lower-bound Diag dependency, and each row's lower bound to
+/// satisfy the previous row's upper-bound Diag dependency.
+pub(super) fn compute_row_col_bounds(
+    n: usize,
+    m: usize,
+    first_match: &[usize; MAX_PAT_LEN],
+    last_match: &[usize; MAX_PAT_LEN],
+) -> ([usize; MAX_PAT_LEN], [usize; MAX_PAT_LEN]) {
+    let mut lo = [0usize; MAX_PAT_LEN]; // lower bounds; entries at index >= n stay 0
+    let mut hi = [0usize; MAX_PAT_LEN]; // upper bounds; entries at index >= n stay 0
+
+    // Start with the raw first/last match bounds (slicing panics if n > MAX_PAT_LEN).
+    lo[..n].copy_from_slice(&first_match[..n]);
+    hi[..n].copy_from_slice(&last_match[..n]);
+
+    // Forward pass: row i's upper bound must extend so that row i+1 can
+    // read Diag at (i+1, j_lo[i+1]) → needs buf[i][j_lo[i+1]-1].
+    // Also, LEFT propagation within row i+1 starts at j_lo[i+1], but
+    // score flows from row i via Diag, so row i must reach j_lo[i+1]-1.
+    for i in 0..n.saturating_sub(1) { // empty range when n == 0
+        let next_lo = lo[i + 1];
+        if next_lo > 1 { // guards the `next_lo - 1` subtraction
+            hi[i] = hi[i].max(next_lo - 1);
+        }
+    }
+
+    // Backward pass: row i's lower bound can't be later than row i-1's
+    // upper bound + 1 (Diag from (i-1, hi[i-1]) can reach (i, hi[i-1]+1)).
+    // This is rarely binding but ensures consistency.
+    for i in 1..n {
+        lo[i] = lo[i].min(hi[i - 1] + 1);
+    }
+
+    // Clamp both bounds into 1..=m (for m >= 1) and enforce lo[i] <= hi[i].
+    for i in 0..n {
+        lo[i] = lo[i].max(1).min(m);
+        hi[i] = hi[i].max(lo[i]).min(m);
+    }
+
+    (lo, hi)
+}
diff --git a/src/fuzzy_matcher/arinae/matrix.rs b/src/fuzzy_matcher/arinae/matrix.rs
new file mode 100644
index 00000000..a7e0cbe0
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/matrix.rs
@@ -0,0 +1,85 @@
+//! Base structs for the matching algorithm: Cell & SWMatrix
+
+use super::Score;
+
+/// Direction the optimal path took to reach a cell (stored as a 2-bit tag in `Cell`).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(u8)]
+#[allow(dead_code)] // variants are constructed via transmute from bits (see `Cell::dir`)
+pub(super) enum Dir {
+    /// No valid path (score == 0).
+    ///
+    /// Assigned tag 0 so that `Cell::new(0, Dir::None)` encodes as all-zero
+    /// bits, allowing boundary rows/columns to be bulk-zeroed with
+    /// `write_bytes(0)` instead of a scalar loop.
+    None = 0,
+    /// Diagonal: match or mismatch (came from [i-1][j-1])
+    Diag = 1,
+    /// Up: gap in choice (came from [i-1][j], skip pattern char)
+    Up = 2,
+    /// Left: gap in pattern (came from [i][j-1], skip choice char)
+    Left = 3,
+}
+
+/// Packed cell stored as a `u32`: bits [15:0] = score (as u16 bitcast from
+/// i16), bits [17:16] = direction tag.  This gives 4 bytes per cell with no
+/// padding and enables branchless direction extraction via bitmask.
+#[derive(Copy, Clone)]
+pub(super) struct Cell(u32);
+
+pub(super) const CELL_ZERO: Cell = Cell::new(0, Dir::None); // score 0 + tag 0 => raw value 0
+
+impl std::fmt::Debug for Cell {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // prints decoded fields, not the raw u32
+        f.debug_struct("Cell")
+            .field("score", &self.score())
+            .field("dir", &self.dir())
+            .finish()
+    }
+}
+
+impl Cell {
+    #[inline(always)]
+    pub(super) const fn new(score: Score, dir: Dir) -> Cell {
+        // Store the score's 16-bit pattern in the low 16 bits, dir tag in bits 16-17.
+        Cell((score as u16 as u32) | ((dir as u32) << 16))
+    }
+    #[inline(always)]
+    pub(super) fn score(self) -> Score {
+        self.0 as u16 as i16 // truncate to the low 16 bits, then reinterpret as signed
+    }
+    #[inline(always)]
+    pub(super) fn dir(self) -> Dir {
+        // SAFETY: the `& 0x3` mask yields a value in 0..=3, and `Dir` is `repr(u8)`
+        // with a variant declared for each of those four discriminants.
+        unsafe { std::mem::transmute((self.0 >> 16) as u8 & 0x3) }
+    }
+    /// Branchless check: true when dir == Diag (tag 1).
+    #[inline(always)]
+    pub(super) fn is_diag(self) -> bool {
+        (self.0 >> 16) & 0x3 == 1
+    }
+}
+
+#[derive(Default, Debug)] // default: empty `data`, 0 rows, 0 cols
+pub(super) struct SWMatrix {
+    pub(super) data: Vec<Cell>, // flat cell storage; `resize` grows it to at least rows * cols
+    pub(super) cols: usize, // logical column count set by the last `resize`
+    pub(super) rows: usize, // logical row count set by the last `resize`
+}
+
+impl SWMatrix {
+    pub fn zero(rows: usize, cols: usize) -> Self { // fresh matrix: all rows * cols cells are CELL_ZERO
+        let mut res = SWMatrix::default();
+        res.resize(rows, cols);
+        res
+    }
+    pub fn resize(&mut self, rows: usize, cols: usize) { // sets dimensions; storage only ever grows
+        let needed = rows * cols; // NOTE(review): unchecked multiplication — confirm callers bound rows/cols
+        if needed > self.data.len() {
+            self.data.resize(needed, CELL_ZERO); // only the newly appended cells are zeroed
+        }
+        self.rows = rows; // existing cells are left untouched, so old contents may remain
+        self.cols = cols;
+    }
+}
diff --git a/src/fuzzy_matcher/arinae/mod.rs b/src/fuzzy_matcher/arinae/mod.rs
new file mode 100644
index 00000000..6483b522
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/mod.rs
@@ -0,0 +1,260 @@
+//! Arinae fuzzy matching algorithm.
+//!
+//! Uses a Smith-Waterman local alignment approach with affine gap penalties
+//! and context-sensitive bonuses.
+//!
+//! ## Key design choices
+//!
+//! - **Single score per cell** (an `i16` `Score`, saturating, packed as 16 bits) plus a 2-bit direction tag
+//!   for traceback. Gap open vs extend is tracked via the direction tag.
+//! - **Semi-global alignment**: the pattern must be fully consumed, but
+//!   alignment can start/end at any position in the choice string.
+//!
+//!
+//! ## Pruning strategies
+//!
+//! - **Row-range banding**: each DP cell is only computed when the row/column
+//!   pair falls within the feasible alignment band. In exact mode the band is
+//!   derived from precomputed first/last match columns for each pattern
+//!   character; in typo mode a diagonal ± bandwidth envelope is used.
+//! - **Interpair max-score pruning**: after processing a column (score-only)
+//!   or row (full DP), if all cells are zero for several consecutive
+//!   iterations, the alignment is dead and we terminate early.
+
+mod algo;
+mod atom;
+mod banding;
+mod constants;
+mod helpers;
+mod matrix;
+mod prefilter;
+#[cfg(test)]
+mod tests;
+
+use std::cell::RefCell;
+
+use thread_local::ThreadLocal;
+
+use self::algo::{full_dp, range_dp};
+use self::atom::Atom;
+use self::constants::*;
+use self::prefilter::cheap_typo_prefilter;
+
+use self::matrix::{CELL_ZERO, Cell, Dir, SWMatrix};
+use crate::{
+    CaseMatching,
+    fuzzy_matcher::{FuzzyMatcher, MatchIndices, ScoreType},
+};
+
+type Score = i16; // signed 16-bit score used for bonuses, penalties and DP cells
+
+fn precompute_bonuses<C: Atom>(cho: &[C], buf: &mut Vec<Score>) {
+    // Overwrite `buf` with the positional bonuses for `cho`: reset length (O(1), no deallocation), then refill.
+    buf.clear();
+    // The first character always gets START_OF_STRING_BONUS.
+    // Subsequent characters get a bonus based on the previous character:
+    //   - separator_bonus() when the previous char is a separator (the exact
+    //     bonus depends on the separator — see SEPARATOR_TABLE in constants.rs),
+    //   - CAMEL_CASE_BONUS when transitioning from lowercase to non-lowercase.
+    // Using a safe iterator lets the compiler auto-vectorise the loop.
+    let bonus_iter = std::iter::once(START_OF_STRING_BONUS).chain(cho.windows(2).map(|w| {
+        let prev = w[0];
+        let cur = w[1];
+        prev.separator_bonus() + CAMEL_CASE_BONUS * ((prev.is_lowercase() && !cur.is_lowercase()) as Score)
+    }));
+    buf.extend(bonus_iter); // one entry per element of a non-empty `cho` (a single entry if `cho` is empty)
+}
+
+/// Arinae fuzzy matcher: Smith-Waterman local alignment with affine gap
+/// penalties and context-sensitive bonuses. Scratch buffers are kept per thread.
+#[derive(Debug, Default)]
+pub struct ArinaeMatcher {
+    pub(crate) case: CaseMatching, // case policy, resolved per pattern by `respect_case`
+    pub(crate) allow_typos: bool, // selects the typo-tolerant prefilter and DP variants
+    full_buf: ThreadLocal<RefCell<SWMatrix>>, // DP matrix handed to `full_dp` / `range_dp`
+    indices_buf: ThreadLocal<RefCell<MatchIndices>>, // index scratch handed to `full_dp`
+    #[allow(clippy::type_complexity)]
+    char_buf: ThreadLocal<RefCell<(Vec<char>, Vec<char>)>>, // (pattern chars, choice chars) for non-ASCII input
+    bonus_buf: ThreadLocal<RefCell<Vec<Score>>>, // output buffer of `precompute_bonuses`
+}
+
+impl ArinaeMatcher {
+    /// Create a new `ArinaeMatcher` with the given settings; all buffers start at their defaults.
+    pub fn new(case: CaseMatching, allow_typos: bool) -> Self {
+        Self {
+            case,
+            allow_typos,
+            ..Default::default()
+        }
+    }
+
+    #[inline]
+    fn respect_case<C: Atom>(&self, pattern: &[C]) -> bool { // true => compare case-sensitively
+        self.case == CaseMatching::Respect
+            || (self.case == CaseMatching::Smart && !pattern.iter().all(|b| b.is_lowercase())) // Smart: any non-lowercase element
+    }
+
+    /// Dispatch to `full_dp::<allow_typos, compute_indices, _>` (both flags become const generics).
+    /// Assumes prefilters and bonuses have already been computed.
+    fn dispatch_dp<C: Atom>(
+        &self,
+        cho: &[C],
+        pat: &[C],
+        bonuses: &[Score],
+        respect_case: bool,
+        compute_indices: bool,
+    ) -> Option<(ScoreType, MatchIndices)> {
+        let res = if self.allow_typos {
+            if compute_indices {
+                full_dp::<true, true, _>(cho, pat, bonuses, respect_case, &self.full_buf, &self.indices_buf)
+            } else {
+                full_dp::<true, false, _>(cho, pat, bonuses, respect_case, &self.full_buf, &self.indices_buf)
+            }
+        } else if compute_indices {
+            full_dp::<false, true, _>(cho, pat, bonuses, respect_case, &self.full_buf, &self.indices_buf)
+        } else {
+            full_dp::<false, false, _>(cho, pat, bonuses, respect_case, &self.full_buf, &self.indices_buf)
+        };
+        res.map(|(s, idx)| (s as ScoreType, idx)) // convert the DP score to the public `ScoreType`
+    }
+
+    /// Generic helper: run full DP over slices of Atom.
+    /// If `compute_indices` is true, returns the matched indices; otherwise
+    /// returns a single-element vec containing the 1-indexed end column.
+    fn match_slices<C: Atom>(&self, cho: &[C], pat: &[C], compute_indices: bool) -> Option<(ScoreType, MatchIndices)> {
+        if pat.is_empty() {
+            return Some((0, MatchIndices::new())); // empty pattern: score 0, no indices
+        }
+        if cho.is_empty() {
+            return None;
+        }
+
+        let respect_case = self.respect_case(pat);
+
+        // Prefilter for typo mode.
+        // In exact mode (non-typo) we skip is_subsequence here: compute_banding
+        // calls compute_first_match_cols which already validates the subsequence
+        // and returns None if any pattern character is absent — no redundant scan.
+        if self.allow_typos && !cheap_typo_prefilter(pat, cho, respect_case) {
+            return None;
+        }
+
+        // Prepare bonuses in this thread's reusable buffer.
+        let mut bonus_buf = self.bonus_buf.get_or(|| RefCell::new(Vec::new())).borrow_mut();
+        precompute_bonuses(cho, &mut bonus_buf);
+
+        self.dispatch_dp(cho, pat, &bonus_buf, respect_case, compute_indices)
+    }
+
+    fn run(&self, choice: &str, pattern: &str, compute_indices: bool) -> Option<(ScoreType, MatchIndices)> {
+        if pattern.is_empty() { // checked first, so an empty pattern matches even an empty choice
+            return Some((0, MatchIndices::new()));
+        }
+        if choice.is_empty() {
+            return None;
+        }
+
+        // Fast path for ASCII matching: operate on the raw bytes, no char buffers needed.
+        if choice.is_ascii() && pattern.is_ascii() {
+            let cho = choice.as_bytes();
+            let pat = pattern.as_bytes();
+            return self.match_slices(cho, pat, compute_indices);
+        }
+
+        let mut bufs = self
+            .char_buf
+            .get_or(|| RefCell::new((Vec::new(), Vec::new())))
+            .borrow_mut();
+        let (ref mut pat_buf, ref mut cho_buf) = *bufs; // tuple order: (pattern, choice)
+        pat_buf.clear();
+        pat_buf.extend(pattern.chars());
+        cho_buf.clear();
+        cho_buf.extend(choice.chars());
+
+        let respect_case = self.respect_case(pat_buf);
+
+        // Prefilter for typo mode only (see match_slices for rationale).
+        if self.allow_typos && !cheap_typo_prefilter(pat_buf, cho_buf, respect_case) {
+            return None;
+        }
+
+        let mut bonus_buf = self.bonus_buf.get_or(|| RefCell::new(Vec::new())).borrow_mut();
+        precompute_bonuses(cho_buf, &mut bonus_buf);
+
+        // Call dispatch_dp directly to avoid double-borrowing bonus_buf.
+        self.dispatch_dp(cho_buf, pat_buf, &bonus_buf, respect_case, compute_indices)
+    }
+
+    /// Run the DP and return `(score, begin, end)` without collecting all indices.
+    ///
+    /// Uses the full matrix (for traceback) but only records the first and last
+    /// matched columns instead of the full index list. Avoids the allocation and
+    /// work of `fuzzy_indices` when only the range is needed.
+    fn run_range(&self, choice: &str, pattern: &str) -> Option<(ScoreType, usize, usize)> {
+        if pattern.is_empty() {
+            return Some((0, 0, 0)); // empty pattern: score 0 with range (0, 0)
+        }
+        if choice.is_empty() {
+            return None;
+        }
+
+        let range = if choice.is_ascii() && pattern.is_ascii() { // byte path, mirrors `run`
+            let cho = choice.as_bytes();
+            let pat = pattern.as_bytes();
+            let respect_case = self.respect_case(pat);
+            // Exact mode: compute_banding validates the subsequence implicitly.
+            if self.allow_typos && !cheap_typo_prefilter(pat, cho, respect_case) {
+                return None;
+            }
+            let mut bonus_buf = self.bonus_buf.get_or(|| RefCell::new(Vec::new())).borrow_mut();
+            precompute_bonuses(cho, &mut bonus_buf);
+            if self.allow_typos {
+                range_dp::<true, _>(cho, pat, &bonus_buf, respect_case, &self.full_buf)
+            } else {
+                range_dp::<false, _>(cho, pat, &bonus_buf, respect_case, &self.full_buf)
+            }
+        } else { // non-ASCII input: decode both strings into the per-thread char buffers
+            let mut bufs = self
+                .char_buf
+                .get_or(|| RefCell::new((Vec::new(), Vec::new())))
+                .borrow_mut();
+            let (ref mut pat_buf, ref mut cho_buf) = *bufs;
+            pat_buf.clear();
+            pat_buf.extend(pattern.chars());
+            cho_buf.clear();
+            cho_buf.extend(choice.chars());
+            let respect_case = self.respect_case(pat_buf);
+            // Exact mode: compute_banding validates the subsequence implicitly.
+            if self.allow_typos && !cheap_typo_prefilter(pat_buf, cho_buf, respect_case) {
+                return None;
+            }
+            let mut bonus_buf = self.bonus_buf.get_or(|| RefCell::new(Vec::new())).borrow_mut();
+            precompute_bonuses(cho_buf, &mut bonus_buf);
+            if self.allow_typos {
+                range_dp::<true, _>(cho_buf, pat_buf, &bonus_buf, respect_case, &self.full_buf)
+            } else {
+                range_dp::<false, _>(cho_buf, pat_buf, &bonus_buf, respect_case, &self.full_buf)
+            }
+        };
+        range.map(|(s, b, e)| (s as ScoreType, b, e)) // convert the DP score to the public `ScoreType`
+    }
+}
+
+// ---------------------------------------------------------------------------
+// FuzzyMatcher trait implementation
+// ---------------------------------------------------------------------------
+
+impl FuzzyMatcher for ArinaeMatcher {
+    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
+        let result = self.run(choice, pattern, false); // score only: indices are not requested
+        result.map(|x| x.0)
+    }
+
+    fn fuzzy_match_range(&self, choice: &str, pattern: &str) -> Option<(ScoreType, usize, usize)> {
+        self.run_range(choice, pattern) // (score, begin, end) without the full index list
+    }
+
+    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> {
+        self.run(choice, pattern, true) // score plus matched indices
+    }
+}
diff --git a/src/fuzzy_matcher/arinae/prefilter.rs b/src/fuzzy_matcher/arinae/prefilter.rs
new file mode 100644
index 00000000..8783bebb
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/prefilter.rs
@@ -0,0 +1,130 @@
+//! Prefilters running before the algo to optimize performance on unmatchable items
+
+use super::Atom;
+use super::constants::MAX_PAT_LEN;
+
+/// Cheap prefilter for typo-tolerant matching.
+///
+/// Rejects choices that clearly cannot produce a positive score in the DP.
+/// The prefilter is intentionally lenient — false positives are fine (the DP
+/// will reject them), but false negatives lose valid matches.
+///
+/// Strategy:
+///   1. The first pattern character must appear somewhere in the choice at
+///      position `j_first` (anchoring the alignment).
+///   2. Of the remaining `n - 1` pattern characters, at least
+///      `floor((n - 1) / 2)` must also appear (unordered, as a multiset) in
+///      `choice[j_first..]` — the window the DP actually examines.
+///
+/// Scoping the tail check to `choice[j_first..]` is strictly correct: the
+/// typo-mode DP band starts at `j_first` for every row (bandwidth = n + 4
+/// always exceeds n - 1, so the left clamp always hits `j_first`). Any tail
+/// character that only exists before `j_first` can never contribute a true
+/// diagonal match in the DP; counting it would be a false positive.
+///
+/// We use a multiset frequency check rather than an ordered greedy scan.
+/// An ordered scan causes false negatives when a greedily-consumed character
+/// advances the cursor past positions where later characters could still match.
+///
+/// Both the `u8` and the `char` path go through the generic `tail_freq_check`:
+/// a small linear-search table is seeded from the distinct tail characters,
+/// filled by a single sequential pass over the window, and then consumed
+/// while walking the tail. Panics if `pattern` is empty (`pattern[0]`).
+pub(super) fn cheap_typo_prefilter<C: Atom>(pattern: &[C], choice: &[C], respect_case: bool) -> bool {
+    let n = pattern.len();
+    let m = choice.len();
+
+    // A pattern more than two characters longer than the choice cannot match.
+    if n > m + 2 {
+        return false;
+    }
+
+    // The first pattern character must be present in the choice.
+    // Use the SIMD-backed find_first_in (memchr for u8, scalar for char).
+    // j_first is 0-indexed; the DP window is choice[j_first..].
+    let first = pattern[0];
+    let j_first = match first.find_first_in(choice, respect_case) {
+        Some(pos) => pos,
+        None => return false,
+    };
+
+    if n == 1 { // single-character pattern: finding it is enough
+        return true;
+    }
+
+    let min_tail = (n - 1) / 2; // required number of tail matches, rounded down
+    if min_tail == 0 { // only reachable for n == 2: no tail requirement
+        return true;
+    }
+
+    // Tail frequency check scoped to choice[j_first..].
+    // Build a frequency table over the window in one pass, then consume
+    // entries as we walk the tail pattern characters.
+    let window = &choice[j_first..];
+    tail_freq_check(pattern, window, respect_case, min_tail)
+}
+
+/// Multiset frequency check: count how many of `pattern[1..]` can be
+/// satisfied (one-for-one) by characters in `window`, and return `true`
+/// as soon as `min_tail` matches are reached.
+///
+/// Builds a frequency table over `window` in a single sequential pass, then
+/// walks the tail. Every lookup is a linear search of a table holding at most
+/// `MAX_PAT_LEN - 1` entries. Panics if `pattern.len() < 2` (`tail[0]`).
+#[inline]
+fn tail_freq_check<C: Atom>(pattern: &[C], window: &[C], respect_case: bool, min_tail: usize) -> bool {
+    // We need a per-character frequency table for the window.
+    // Use a small stack-allocated array of (char_value, count) pairs keyed on
+    // the PATTERN tail characters (at most MAX_PAT_LEN - 1 = 31 entries).
+    // We use it in three passes:
+    //   Pass 1 (over the tail): collect distinct tail chars into the table with count=0.
+    //   Pass 2 (over the window): scan window and increment matching table entries.
+    //   Pass 3 (over the tail): walk the tail, decrement table entries, count matches.
+
+    const MAX_TAIL: usize = MAX_PAT_LEN - 1;
+    let tail = &pattern[1..];
+    let tail_len = tail.len().min(MAX_TAIL); // tail chars beyond MAX_TAIL are ignored
+
+    // Table of (pattern_char, available_count).  At most MAX_TAIL distinct chars.
+    // Seed every slot with the first tail char and count=0 so the array is fully
+    // initialised; only entries 0..table_len are ever consulted.
+    let placeholder = tail[0];
+    let mut table: [(C, u8); MAX_TAIL] = [(placeholder, 0); MAX_TAIL];
+    let mut table_len = 0usize;
+
+    // Pass 1: populate table with distinct tail chars (count = 0).
+    for &pi in tail[..tail_len].iter() {
+        if !table[..table_len].iter().any(|&(c, _)| pi.eq(c, respect_case)) {
+            table[table_len] = (pi, 0);
+            table_len += 1; // cannot exceed MAX_TAIL: at most tail_len insertions
+        }
+    }
+
+    // Pass 2: scan window, increment table counts (saturate at 255).
+    for &c in window {
+        if let Some(entry) = table[..table_len]
+            .iter_mut()
+            .find(|(tc, _)| Atom::eq(*tc, c, respect_case))
+        {
+            entry.1 = entry.1.saturating_add(1);
+        }
+    }
+
+    // Pass 3: walk the tail, consume one available occurrence per matched tail char.
+    let mut matched = 0usize;
+    for &pi in tail[..tail_len].iter() {
+        if let Some(entry) = table[..table_len]
+            .iter_mut()
+            .find(|(tc, _)| Atom::eq(pi, *tc, respect_case))
+            && entry.1 > 0
+        {
+            entry.1 -= 1;
+            matched += 1;
+            if matched >= min_tail { // early exit once the threshold is met
+                return true;
+            }
+        }
+    }
+
+    false
+}
diff --git a/src/fuzzy_matcher/arinae/tests.rs b/src/fuzzy_matcher/arinae/tests.rs
new file mode 100644
index 00000000..1e7028e5
--- /dev/null
+++ b/src/fuzzy_matcher/arinae/tests.rs
@@ -0,0 +1,393 @@
+use super::*;
+use crate::fuzzy_matcher::FuzzyMatcher;
+
+fn matcher() -> ArinaeMatcher {
+    ArinaeMatcher::default() // default case mode, typos disabled (`bool::default()`)
+}
+
+fn matcher_typos() -> ArinaeMatcher {
+    ArinaeMatcher {
+        allow_typos: true, // same as `matcher()` but typo-tolerant
+        ..Default::default()
+    }
+}
+
+fn score(choice: &str, pattern: &str) -> Option<i64> { // exact-mode score
+    matcher().fuzzy_match(choice, pattern)
+}
+
+fn score_typos(choice: &str, pattern: &str) -> Option<i64> { // typo-mode score
+    matcher_typos().fuzzy_match(choice, pattern)
+}
+
+fn indices(choice: &str, pattern: &str) -> Option<MatchIndices> { // exact-mode indices, score dropped
+    matcher().fuzzy_indices(choice, pattern).map(|(_, v)| v)
+}
+
+// ----- Basic matching -----
+
+#[test]
+fn empty_pattern_always_matches() {
+    assert_eq!(score("anything", ""), Some(0));
+    assert_eq!(score("", ""), Some(0)); // the empty-pattern check runs before the empty-choice check
+}
+
+#[test]
+fn empty_choice_never_matches() {
+    assert!(score("", "a").is_none());
+}
+
+#[test]
+fn exact_match_scores_positive() {
+    assert!(score("hello", "hello").unwrap() > 0);
+}
+
+#[test]
+fn no_match_returns_none() {
+    assert!(score("abc", "xyz").is_none()); // no pattern character occurs in the choice
+}
+
+#[test]
+fn subsequence_match() {
+    assert!(score("axbycz", "abc").is_some());
+    let idx = indices("axbycz", "abc").unwrap();
+    assert_eq!(idx.as_slice(), &[0, 2, 4]); // 0-based positions of 'a', 'b', 'c'
+}
+
+// ----- Scoring quality -----
+
+#[test]
+fn contiguous_beats_scattered() {
+    let contiguous = score("ab", "ab").unwrap();
+    let scattered = score("axb", "ab").unwrap(); // one character between the two matches
+    assert!(
+        contiguous > scattered,
+        "contiguous={contiguous} should beat scattered={scattered}"
+    );
+}
+
+#[test]
+fn fewer_gaps_beats_more_gaps() {
+    let one_gap = score("abxc", "abc").unwrap();
+    let two_gaps = score("axbxc", "abc").unwrap();
+    assert!(one_gap > two_gaps, "one_gap={one_gap} should beat two_gaps={two_gaps}");
+}
+
+#[test]
+fn word_start_bonus() {
+    let boundary = score("src/reader.rs", "reader").unwrap(); // "reader" starts right after '/'
+    let stitched = score("src/tui/header.rs", "reader").unwrap(); // no contiguous "reader" in this choice
+    assert!(
+        boundary > stitched,
+        "word-boundary={boundary} should beat stitched={stitched}"
+    );
+}
+
+#[test]
+fn start_of_string_bonus() {
+    let at_start = score("abc", "a").unwrap(); // 'a' is the first character
+    let at_mid = score("xabc", "a").unwrap(); // 'a' is the second character
+    assert!(at_start > at_mid, "start={at_start} should beat mid={at_mid}");
+}
+
+#[test]
+fn consecutive_match_preferred() {
+    let consecutive = score("foobar", "oob").unwrap();
+    let spread = score("oxoxb", "oob").unwrap();
+    assert!(
+        consecutive > spread,
+        "consecutive={consecutive} should beat spread={spread}"
+    );
+}
+
+#[test]
+fn camel_case_bonus() {
+    let camel = score("FooBar", "fb").unwrap(); // 'B' follows a lowercase 'o'
+    let flat = score("foobar", "fb").unwrap();
+    assert!(camel > flat, "camel={camel} should beat flat={flat}");
+}
+
+// ----- Case sensitivity -----
+
+#[test]
+fn smart_case_insensitive_lowercase_pattern() {
+    let m = ArinaeMatcher {
+        case: CaseMatching::Smart,
+        allow_typos: false,
+        ..Default::default()
+    };
+    assert!(m.fuzzy_match("FooBar", "foobar").is_some()); // all-lowercase pattern matches mixed case
+}
+
+#[test]
+fn smart_case_sensitive_uppercase_pattern() {
+    let m = ArinaeMatcher {
+        case: CaseMatching::Smart,
+        allow_typos: false,
+        ..Default::default()
+    };
+    assert!(m.fuzzy_match("foobar", "FooBar").is_none()); // uppercase in the pattern makes it case-sensitive
+    assert!(m.fuzzy_match("FooBar", "FooBar").is_some());
+}
+
+#[test]
+fn respect_case() {
+    let m = ArinaeMatcher {
+        case: CaseMatching::Respect,
+        allow_typos: false,
+        ..Default::default()
+    };
+    assert!(m.fuzzy_match("abc", "ABC").is_none()); // case must match exactly
+    assert!(m.fuzzy_match("ABC", "ABC").is_some());
+}
+
+#[test]
+fn ignore_case() {
+    let m = ArinaeMatcher {
+        case: CaseMatching::Ignore,
+        allow_typos: false,
+        ..Default::default()
+    };
+    assert!(m.fuzzy_match("abc", "ABC").is_some()); // case differences are ignored
+}
+
+// ----- Typo tolerance -----
+
+#[test]
+fn no_typos_rejects_mismatch() {
+    assert!(score("hxllo", "hello").is_none()); // 'e' does not occur in the choice
+}
+
+#[test]
+fn typos_accepts_mismatch() {
+    assert!(score_typos("hxllo", "hello").is_some()); // same input, typo mode
+}
+
+#[test]
+fn no_typos_rejects_transposition() {
+    assert!(score("hlelo", "hello").is_none()); // 'e' and the first 'l' are swapped
+}
+
+#[test]
+fn typos_accepts_transposition() {
+    assert!(score_typos("hlelo", "hello").is_some()); // same input, typo mode
+}
+
+#[test]
+fn exact_match_same_with_and_without_typos() {
+    let with = score_typos("hello", "hello").unwrap();
+    let without = score("hello", "hello").unwrap();
+    assert_eq!(
+        with, without,
+        "exact match score should be identical regardless of typo flag"
+    );
+}
+
+#[test]
+fn typo_match_scores_less_than_exact() {
+    let exact = score_typos("hello", "hello").unwrap();
+    let typo = score_typos("hxllo", "hello").unwrap(); // one substituted character
+    assert!(exact > typo, "exact={exact} should beat typo={typo}");
+}
+
+// ----- Traceback correctness -----
+
+#[test]
+fn indices_exact_match() {
+    let idx = indices("hello", "hello").unwrap();
+    assert_eq!(idx.as_slice(), &[0, 1, 2, 3, 4]); // every position, in order
+}
+
+#[test]
+fn transposition_matches() {
+    let result = matcher_typos().fuzzy_indices("abdc", "abcd"); // last two characters swapped
+    assert!(result.is_some(), "transposed input should match with typos");
+    let (score_trans, _) = result.unwrap();
+
+    let (score_exact, _) = matcher_typos().fuzzy_indices("abcd", "abcd").unwrap();
+    assert!(
+        score_exact > score_trans,
+        "exact={score_exact} should beat transposed={score_trans}"
+    );
+}
+
+// ----- Reader ranking regression -----
+
+#[test]
+fn reader_ranking() {
+    let pattern = "reader";
+    let dense = score("src/reader.rs", pattern).unwrap(); // contiguous occurrence
+    let sparse = score(
+        "tests/snapshots/normalize__insta_normalize_accented_item_unaccented_query.snap",
+        pattern,
+    )
+    .unwrap_or(0); // a non-match counts as score 0 for the comparison
+    assert!(dense > sparse, "dense={dense} should beat sparse={sparse}");
+}
+
+// ----- Ordering sanity -----
+
+#[test]
+fn ordering_ab() {
+    use crate::fuzzy_matcher::util::assert_order;
+    let m = ArinaeMatcher {
+        case: CaseMatching::Ignore,
+        allow_typos: false,
+        ..Default::default()
+    };
+    assert_order(&m, "ab", &["ab", "aoo_boo", "acb"]); // presumably expected best-to-worst order — confirm in util
+}
+
+#[test]
+fn ordering_print() {
+    use crate::fuzzy_matcher::util::assert_order;
+    let m = ArinaeMatcher {
+        case: CaseMatching::Ignore,
+        allow_typos: false,
+        ..Default::default()
+    };
+    assert_order(&m, "print", &["printf", "sprintf"]); // pattern at the start vs. after a leading 's'
+}
+
+// ----- Score-only vs full DP consistency -----
+
+#[test]
+fn score_only_matches_full_dp() {
+    let m = ArinaeMatcher {
+        case: CaseMatching::Ignore,
+        allow_typos: true,
+        ..Default::default()
+    };
+    let cases = [
+        ("hello world", "hlo"),
+        ("src/reader.rs", "reader"),
+        ("FooBar", "fb"),
+        ("axbycz", "abc"),
+        ("hxllo", "hello"), // needs typo tolerance
+    ];
+    for (choice, pattern) in &cases {
+        let score_only = m.fuzzy_match(choice, pattern); // run without index collection
+        let full = m.fuzzy_indices(choice, pattern).map(|(s, _)| s); // run with index collection
+        assert_eq!(
+            score_only, full,
+            "score mismatch for ({choice}, {pattern}): score_only={score_only:?} full={full:?}"
+        );
+    }
+}
+
+// ----- Non-ASCII fallback -----
+
+#[test]
+fn non_ascii_matching() {
+    let m = matcher();
+    assert!(m.fuzzy_match("café", "café").is_some()); // non-ASCII input goes through the `char` path
+    assert!(m.fuzzy_match("naïve", "naive").is_none()); // plain 'i' is expected not to match 'ï'
+}
+
+// Regression test: all valid subsequences must be returned in --no-typos mode.
+// grep '.*t.*e.*s.*t' should give the same results as arinae with pattern 'test'.
+#[test]
+fn all_subsequences_must_match() {
+    let m = matcher();
+    let cases = [
+        // Bug 1: full_dp tracked best_j across all rows instead of only the
+        // last row, so traceback started at the wrong cell.
+        "audio/audio/bin/temp/usr/uploads/mnt/cache/media_3445258",
+        "audio/audio/audio/docs/cache/temp/downloads/backup/shared/data_9591740",
+        // Bug 2: min_true_matches was enforced in exact mode, but the true-count
+        // bookkeeping is corrupted by tiebreaking when a character coincidentally
+        // matches at a column where diag_score=0 (fresh local alignment start).
+        // In exact mode every row increment requires a true match, so score > 0
+        // at row n already guarantees n true matches; the threshold is not needed.
+        "audio/audio/audio/opt/media/sys/sys/backup/etc_744357",
+        "audio/audio/audio/temp/shared/uploads/downloads/config/home/mnt_9037278",
+        "audio/audio/opt/cache/usr/usr/var/temp_1579492",
+    ];
+    for choice in &cases {
+        assert!(
+            m.fuzzy_match(choice, "test").is_some(),
+            "fuzzy_match should match subsequence 'test' in {:?}",
+            choice
+        );
+        assert!(
+            m.fuzzy_indices(choice, "test").is_some(),
+            "fuzzy_indices should match subsequence 'test' in {:?}",
+            choice
+        );
+    }
+}
+#[test]
+fn score_and_full_dp_same() { // the range API and the indices API must report the same score
+    let cases = [("dist-workspace.toml", "tst")];
+    let m = matcher_typos();
+    for (choice, pat) in cases {
+        assert_eq!(
+            m.fuzzy_indices(choice, pat).map(|(s, _)| s),
+            m.fuzzy_match_range(choice, pat).map(|(s, _, _)| s)
+        )
+    }
+}
+
+// Verify that fuzzy_match_range returns the same score as fuzzy_indices and
+// that begin/end equal the first/last entry of the full index list.
+#[test]
+fn range_consistent_with_indices() {
+    let cases = [
+        ("hello", "hello"),
+        ("axbycz", "abc"),
+        ("src/reader.rs", "reader"),
+        ("FooBar", "fb"),
+        ("dist-workspace.toml", "tst"),
+    ];
+    let matchers = [matcher(), matcher_typos()]; // exercise both exact and typo mode
+    for m in &matchers {
+        for &(choice, pattern) in &cases {
+            let range = m.fuzzy_match_range(choice, pattern);
+            let full = m.fuzzy_indices(choice, pattern);
+            match (range, full) {
+                (None, None) => {} // both APIs agree there is no match
+                (Some((rs, rb, re)), Some((fs, fidx))) => {
+                    assert_eq!(rs, fs, "score mismatch for ({choice}, {pattern})");
+                    let fbegin = fidx.first().copied().unwrap_or_default();
+                    let fend = fidx.last().copied().unwrap_or_default();
+                    assert_eq!(
+                        rb, fbegin,
+                        "begin mismatch for ({choice}, {pattern}): range={rb} indices={fbegin}"
+                    );
+                    assert_eq!(
+                        re, fend,
+                        "end mismatch for ({choice}, {pattern}): range={re} indices={fend}"
+                    );
+                }
+                _ => panic!("range/indices disagreement for ({choice}, {pattern})"), // one matched, the other did not
+            }
+        }
+    }
+}
+
+// ----- Prefilter regression tests -----
+
+/// Extending a typo-tolerant match with an additional character must not cause
+/// the candidate to be incorrectly rejected.
+///
+/// "fobara" matches "src/fuzzy_matcher/arinae/algo.rs" via the typo-tolerant
+/// DP (score 91). Typing one more character to form "fobaral" should continue
+/// to match — the `a`, `r`, `a` subsequence exists in the choice string and
+/// satisfies the prefilter threshold (min_tail = 3).
+///
+/// The old prefilter used a greedy ordered scan that consumed `o` at position 28,
+/// locking the cursor past all four `a` occurrences (at positions 11, 18, 22, 25),
+/// causing a false negative. The correct approach is an unordered frequency check.
+#[test]
+fn typo_prefilter_no_false_negative_on_extension() {
+    let choice = "src/fuzzy_matcher/arinae/algo.rs";
+    // Both the shorter and the extended pattern must match in typo mode.
+    assert!(
+        score_typos(choice, "fobara").is_some(),
+        "\"fobara\" should match \"{choice}\""
+    );
+    assert!(
+        score_typos(choice, "fobaral").is_some(),
+        "\"fobaral\" should match \"{choice}\" (regression: greedy prefilter scan false negative)"
+    );
+}
diff --git a/src/fuzzy_matcher/clangd.rs b/src/fuzzy_matcher/clangd.rs
index 59754ddd..16005d9c 100644
--- a/src/fuzzy_matcher/clangd.rs
+++ b/src/fuzzy_matcher/clangd.rs
@@ -21,7 +21,7 @@
 //! https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp
 //! Also check: https://github.com/lewang/flx/issues/98
 use crate::fuzzy_matcher::util::*;
-use crate::fuzzy_matcher::{FuzzyMatcher, IndexType, ScoreType};
+use crate::fuzzy_matcher::{FuzzyMatcher, IndexType, MatchIndices, ScoreType};
 use std::cell::RefCell;
 use std::cmp::max;
 use thread_local::ThreadLocal;
@@ -100,7 +100,7 @@ impl ClangdMatcher {
 }
 
 impl FuzzyMatcher for ClangdMatcher {
-    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
+    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> {
         let case_sensitive = self.is_case_sensitive(pattern);
 
         let mut choice_chars = self.c_cache.get_or(|| RefCell::new(Vec::new())).borrow_mut();
@@ -159,7 +159,10 @@ impl FuzzyMatcher for ClangdMatcher {
         }
 
         indices_reverse.reverse();
-        Some((adjust_score(score, num_choice_chars), indices_reverse))
+        Some((
+            adjust_score(score, num_choice_chars),
+            MatchIndices::from(indices_reverse),
+        ))
     }
 
     fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
@@ -199,7 +202,7 @@ impl FuzzyMatcher for ClangdMatcher {
 }
 
 /// fuzzy match `line` with `pattern`, returning the score and indices of matches
-pub fn fuzzy_indices(line: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
+pub fn fuzzy_indices(line: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> {
     ClangdMatcher::default().ignore_case().fuzzy_indices(line, pattern)
 }
 
diff --git a/src/fuzzy_matcher/frizbee.rs b/src/fuzzy_matcher/frizbee.rs
index 562510a2..bed38f0b 100644
--- a/src/fuzzy_matcher/frizbee.rs
+++ b/src/fuzzy_matcher/frizbee.rs
@@ -3,7 +3,7 @@ use frizbee::{Scoring, smith_waterman::SmithWatermanMatcher};
 
 use crate::{
     CaseMatching,
-    fuzzy_matcher::{FuzzyMatcher, IndexType, ScoreType},
+    fuzzy_matcher::{FuzzyMatcher, MatchIndices, ScoreType},
 };
 
 const RESPECT_CASE_BONUS: u16 = 10000;
@@ -31,7 +31,7 @@ impl FrizbeeMatcher {
 }
 
 impl FuzzyMatcher for FrizbeeMatcher {
-    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
+    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> {
         let scoring = Scoring {
             matching_case_bonus: match self.case {
                 CaseMatching::Respect => RESPECT_CASE_BONUS,
@@ -49,7 +49,7 @@ impl FuzzyMatcher for FrizbeeMatcher {
         let mut matcher = SmithWatermanMatcher::new(pattern.as_bytes(), &scoring);
         matcher
             .match_haystack_indices(choice.as_bytes(), 0, self.max_typos)
-            .and_then(|(m, indices)| {
+            .and_then(|(m, mut indices)| {
                 debug!("{choice}: {m} ({})", scoring.matching_case_bonus);
                 if m > scoring.matching_case_bonus.saturating_mul(
                     pattern
@@ -59,10 +59,31 @@ impl FuzzyMatcher for FrizbeeMatcher {
                         .try_into()
                         .unwrap(),
                 ) {
-                    Some((m.into(), indices))
+                    indices.reverse();
+                    Some((m.into(), MatchIndices::from(indices)))
                 } else {
                     None
                 }
             })
     }
+    /// Score-only counterpart of `fuzzy_indices`; match positions are not requested.
+    ///
+    /// NOTE(review): `fuzzy_indices` rejects scores at or below a case-bonus
+    /// cutoff, while no cutoff is applied here, so the two can disagree on
+    /// whether `choice` matches. Confirm this is intended.
+    fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<i64> {
+        // Smart case only honours case when the pattern has an uppercase char.
+        let respect_case = match self.case {
+            CaseMatching::Respect => true,
+            CaseMatching::Ignore => false,
+            CaseMatching::Smart => pattern.chars().any(char::is_uppercase),
+        };
+        let scoring = Scoring {
+            matching_case_bonus: if respect_case { RESPECT_CASE_BONUS } else { 0 },
+            ..Default::default()
+        };
+        let mut matcher = SmithWatermanMatcher::new(pattern.as_bytes(), &scoring);
+        let raw_score = matcher.match_haystack(choice.as_bytes(), self.max_typos);
+        raw_score.map(|raw| raw as ScoreType)
+    }
 }
diff --git a/src/fuzzy_matcher/fzy.rs b/src/fuzzy_matcher/fzy.rs
index 5ad40c9a..61d28586 100644
--- a/src/fuzzy_matcher/fzy.rs
+++ b/src/fuzzy_matcher/fzy.rs
@@ -41,7 +41,7 @@ use std::cell::RefCell;
 use thread_local::ThreadLocal;
 
 use crate::fuzzy_matcher::util::cheap_matches;
-use crate::fuzzy_matcher::{FuzzyMatcher, IndexType, ScoreType};
+use crate::fuzzy_matcher::{FuzzyMatcher, IndexType, MatchIndices, ScoreType};
 
 // ---------------------------------------------------------------------------
 // Score constants (from fzy's config.def.h, scaled ×200 to integer)
@@ -777,7 +777,7 @@ impl FzyMatcher {
 }
 
 impl FuzzyMatcher for FzyMatcher {
-    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
+    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> {
         let case_sensitive = self.is_case_sensitive(pattern);
 
         let mut choice_chars = self.c_cache.get_or(|| RefCell::new(Vec::new())).borrow_mut();
@@ -793,14 +793,14 @@ impl FuzzyMatcher for FzyMatcher {
                 cheap_matches(&choice_chars, &pattern_chars, case_sensitive)?;
                 let mut positions = Vec::with_capacity(pattern_chars.len());
                 let s = fzy_score(&pattern_chars, &choice_chars, case_sensitive, Some(&mut positions))?;
-                Some((internal_to_skim_score(s), positions))
+                Some((internal_to_skim_score(s), MatchIndices::from(positions)))
             }
             Some(max_t) => {
                 // Fast path: try exact subsequence match first
                 if cheap_matches(&choice_chars, &pattern_chars, case_sensitive).is_some() {
                     let mut positions = Vec::with_capacity(pattern_chars.len());
                     if let Some(s) = fzy_score(&pattern_chars, &choice_chars, case_sensitive, Some(&mut positions)) {
-                        return Some((internal_to_skim_score(s), positions));
+                        return Some((internal_to_skim_score(s), MatchIndices::from(positions)));
                     }
                 }
 
@@ -853,7 +853,7 @@ impl FuzzyMatcher for FzyMatcher {
                     self.lp_cache.get().map(|cell| cell.replace(vec![]));
                 }
 
-                Some((internal_to_skim_score(s), positions))
+                Some((internal_to_skim_score(s), MatchIndices::from(positions)))
             }
         }
     }
@@ -942,7 +942,7 @@ impl FuzzyMatcher for FzyMatcher {
 
 /// Fuzzy match `choice` against `pattern` using the fzy algorithm, returning
 /// the score and matched character indices.
-pub fn fuzzy_indices(choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
+pub fn fuzzy_indices(choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> {
     FzyMatcher::default().ignore_case().fuzzy_indices(choice, pattern)
 }
 
@@ -1052,7 +1052,7 @@ mod tests {
         let result = matcher.fuzzy_indices("Hello, 世界", "H世");
         assert!(result.is_some());
         let (_, indices) = result.unwrap();
-        assert_eq!(indices, vec![0, 7]);
+        assert_eq!(indices.as_slice(), &[0, 7]);
     }
 
     #[test]
@@ -1142,7 +1142,7 @@ mod tests {
         let result = matcher.fuzzy_indices("abx", "abc");
         assert!(result.is_some());
         let (_, indices) = result.unwrap();
-        assert_eq!(indices, vec![0, 1, 2]);
+        assert_eq!(indices.as_slice(), &[0, 1, 2]);
     }
 
     #[test]
@@ -1152,7 +1152,7 @@ mod tests {
         assert!(result.is_some());
         let (_, indices) = result.unwrap();
         // 'a'→0, 'b'→1, 'c' deleted (no index), 'd'→2
-        assert_eq!(indices, vec![0, 1, 2]);
+        assert_eq!(indices.as_slice(), &[0, 1, 2]);
     }
 
     #[test]
diff --git a/src/fuzzy_matcher/mod.rs b/src/fuzzy_matcher/mod.rs
index 83a818af..e9fb1bc1 100644
--- a/src/fuzzy_matcher/mod.rs
+++ b/src/fuzzy_matcher/mod.rs
@@ -3,6 +3,8 @@
 //! This module provides different fuzzy matching algorithms including
 //! skim's own algorithm and clangd's algorithm for matching text patterns.
 
+/// Arinae fuzzy matching algorithm (Smith-Waterman with affine gaps)
+pub mod arinae;
 /// Clangd fuzzy matching algorithm
 pub mod clangd;
 pub mod frizbee;
@@ -15,13 +17,31 @@ mod util;
 pub(crate) type IndexType = usize;
 pub(crate) type ScoreType = i64;
 
+pub(crate) type MatchIndices = Vec<IndexType>;
+
 /// Trait for fuzzy matching text patterns against choices
 pub trait FuzzyMatcher: Send + Sync {
     /// fuzzy match choice with pattern, and return the score & matched indices of characters
-    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(i64, Vec<usize>)>;
+    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(i64, MatchIndices)>;
 
     /// fuzzy match choice with pattern, and return the score of matching
     fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<i64> {
         self.fuzzy_indices(choice, pattern).map(|(score, _)| score)
     }
+
+    /// Fuzzy match and return `(score, begin_char_index, end_char_index)`.
+    ///
+    /// `begin` is the first entry and `end` the last entry of the matched index
+    /// list, i.e. the character indices of the first and last matched pattern
+    /// characters; both are `0` when the list is empty.
+    ///
+    /// This default implementation delegates to `fuzzy_indices` and discards all
+    /// but the two boundary indices. Implementors can override it to skip
+    /// building the per-character index list when only the span is needed.
+    fn fuzzy_match_range(&self, choice: &str, pattern: &str) -> Option<(i64, usize, usize)> {
+        let (score, indices) = self.fuzzy_indices(choice, pattern)?;
+        let begin = indices.first().map_or(0, |&idx| idx);
+        let end = indices.last().map_or(0, |&idx| idx);
+        Some((score, begin, end))
+    }
 }
diff --git a/src/fuzzy_matcher/skim.rs b/src/fuzzy_matcher/skim.rs
index 6081aff5..67f1dadf 100644
--- a/src/fuzzy_matcher/skim.rs
+++ b/src/fuzzy_matcher/skim.rs
@@ -24,7 +24,7 @@ use thread_local::ThreadLocal;
 
 use super::skim::Movement::{Match, Skip};
 use super::util::{char_equal, cheap_matches};
-use super::{FuzzyMatcher, IndexType, ScoreType};
+use super::{FuzzyMatcher, IndexType, MatchIndices, ScoreType};
 
 const BONUS_MATCHED: ScoreType = 4;
 const BONUS_CASE_MATCH: ScoreType = 4;
@@ -49,8 +49,8 @@ pub struct SkimMatcher {}
 ///
 /// V1 algorithm is deprecated, checkout `FuzzyMatcherV2`
 impl FuzzyMatcher for SkimMatcher {
-    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
-        fuzzy_indices(choice, pattern)
+    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> {
+        fuzzy_indices(choice, pattern).map(|(s, v)| (s, MatchIndices::from(v)))
     }
 
     fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
@@ -1033,8 +1033,9 @@ impl SkimMatcherV2 {
 }
 
 impl FuzzyMatcher for SkimMatcherV2 {
-    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec<IndexType>)> {
+    fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, MatchIndices)> {
         self.fuzzy(choice, pattern, true)
+            .map(|(s, v)| (s, MatchIndices::from(v)))
     }
 
     fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option<ScoreType> {
diff --git a/src/helper/item_reader.rs b/src/helper/item_reader.rs
index 8e6eadc4..9ea44465 100644
--- a/src/helper/item_reader.rs
+++ b/src/helper/item_reader.rs
@@ -17,7 +17,7 @@ use crate::{SkimItem, SkimItemReceiver, SkimItemSender, SkimOptions};
 
 const DELIMITER_STR: &str = r"[\t\n ]+";
 const READ_BUFFER_SIZE: usize = 1024;
-const ITEMS_BUFFER_SIZE: usize = 128;
+const ITEMS_BUFFER_SIZE: usize = 1024;
 const SEND_TIMEOUT_MS: u64 = 100; // Send items if we haven't sent anything in 100ms
 
 pub enum CollectorInput {
diff --git a/src/item.rs b/src/item.rs
index 52bb6809..a2e41875 100644
--- a/src/item.rs
+++ b/src/item.rs
@@ -16,6 +16,7 @@ use clap::builder::PossibleValue;
 
 use crate::spinlock::{SpinLock, SpinLockGuard};
 use crate::{MatchRange, Rank, SkimItem};
+use tokio::sync::Notify;
 
 //------------------------------------------------------------------------------
 
@@ -143,20 +144,91 @@ impl MatchedItem {
             return existing;
         }
 
+        // Fast path: if all existing <= all incoming, we can append without merging.
+        if existing.last().unwrap() <= incoming.first().unwrap() {
+            let mut out = existing;
+            out.extend(incoming);
+            return out;
+        }
+
+        // Fast path: if all incoming <= all existing, prepend without complex merge.
+        if incoming.last().unwrap() <= existing.first().unwrap() {
+            let mut out = incoming;
+            out.extend(existing);
+            return out;
+        }
+
+        // Merge using direct next values to avoid Peekable overhead.
         let mut merged = Vec::with_capacity(existing.len() + incoming.len());
-        let mut a = existing.into_iter().peekable();
-        let mut b = incoming.into_iter().peekable();
-        while a.peek().is_some() && b.peek().is_some() {
-            if a.peek().unwrap() <= b.peek().unwrap() {
-                merged.push(a.next().unwrap());
-            } else {
-                merged.push(b.next().unwrap());
+        let mut a = existing.into_iter();
+        let mut b = incoming.into_iter();
+        let mut a_next = a.next();
+        let mut b_next = b.next();
+
+        loop {
+            match (&a_next, &b_next) {
+                (Some(av), Some(bv)) => {
+                    if av <= bv {
+                        // take a_next
+                        merged.push(a_next.take().unwrap());
+                        a_next = a.next();
+                    } else {
+                        merged.push(b_next.take().unwrap());
+                        b_next = b.next();
+                    }
+                }
+                (Some(_), None) => {
+                    merged.push(a_next.take().unwrap());
+                    merged.extend(a);
+                    break;
+                }
+                (None, Some(_)) => {
+                    merged.push(b_next.take().unwrap());
+                    merged.extend(b);
+                    break;
+                }
+                (None, None) => break,
             }
         }
-        merged.extend(a);
-        merged.extend(b);
+
         merged
     }
+
+    /// Merge `incoming` into an already-sorted `existing` vector in-place.
+    ///
+    /// Two strategies are used, selected by the size of `incoming`:
+    /// - Small batches are inserted one-by-one at the position found by binary
+    ///   search. Each lookup is O(log n), but `Vec::insert` shifts the tail, so
+    ///   the worst case is O(m * n) element moves; hence the size limit.
+    /// - Larger batches fall back to the linear two-way merge, which is O(n+m).
+    ///
+    /// Items that compare equal to existing entries are placed after them, the
+    /// same tie order the two-way merge loop in `sorted_merge` produces.
+    ///
+    /// `existing` must be sorted according to the same ordering used by
+    /// `MatchedItem::cmp`.
+    pub fn merge_into_sorted(existing: &mut Vec<MatchedItem>, incoming: Vec<MatchedItem>) {
+        if incoming.is_empty() {
+            return;
+        }
+
+        // Heuristic threshold: at or below this batch size, prefer binary-insert.
+        const SMALL_INSERT_THRESHOLD: usize = 256;
+
+        if incoming.len() <= SMALL_INSERT_THRESHOLD {
+            for item in incoming {
+                // Upper bound (first entry strictly greater than `item`) rather
+                // than `binary_search_by`, whose result among equal entries is
+                // unspecified; this keeps tie order stable and deterministic.
+                let pos = existing.partition_point(|e| e.cmp(&item).is_le());
+                existing.insert(pos, item);
+            }
+        } else {
+            // The linear merge avoids the O(n*m) cost of repeated inserts.
+            let old = std::mem::take(existing);
+            *existing = MatchedItem::sorted_merge(old, incoming);
+        }
+    }
 }
 
 use std::cmp::Ordering as CmpOrd;
@@ -200,6 +272,13 @@ pub struct ItemPool {
     lines_to_reserve: usize,
     /// Reverse the order of items (--tac flag)
     tac: bool,
+
+    /// Notified whenever new items are appended to the pool (async path).
+    ///
+    /// Listeners (e.g. the TUI event loop) can `await` this to wake up
+    /// immediately when items arrive instead of waiting for the next
+    /// periodic tick.
+    pub items_available: Arc<Notify>,
 }
 
 impl Default for ItemPool {
@@ -211,6 +290,7 @@ impl Default for ItemPool {
             reserved_items: SpinLock::new(Vec::new()),
             lines_to_reserve: 0,
             tac: false,
+            items_available: Arc::new(Notify::new()),
         }
     }
 }
@@ -230,6 +310,7 @@ impl ItemPool {
             reserved_items: SpinLock::new(Vec::new()),
             lines_to_reserve: options.header_lines,
             tac: options.tac,
+            items_available: Arc::new(Notify::new()),
         }
     }
 
@@ -305,7 +386,15 @@ impl ItemPool {
         }
         self.length.store(pool.len(), Ordering::SeqCst);
         trace!("item pool, done append {len} items, total: {}", pool.len());
-        pool.len()
+        let new_len = pool.len();
+        drop(pool);
+        drop(header_items);
+        // Wake any listener that is waiting for new items (e.g. the event loop
+        // or the filter-mode loop) so it can restart the matcher immediately
+        // instead of waiting for the next periodic tick.
+        self.items_available.notify_one();
+
+        new_len
     }
 
     /// Takes items from the pool, copying new items since last take and releasing lock immediately
diff --git a/src/lib.rs b/src/lib.rs
index bdfd29ee..b2b215e9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,6 +31,7 @@ use std::borrow::Cow;
 use std::fmt::Display;
 use std::sync::Arc;
 
+use crate::fuzzy_matcher::MatchIndices;
 use ratatui::{
     style::Style,
     text::{Line, Span},
@@ -97,7 +98,7 @@ pub enum Matches {
     #[default]
     None,
     /// Matches at specific character indices
-    CharIndices(Vec<usize>),
+    CharIndices(MatchIndices),
     /// Matches in a character range (start, end)
     CharRange(usize, usize),
     /// Matches in a byte range (start, end)
@@ -281,7 +282,7 @@ pub enum MatchRange {
     /// Range of bytes (start, end)
     ByteRange(usize, usize),
     /// Individual character indices that matched
-    Chars(Vec<usize>),
+    Chars(MatchIndices),
 }
 
 /// Rank stores the raw match measurements used for sorting results.
@@ -316,7 +317,7 @@ pub struct MatchResult {
 impl MatchResult {
     #[must_use]
     /// Converts the match range to character indices
-    pub fn range_char_indices(&self, text: &str) -> Vec<usize> {
+    pub fn range_char_indices(&self, text: &str) -> MatchIndices {
         match &self.matched_range {
             &MatchRange::ByteRange(start, end) => {
                 let first = text[..start].chars().count();
diff --git a/src/matcher.rs b/src/matcher.rs
index fc6d9803..534b1634 100644
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -126,6 +126,7 @@ impl Matcher {
                 .fuzzy_algorithm(options.algorithm)
                 .exact_mode(options.exact)
                 .typos(options.typos)
+                .filter_mode(options.filter.is_some())
                 .rank_builder(rank_builder.clone())
                 .build();
 
@@ -192,43 +193,80 @@ impl Matcher {
         // could call kill() + reset() before the old closure runs, causing the old
         // closure to re-take items that should belong to the new matcher.
         let items = item_pool.take();
-        trace!("matcher start, total: {}", items.len());
+        let total = items.len();
+        trace!("matcher start, total: {}", total);
 
         thread_pool.spawn(move || {
+            // Process items in parallel using chunk-based accounting to minimize
+            // atomic contention. Each rayon work unit processes a chunk of items,
+            // updating the shared `processed` and `matched` counters only once per
+            // chunk instead of once per item. The interrupt flag is also checked
+            // only once per chunk to amortize the atomic load.
+            //
+            // `with_min_len` ensures rayon doesn't split work into chunks smaller
+            // than CHUNK_SIZE, keeping the overhead of the parallel iterator low
+            // relative to the actual matching work.
+            const CHUNK_SIZE: usize = 512;
+
             let matched_items: Vec<MatchedItem> = items
                 .into_par_iter()
-                .chunks(8196)
-                .take_any_while(|_chunk| {
-                    if interrupt.load(Ordering::Relaxed) {
-                        return false;
-                    }
+                .with_min_len(CHUNK_SIZE)
+                .fold(
+                    || (Vec::new(), 0usize, 0usize), // (local_matches, local_processed, local_matched)
+                    |(mut local_matches, mut local_processed, mut local_matched), item| {
+                        // Check interrupt once at the start of each chunk boundary.
+                        // The fold processes items sequentially within each rayon work unit,
+                        // so checking every CHUNK_SIZE items amortizes the atomic load.
+                        if local_processed % CHUNK_SIZE == 0 && interrupt.load(Ordering::Relaxed) {
+                            return (local_matches, local_processed, local_matched);
+                        }
 
-                    true
-                })
-                .map(|chunk| {
-                    processed.fetch_add(chunk.len(), Ordering::Relaxed);
-
-                    let matched_chunk: Vec<MatchedItem> = chunk
-                        .into_iter()
-                        .filter_map(|item| {
-                            matcher_engine.match_item(item.as_ref()).map(|match_result| {
-                                // item is Arc but we get &Arc from iterator, so one clone is needed
-                                MatchedItem {
-                                    item,
-                                    rank: match_result.rank,
-                                    rank_builder: rank_builder.clone(),
-                                    matched_range: Some(match_result.matched_range),
-                                }
-                            })
-                        })
-                        .collect();
-
-                    matched.fetch_add(matched_chunk.len(), Ordering::Relaxed);
-
-                    matched_chunk
+                        local_processed += 1;
+
+                        if let Some(match_result) = matcher_engine.match_item(item.as_ref()) {
+                            local_matched += 1;
+                            local_matches.push(MatchedItem {
+                                item,
+                                rank: match_result.rank,
+                                rank_builder: rank_builder.clone(),
+                                matched_range: Some(match_result.matched_range),
+                            });
+                        }
+
+                        // Flush counters periodically so the UI sees progress updates.
+                        if local_processed % CHUNK_SIZE == 0 {
+                            processed.fetch_add(CHUNK_SIZE, Ordering::Relaxed);
+                            if local_matched > 0 {
+                                matched.fetch_add(local_matched, Ordering::Relaxed);
+                                local_matched = 0;
+                            }
+                        }
+
+                        (local_matches, local_processed, local_matched)
+                    },
+                )
+                .map(|(local_matches, local_processed, local_matched)| {
+                    // Flush any remaining counts that didn't hit a chunk boundary.
+                    let remainder = local_processed % CHUNK_SIZE;
+                    if remainder > 0 {
+                        processed.fetch_add(remainder, Ordering::Relaxed);
+                    }
+                    if local_matched > 0 {
+                        matched.fetch_add(local_matched, Ordering::Relaxed);
+                    }
+                    local_matches
                 })
-                .flatten_iter()
-                .collect();
+                .reduce(Vec::new, |mut a, mut b| {
+                    // Merge per-thread result vectors. Always extend the larger one
+                    // to avoid unnecessary reallocations.
+                    if a.len() >= b.len() {
+                        a.extend(b);
+                        a
+                    } else {
+                        b.extend(a);
+                        b
+                    }
+                });
 
             if !interrupt.load(Ordering::SeqCst) {
                 trace!("matcher stop, total matched: {}", matched_items.len());
diff --git a/src/options.rs b/src/options.rs
index 51cb36d1..cf9b6bb0 100644
--- a/src/options.rs
+++ b/src/options.rs
@@ -1147,6 +1147,12 @@ impl SkimOptions {
             self.typos = Typos::Disabled;
         }
 
+        if let Some(ref filter_query) = self.filter
+            && self.query.is_none()
+        {
+            self.query = Some(filter_query.clone());
+        }
+
         self
     }
     /// Initializes history from configured history files
@@ -1229,6 +1235,8 @@ impl SkimOptions {
 pub enum FeatureFlag {
     /// Disable preview PTY on linux
     NoPreviewPty,
+    /// Display the item's match score before its value in the item list (for matcher debugging)
+    ShowScore,
 }
 
 #[allow(unused_macros)]
diff --git a/src/skim.rs b/src/skim.rs
index 6f116edc..dc6257f5 100644
--- a/src/skim.rs
+++ b/src/skim.rs
@@ -49,14 +49,8 @@ impl Skim {
     /// # Panics
     ///
     /// Panics if the tui fails to initilize
-    pub fn run_with(mut options: SkimOptions, source: Option<SkimItemReceiver>) -> Result<SkimOutput> {
+    pub fn run_with(options: SkimOptions, source: Option<SkimItemReceiver>) -> Result<SkimOutput> {
         trace!("running skim");
-        // In filter mode, use the filter string as the query for matching
-        if let Some(ref filter_query) = options.filter
-            && options.query.is_none()
-        {
-            options.query = Some(filter_query.clone());
-        }
         let mut skim = Self::init(options, source)?;
 
         skim.start();
@@ -329,7 +323,7 @@ where
                 && (!app.matcher_control.stopped() || !reader_control.is_done())
             {
                 trace!("still waiting");
-                std::thread::sleep(Duration::from_millis(10));
+                std::thread::sleep(Duration::from_millis(1));
                 app.restart_matcher(false);
             }
             trace!(
@@ -429,6 +423,7 @@ where
     /// ```
     pub async fn tick(&mut self) -> Result<bool> {
         let matcher_interval = &mut self.matcher_interval;
+        let items_available = self.app.item_pool.items_available.clone();
         select! {
             event = self.tui.as_mut().expect("TUI should be initialized before the event loop can start").next() => {
                 let evt = event.ok_or_eyre("Could not acquire next event")?;
@@ -458,6 +453,11 @@ where
             } => {
               self.app.restart_matcher(false);
             }
+            // Wake immediately when new items arrive in the pool so the matcher
+            // can pick them up without waiting for the next periodic interval.
+            _ = items_available.notified() => {
+                self.app.restart_matcher(false);
+            }
             Ok(stream) = async {
                 match &self.listener {
                     Some(l) => interprocess::local_socket::traits::tokio::Listener::accept(l).await,
@@ -490,7 +490,7 @@ where
     /// until the user accepts or aborts. Use `tick()` directly if you need
     /// to interleave your own logic between iterations.
     pub async fn run(&mut self) -> Result<()> {
-        self.matcher_interval = Some(tokio::time::interval(Duration::from_millis(100)));
+        self.matcher_interval = Some(tokio::time::interval(Duration::from_millis(10)));
         trace!("Starting event loop");
         loop {
             if self.tick().await? {
diff --git a/src/tui/app.rs b/src/tui/app.rs
index fd425dc1..2dece17f 100644
--- a/src/tui/app.rs
+++ b/src/tui/app.rs
@@ -1271,8 +1271,8 @@ impl App {
                         if no_sort {
                             existing.items.extend(matches);
                         } else {
-                            let old = std::mem::take(&mut existing.items);
-                            existing.items = MatchedItem::sorted_merge(old, matches);
+                            // Merge incoming matches into existing sorted list in-place.
+                            MatchedItem::merge_into_sorted(&mut existing.items, matches);
                         }
                     } else {
                         *guard = Some(ProcessedItems {
diff --git a/src/tui/item_list.rs b/src/tui/item_list.rs
index 19343f8a..89ce160d 100644
--- a/src/tui/item_list.rs
+++ b/src/tui/item_list.rs
@@ -74,6 +74,8 @@ pub struct ItemList {
     wrap: bool,
     /// Border type, if borders are enabled
     pub border: Option<BorderType>,
+    /// When true, prepend each item's match score to its display text
+    print_score: bool,
 }
 
 impl Default for ItemList {
@@ -107,6 +109,7 @@ impl Default for ItemList {
             cycle: false,
             wrap: false,
             border: None,
+            print_score: false,
         }
     }
 }
@@ -568,6 +571,7 @@ impl SkimWidget for ItemList {
             cycle: options.cycle,
             wrap: options.wrap_items,
             border: options.border,
+            print_score: options.flags.contains(&crate::options::FeatureFlag::ShowScore),
         }
     }
 
@@ -720,7 +724,7 @@ impl SkimWidget for ItemList {
 
                     // Prepend cursor indicators
                     // Pre-allocate capacity to avoid reallocation
-                    let mut spans: Vec<Span> = Vec::with_capacity(2 + display_line.spans.len());
+                    let mut spans: Vec<Span> = Vec::with_capacity(3 + display_line.spans.len());
                     spans.push(Span::styled(
                         if is_current {
                             selector_icon.to_owned()
@@ -737,6 +741,14 @@ impl SkimWidget for ItemList {
                         },
                         theme.selected,
                     ));
+                    // Optionally prepend the match score for debugging
+                    if this.print_score {
+                        let score = item.rank.score;
+                        spans.push(Span::styled(
+                            format!("[{score}] "),
+                            if is_current { theme.current } else { theme.normal },
+                        ));
+                    }
                     spans.extend(display_line.spans);
 
                     if *wrap {
diff --git a/tests/matcher.rs b/tests/matcher.rs
index b3d1e182..285ce3a3 100644
--- a/tests/matcher.rs
+++ b/tests/matcher.rs
@@ -79,3 +79,9 @@ insta_test!(matcher_fzy, INPUT_ITEMS, &["-q", "stum", "--algo", "fzy", "--no-typ
 insta_test!(matcher_fzy_typos, INPUT_ITEMS, &["-q", "stum", "--algo", "fzy"], {
     @snap;
 });
+insta_test!(matcher_arinae, INPUT_ITEMS, &["-q", "stum", "--algo", "arinae", "--no-typos"], {
+    @snap;
+});
+insta_test!(matcher_arinae_typos, INPUT_ITEMS, &["-q", "stum", "--algo", "arinae"], {
+    @snap;
+});
diff --git a/tests/snapshots/matcher__matcher_arinae.snap b/tests/snapshots/matcher__matcher_arinae.snap
new file mode 100644
index 00000000..b1262148
--- /dev/null
+++ b/tests/snapshots/matcher__matcher_arinae.snap
@@ -0,0 +1,29 @@
+---
+source: tests/matcher.rs
+expression: buf + & cursor_pos
+---
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"  src/tui/item_list.rs                                                          "
+"> src/tui/mod.rs                                                                "
+"  2/49                                                                       0/0"
+"> stum                                                                          "
+cursor: (24, 7)
diff --git a/tests/snapshots/matcher__matcher_arinae_typos.snap b/tests/snapshots/matcher__matcher_arinae_typos.snap
new file mode 100644
index 00000000..378135f3
--- /dev/null
+++ b/tests/snapshots/matcher__matcher_arinae_typos.snap
@@ -0,0 +1,29 @@
+---
+source: tests/matcher.rs
+expression: buf + & cursor_pos
+---
+"  src/manpage.rs                                                                "
+"  src/util.rs                                                                   "
+"  src/item.rs                                                                   "
+"  src/fuzzy_matcher/clangd.rs                                                   "
+"  src/fuzzy_matcher/frizbee.rs                                                  "
+"  src/fuzzy_matcher/skim.rs                                                     "
+"  src/fuzzy_matcher/util.rs                                                     "
+"  src/fuzzy_matcher/mod.rs                                                      "
+"  src/theme.rs                                                                  "
+"  src/tmux.rs                                                                   "
+"  src/tui/header.rs                                                             "
+"  src/tui/statusline.rs                                                         "
+"  src/tui/input.rs                                                              "
+"  src/tui/app.rs                                                                "
+"  src/tui/backend.rs                                                            "
+"  src/tui/event.rs                                                              "
+"  src/tui/preview.rs                                                            "
+"  src/tui/widget.rs                                                             "
+"  src/tui/options.rs                                                            "
+"  src/tui/util.rs                                                               "
+"  src/tui/item_list.rs                                                          "
+"> src/tui/mod.rs                                                                "
+"  38/49                                                                      0/0"
+"> stum                                                                          "
+cursor: (24, 7)
diff --git a/tests/snapshots/matcher__matcher_skim_v3.snap b/tests/snapshots/matcher__matcher_skim_v3.snap
new file mode 100644
index 00000000..b1262148
--- /dev/null
+++ b/tests/snapshots/matcher__matcher_skim_v3.snap
@@ -0,0 +1,29 @@
+---
+source: tests/matcher.rs
+expression: buf + & cursor_pos
+---
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"                                                                                "
+"  src/tui/item_list.rs                                                          "
+"> src/tui/mod.rs                                                                "
+"  2/49                                                                       0/0"
+"> stum                                                                          "
+cursor: (24, 7)
diff --git a/tests/snapshots/matcher__matcher_skim_v3_typos.snap b/tests/snapshots/matcher__matcher_skim_v3_typos.snap
new file mode 100644
index 00000000..9aefc072
--- /dev/null
+++ b/tests/snapshots/matcher__matcher_skim_v3_typos.snap
@@ -0,0 +1,29 @@
+---
+source: tests/matcher.rs
+expression: buf + & cursor_pos
+---
+"  src/manpage.rs                                                                "
+"  src/util.rs                                                                   "
+"  src/item.rs                                                                   "
+"  src/fuzzy_matcher/clangd.rs                                                   "
+"  src/fuzzy_matcher/frizbee.rs                                                  "
+"  src/fuzzy_matcher/skim.rs                                                     "
+"  src/fuzzy_matcher/util.rs                                                     "
+"  src/fuzzy_matcher/mod.rs                                                      "
+"  src/theme.rs                                                                  "
+"  src/tmux.rs                                                                   "
+"  src/tui/header.rs                                                             "
+"  src/tui/statusline.rs                                                         "
+"  src/tui/input.rs                                                              "
+"  src/tui/app.rs                                                                "
+"  src/tui/backend.rs                                                            "
+"  src/tui/event.rs                                                              "
+"  src/tui/preview.rs                                                            "
+"  src/tui/widget.rs                                                             "
+"  src/tui/options.rs                                                            "
+"  src/tui/util.rs                                                               "
+"  src/tui/item_list.rs                                                          "
+"> src/tui/mod.rs                                                                "
+"  37/49                                                                      0/0"
+"> stum                                                                          "
+cursor: (24, 7)