Skip to content

Commit 164d943

Browse files
committed
fix: remove the redundant O(m) minimum-edit-distance scan in the hot DFS loop and reduce heap-allocation thrashing in FuzzyIterator::next
1 parent c9f5d55 commit 164d943

2 files changed

Lines changed: 32 additions & 20 deletions

File tree

src/fsa/double_array/iterators.rs

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -114,20 +114,28 @@ pub struct FuzzyIterator<'a, V: MapValue> {
114114
pub(crate) max_dist: usize,
115115
/// dp_columns[d] = edit distance row for path[0..d] vs query[0..j], j in 0..=query.len()
116116
pub(crate) dp_columns: Vec<Vec<usize>>,
117+
/// Recycled DP row buffers to avoid heap allocation per DFS transition.
118+
pub(crate) spare_rows: Vec<Vec<usize>>,
117119
}
118120

119121
impl<'a, V: MapValue> FuzzyIterator<'a, V> {
120-
/// Compute the next DP row when appending character `c` at current depth.
121-
fn compute_row(prev_row: &[usize], query: &[u8], c: u8) -> Vec<usize> {
122-
let mut row = vec![0; query.len() + 1];
123-
row[0] = prev_row[0] + 1; // deletion
124-
for j in 1..=query.len() {
122+
/// Compute DP row in-place into `row` and return the row minimum.
123+
fn compute_row_inplace(prev_row: &[usize], query: &[u8], c: u8, row: &mut Vec<usize>) -> usize {
124+
let len = query.len() + 1;
125+
row.resize(len, 0);
126+
row[0] = prev_row[0] + 1;
127+
let mut min_val = row[0];
128+
for j in 1..len {
125129
let cost = if query[j - 1] == c { 0 } else { 1 };
126-
row[j] = (prev_row[j] + 1) // deletion
127-
.min(row[j - 1] + 1) // insertion
128-
.min(prev_row[j - 1] + cost); // substitution
130+
let val = (prev_row[j] + 1)
131+
.min(row[j - 1] + 1)
132+
.min(prev_row[j - 1] + cost);
133+
row[j] = val;
134+
if val < min_val {
135+
min_val = val;
136+
}
129137
}
130-
row
138+
min_val
131139
}
132140
}
133141

@@ -158,12 +166,15 @@ impl<'a, V: MapValue> Iterator for FuzzyIterator<'a, V> {
158166
// Try next child
159167
if frame.next_sibling == NINFO_NONE {
160168
self.stack.pop();
161-
// Restore dp_columns: each stack frame at depth d has dp_columns[0..=d].
162-
// When popping, truncate to the parent's depth + 1.
163-
if let Some(parent) = self.stack.last() {
164-
self.dp_columns.truncate(parent.depth + 1);
169+
let target_len = if let Some(parent) = self.stack.last() {
170+
parent.depth + 1
165171
} else {
166-
self.dp_columns.truncate(1); // keep root row only
172+
1
173+
};
174+
while self.dp_columns.len() > target_len {
175+
if let Some(row) = self.dp_columns.pop() {
176+
self.spare_rows.push(row);
177+
}
167178
}
168179
continue;
169180
}
@@ -187,12 +198,12 @@ impl<'a, V: MapValue> Iterator for FuzzyIterator<'a, V> {
187198
continue;
188199
}
189200

190-
// Compute DP row for this child
191-
let prev_row = &self.dp_columns[parent_depth];
192-
let new_row = Self::compute_row(prev_row, &self.query, label);
201+
// Compute DP row in-place using a recycled buffer
202+
let mut row = self.spare_rows.pop().unwrap_or_default();
203+
let min_val = Self::compute_row_inplace(&self.dp_columns[parent_depth], &self.query, label, &mut row);
193204

194-
// Prune: if min edit distance in row > max_dist, skip subtree
195-
if *new_row.iter().min().unwrap_or(&usize::MAX) > self.max_dist {
205+
if min_val > self.max_dist {
206+
self.spare_rows.push(row);
196207
continue;
197208
}
198209

@@ -203,7 +214,7 @@ impl<'a, V: MapValue> Iterator for FuzzyIterator<'a, V> {
203214

204215
// Set DP column for child depth
205216
self.dp_columns.truncate(child_depth);
206-
self.dp_columns.push(new_row);
217+
self.dp_columns.push(row);
207218

208219
// Push child frame
209220
let first_child = if child_pos < self.trie.trie.ninfos.len() {

src/fsa/double_array/map.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ impl<V: MapValue> DoubleArrayTrieMap<V> {
316316
query: query.to_vec(),
317317
max_dist,
318318
dp_columns: vec![row0],
319+
spare_rows: Vec::new(),
319320
}
320321
}
321322

0 commit comments

Comments
 (0)