diff --git a/Directory.Packages.props b/Directory.Packages.props
index 971d5960e27..d8437a2cfbc 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -32,6 +32,6 @@
     <PackageVersion Include="System.Numerics.Tensors" Version="10.0.3" />
     <PackageVersion Include="Microsoft.Extensions.Hosting" Version="10.0.3" />
     <PackageVersion Include="Microsoft.Extensions.Hosting.WindowsServices" Version="10.0.3" />
-    <PackageVersion Include="diskann-garnet" Version="1.0.23" />
+    <PackageVersion Include="diskann-garnet" Version="1.0.26" />
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/DiskANN b/DiskANN
new file mode 160000
index 00000000000..a2bb12269bf
--- /dev/null
+++ b/DiskANN
@@ -0,0 +1 @@
+Subproject commit a2bb12269bf5ba9c4b7105af48fc6b34a365715f
diff --git a/global.json b/global.json
index fa8e68b5fd5..fadf24c2020 100644
--- a/global.json
+++ b/global.json
@@ -1,6 +1,6 @@
 {
   "sdk": {
-    "version": "10.0.201",
+    "version": "10.0.104",
     "rollForward": "latestMajor",
     "allowPrerelease": false
   }
diff --git a/libs/server/Garnet.server.csproj b/libs/server/Garnet.server.csproj
index dc679f37e8f..b3de5774f9f 100644
--- a/libs/server/Garnet.server.csproj
+++ b/libs/server/Garnet.server.csproj
@@ -25,4 +25,19 @@
 	  <PackageReference Include="diskann-garnet" />
   </ItemGroup>
 
+  <!-- Copy locally-built diskann_garnet native library over the NuGet-provided one.
+       Usage: dotnet build -c Release -p:UseLocalDiskANN=true
+       Requires: cargo build (or cargo build release) in DiskANN/diskann-garnet first.
+       DiskANNProfile: debug (default) or release, maps to Cargo profile. -->
+  <Target Name="CopyLocalDiskANN" AfterTargets="Build" Condition="'$(UseLocalDiskANN)' == 'true'">
+    <PropertyGroup>
+      <DiskANNProfile Condition="'$(DiskANNProfile)' == '' and '$(Configuration)' == 'Release'">release</DiskANNProfile>
+      <DiskANNProfile Condition="'$(DiskANNProfile)' == ''">debug</DiskANNProfile>
+      <LocalDiskANNDll>$([MSBuild]::NormalizePath('$(MSBuildThisFileDirectory)..\..\DiskANN\target\$(DiskANNProfile)\diskann_garnet.dll'))</LocalDiskANNDll>
+      <NativeOutputDir>$(OutputPath)runtimes\win-x64\native\</NativeOutputDir>
+    </PropertyGroup>
+    <Copy SourceFiles="$(LocalDiskANNDll)" DestinationFolder="$(NativeOutputDir)" SkipUnchangedFiles="true" />
+    <Message Importance="High" Text="Copied local diskann_garnet.dll ($(DiskANNProfile)) to $(NativeOutputDir)" />
+  </Target>
+
 </Project>
\ No newline at end of file
diff --git a/libs/server/Resp/Vector/AttributeExtractor.cs b/libs/server/Resp/Vector/AttributeExtractor.cs
index 0fcecbb43a0..f15ab0e7170 100644
--- a/libs/server/Resp/Vector/AttributeExtractor.cs
+++ b/libs/server/Resp/Vector/AttributeExtractor.cs
@@ -35,11 +35,11 @@ public static int ExtractFields(
             if (s.IsEmpty || s[0] != (byte)'{') return 0;
             s = s[1..];
 
-            var found = 0;
-            var needed = selectorRanges.Length;
+                var found = 0;
+                var needed = selectorRanges.Length;
 
-            while (true)
-            {
+                while (true)
+                {
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return found;
                 if (s[0] == (byte)'}') return found;
@@ -50,16 +50,16 @@ public static int ExtractFields(
                 if (!SkipString(ref s)) return found;
                 var keyContent = afterOpenQuote[..(afterOpenQuote.Length - s.Length - 1)];
 
-                var matchIndex = -1;
-                for (var i = 0; i < selectorRanges.Length; i++)
-                {
+                    var matchIndex = -1;
+                    for (var i = 0; i < selectorRanges.Length; i++)
+                    {
                     if (results[i].IsNone &&
                         keyContent.SequenceEqual(filterBytes.Slice(selectorRanges[i].Start, selectorRanges[i].Length)))
-                    {
-                        matchIndex = i;
-                        break;
+                            {
+                                matchIndex = i;
+                                break;
+                            }
                     }
-                }
 
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty || s[0] != (byte)':') return found;
@@ -68,22 +68,22 @@ public static int ExtractFields(
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return found;
 
-                if (matchIndex >= 0)
-                {
+                    if (matchIndex >= 0)
+                    {
                     results[matchIndex] = ParseValueToken(json, ref s, ref program);
-                    found++;
-                    if (found == needed) return found;
-                }
-                else
-                {
+                        found++;
+                        if (found == needed) return found;
+                    }
+                    else
+                    {
                     if (!SkipValue(ref s)) return found;
-                }
+                    }
 
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return found;
                 if (s[0] == (byte)',') { s = s[1..]; continue; }
                 if (s[0] == (byte)'}') return found;
-                return found;
+                    return found;
             }
         }
 
@@ -98,8 +98,8 @@ public static ExprToken ExtractField(ReadOnlySpan<byte> json, ReadOnlySpan<byte>
             if (s.IsEmpty || s[0] != (byte)'{') return default;
             s = s[1..];
 
-            while (true)
-            {
+                while (true)
+                {
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return default;
                 if (s[0] == (byte)'}') return default;
@@ -119,7 +119,7 @@ public static ExprToken ExtractField(ReadOnlySpan<byte> json, ReadOnlySpan<byte>
                 s = TrimWhiteSpace(s);
                 if (s.IsEmpty) return default;
 
-                if (match)
+                    if (match)
                     return ParseValueToken(json, ref s);
 
                 if (!SkipValue(ref s)) return default;
@@ -375,5 +375,292 @@ internal static ReadOnlySpan<byte> TrimWhiteSpace(ReadOnlySpan<byte> s)
         private static bool IsNumberChar(byte b) =>
             IsDigit(b) || b == (byte)'-' || b == (byte)'+' ||
             b == (byte)'.' || b == (byte)'e' || b == (byte)'E';
+
+        // ======================== Binary attribute format ========================
+        //
+        // Pre-extracted binary format for fast filter evaluation:
+        //   [0xFF marker]
+        //   [num_fields: u8]
+        //   For each field:
+        //     [field_name_len: u8]
+        //     [field_name: N bytes]         ← raw UTF-8
+        //     [value_type: u8]              ← 0=string, 1=number, 2=bool_true, 3=bool_false, 4=null
+        //     [value_len: u16 LE]
+        //     [value_bytes: N bytes]        ← UTF-8 string or 8-byte f64 LE
+
+        internal const byte BinaryMarker = 0xFF;
+
+        private const byte BinTypeString = 0;
+        private const byte BinTypeNumber = 1;
+        private const byte BinTypeBoolTrue = 2;
+        private const byte BinTypeBoolFalse = 3;
+        private const byte BinTypeNull = 4;
+
+        /// <summary>
+        /// Convert a top-level JSON object to pre-extracted binary format.
+        /// Returns total bytes written, or -1 if output is too small.
+        /// </summary>
+        public static int ConvertJsonToBinary(ReadOnlySpan<byte> json, Span<byte> output)
+        {
+            var s = TrimWhiteSpace(json);
+            if (s.IsEmpty || s[0] != (byte)'{') return -1;
+            s = s[1..];
+
+            if (output.Length < 2) return -1;
+            output[0] = BinaryMarker;
+            // output[1] = num_fields — written at the end
+            var pos = 2;
+            byte fieldCount = 0;
+
+            while (true)
+            {
+                s = TrimWhiteSpace(s);
+                if (s.IsEmpty) return -1;
+                if (s[0] == (byte)'}') break;
+
+                if (s[0] != (byte)'"') return -1;
+
+                // Parse key
+                var afterOpenQuote = s[1..];
+                if (!SkipString(ref s)) return -1;
+                var keyContent = afterOpenQuote[..(afterOpenQuote.Length - s.Length - 1)];
+
+                // Check for escape sequences in key (rare)
+                var keyHasEscape = false;
+                for (var ki = 0; ki < keyContent.Length; ki++)
+                {
+                    if (keyContent[ki] == (byte)'\\') { keyHasEscape = true; break; }
+                }
+                if (keyHasEscape) return -1; // keys with escapes not supported
+
+                // Write field_name_len + field_name
+                if (keyContent.Length > 255) return -1;
+                if (pos + 1 + keyContent.Length + 1 + 2 > output.Length) return -1;
+                output[pos++] = (byte)keyContent.Length;
+                keyContent.CopyTo(output[pos..]);
+                pos += keyContent.Length;
+
+                // Skip colon
+                s = TrimWhiteSpace(s);
+                if (s.IsEmpty || s[0] != (byte)':') return -1;
+                s = s[1..];
+
+                // Parse value
+                s = TrimWhiteSpace(s);
+                if (s.IsEmpty) return -1;
+
+                var c = s[0];
+                if (c == (byte)'"')
+                {
+                    // String value — need to unescape
+                    s = s[1..]; // skip opening quote
+                    var body = s;
+                    var hasEscape = false;
+                    while (!s.IsEmpty)
+                    {
+                        if (s[0] == (byte)'\\') { hasEscape = true; s = s[2..]; continue; }
+                        if (s[0] == (byte)'"') break;
+                        s = s[1..];
+                    }
+                    if (s.IsEmpty) return -1;
+                    var strContent = body[..(body.Length - s.Length)];
+                    s = s[1..]; // skip closing quote
+
+                    output[pos++] = BinTypeString;
+
+                    if (!hasEscape)
+                    {
+                        // No escapes — direct copy
+                        if (pos + 2 + strContent.Length > output.Length) return -1;
+                        output[pos] = (byte)(strContent.Length & 0xFF);
+                        output[pos + 1] = (byte)((strContent.Length >> 8) & 0xFF);
+                        pos += 2;
+                        strContent.CopyTo(output[pos..]);
+                        pos += strContent.Length;
+                    }
+                    else
+                    {
+                        // Unescape into output
+                        var valueLenPos = pos;
+                        pos += 2; // reserve for value_len
+                        var valueStart = pos;
+                        for (var si = 0; si < strContent.Length; si++)
+                        {
+                            if (pos >= output.Length) return -1;
+                            if (strContent[si] == (byte)'\\' && si + 1 < strContent.Length)
+                            {
+                                si++;
+                                output[pos++] = strContent[si] switch
+                                {
+                                    (byte)'n' => (byte)'\n',
+                                    (byte)'r' => (byte)'\r',
+                                    (byte)'t' => (byte)'\t',
+                                    _ => strContent[si], // \", \\, \/ etc.
+                                };
+                            }
+                            else
+                            {
+                                output[pos++] = strContent[si];
+                            }
+                        }
+                        var valueLen = pos - valueStart;
+                        output[valueLenPos] = (byte)(valueLen & 0xFF);
+                        output[valueLenPos + 1] = (byte)((valueLen >> 8) & 0xFF);
+                    }
+                }
+                else if (IsDigit(c) || c == (byte)'-' || c == (byte)'+')
+                {
+                    // Number value — store as 8-byte f64 LE
+                    var numStart = s;
+                    while (!s.IsEmpty && IsNumberChar(s[0])) s = s[1..];
+                    var numSpan = numStart[..(numStart.Length - s.Length)];
+                    if (!Utf8Parser.TryParse(numSpan, out double numVal, out var consumed) || consumed != numSpan.Length)
+                        return -1;
+
+                    output[pos++] = BinTypeNumber;
+                    if (pos + 2 + 8 > output.Length) return -1;
+                    output[pos] = 8;
+                    output[pos + 1] = 0;
+                    pos += 2;
+                    System.BitConverter.TryWriteBytes(output[pos..], numVal);
+                    pos += 8;
+                }
+                else if (c == (byte)'t')
+                {
+                    if (!s.StartsWith("true"u8)) return -1;
+                    s = s[4..];
+                    output[pos++] = BinTypeBoolTrue;
+                    if (pos + 2 > output.Length) return -1;
+                    output[pos] = 0; output[pos + 1] = 0;
+                    pos += 2;
+                }
+                else if (c == (byte)'f')
+                {
+                    if (!s.StartsWith("false"u8)) return -1;
+                    s = s[5..];
+                    output[pos++] = BinTypeBoolFalse;
+                    if (pos + 2 > output.Length) return -1;
+                    output[pos] = 0; output[pos + 1] = 0;
+                    pos += 2;
+                }
+                else if (c == (byte)'n')
+                {
+                    if (!s.StartsWith("null"u8)) return -1;
+                    s = s[4..];
+                    output[pos++] = BinTypeNull;
+                    if (pos + 2 > output.Length) return -1;
+                    output[pos] = 0; output[pos + 1] = 0;
+                    pos += 2;
+                }
+                else
+                {
+                    // Nested objects/arrays — not supported in binary format
+                    return -1;
+                }
+
+                fieldCount++;
+
+                // Next field or end
+                s = TrimWhiteSpace(s);
+                if (s.IsEmpty) return -1;
+                if (s[0] == (byte)',') { s = s[1..]; continue; }
+                if (s[0] == (byte)'}') break;
+                return -1;
+            }
+
+            output[1] = fieldCount;
+            return pos;
+        }
+
+        /// <summary>
+        /// Extract fields from pre-extracted binary attribute data.
+        /// Same contract as ExtractFields but ~10x faster (no JSON parsing).
+        /// </summary>
+        public static int ExtractFieldsBinary(
+            ReadOnlySpan<byte> binary,
+            ReadOnlySpan<byte> filterBytes,
+            ReadOnlySpan<(int Start, int Length)> selectorRanges,
+            Span<ExprToken> results,
+            ref ExprProgram program)
+        {
+            for (var i = 0; i < selectorRanges.Length; i++)
+                results[i] = default;
+
+            if (binary.Length < 2 || binary[0] != BinaryMarker)
+                return 0;
+
+            var numFields = binary[1];
+            var pos = 2;
+            var found = 0;
+            var needed = selectorRanges.Length;
+
+            for (var f = 0; f < numFields && pos < binary.Length; f++)
+            {
+                // Read field name
+                if (pos >= binary.Length) break;
+                var nameLen = binary[pos++];
+                if (pos + nameLen > binary.Length) break;
+                var fieldName = binary.Slice(pos, nameLen);
+                pos += nameLen;
+
+                // Read value type
+                if (pos >= binary.Length) break;
+                var valueType = binary[pos++];
+
+                // Read value length
+                if (pos + 2 > binary.Length) break;
+                var valueLen = (int)(binary[pos] | (binary[pos + 1] << 8));
+                pos += 2;
+
+                // Read value bytes
+                if (pos + valueLen > binary.Length) break;
+
+                // Match against selectors
+                var matchIndex = -1;
+                for (var i = 0; i < selectorRanges.Length; i++)
+                {
+                    if (results[i].IsNone &&
+                        fieldName.SequenceEqual(filterBytes.Slice(selectorRanges[i].Start, selectorRanges[i].Length)))
+                    {
+                        matchIndex = i;
+                        break;
+                    }
+                }
+
+                if (matchIndex >= 0)
+                {
+                    switch (valueType)
+                    {
+                        case BinTypeString:
+                            // Create a Str token referencing the binary buffer offsets
+                            results[matchIndex] = ExprToken.NewStr(pos, valueLen, hasEscape: false);
+                            break;
+                        case BinTypeNumber:
+                            if (valueLen == 8)
+                            {
+                                var numVal = System.BitConverter.ToDouble(binary[pos..]);
+                                results[matchIndex] = ExprToken.NewNum(numVal);
+                            }
+                            break;
+                        case BinTypeBoolTrue:
+                            results[matchIndex] = ExprToken.NewNum(1);
+                            break;
+                        case BinTypeBoolFalse:
+                            results[matchIndex] = ExprToken.NewNum(0);
+                            break;
+                        case BinTypeNull:
+                            results[matchIndex] = ExprToken.NewNull();
+                            break;
+                    }
+
+                    found++;
+                    if (found == needed) return found;
+                }
+
+                pos += valueLen;
+            }
+
+            return found;
+        }
     }
-}
\ No newline at end of file
+}
diff --git a/libs/server/Resp/Vector/DiskANNService.cs b/libs/server/Resp/Vector/DiskANNService.cs
index 37c7ace6c31..3ac441a79cd 100644
--- a/libs/server/Resp/Vector/DiskANNService.cs
+++ b/libs/server/Resp/Vector/DiskANNService.cs
@@ -18,7 +18,7 @@ internal sealed unsafe class DiskANNService
         internal const byte Attributes = 3;
         private const byte Metadata = 4;
         internal const byte InternalIdMap = 5;
-        private const byte ExternalIdMap = 6;
+        internal const byte ExternalIdMap = 6;
 
         public nint CreateIndex(
             ulong context,
@@ -31,14 +31,13 @@ public nint CreateIndex(
             delegate* unmanaged[Cdecl]<ulong, uint, nint, nuint, nint, nint, void> readCallback,
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, nint, nuint, byte> writeCallback,
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, byte> deleteCallback,
-            delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback
+            delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback,
+            delegate* unmanaged[Cdecl]<ulong, uint, byte> filterCallback
         )
         {
-            // TODO: actually pass distance metric
-
             unsafe
             {
-                return NativeDiskANNMethods.create_index(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, (nint)readCallback, (nint)writeCallback, (nint)deleteCallback, (nint)readModifyWriteCallback);
+                return NativeDiskANNMethods.create_index(context, dimensions, reduceDims, quantType, (int)distanceMetric, buildExplorationFactor, numLinks, (nint)readCallback, (nint)writeCallback, (nint)deleteCallback, (nint)readModifyWriteCallback, (nint)filterCallback);
             }
         }
 
@@ -53,9 +52,10 @@ public nint RecreateIndex(
             delegate* unmanaged[Cdecl]<ulong, uint, nint, nuint, nint, nint, void> readCallback,
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, nint, nuint, byte> writeCallback,
             delegate* unmanaged[Cdecl]<ulong, nint, nuint, byte> deleteCallback,
-            delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback
+            delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback,
+            delegate* unmanaged[Cdecl]<ulong, uint, byte> filterCallback
         )
-        => CreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetricType, readCallback, writeCallback, deleteCallback, readModifyWriteCallback);
+        => CreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetricType, readCallback, writeCallback, deleteCallback, readModifyWriteCallback, filterCallback);
 
         public void DropIndex(ulong context, nint index)
         {
@@ -308,12 +308,14 @@ public static partial nint create_index(
             uint dimensions,
             uint reduceDims,
             VectorQuantType quantType,
+            int metricType,
             uint buildExplorationFactor,
             uint numLinks,
             nint readCallback,
             nint writeCallback,
             nint deleteCallback,
-            nint readModifyWriteCallback
+            nint readModifyWriteCallback,
+            nint filterCallback
         );
 
         [LibraryImport(DISKANN_GARNET)]
diff --git a/libs/server/Resp/Vector/VectorManager.Callbacks.cs b/libs/server/Resp/Vector/VectorManager.Callbacks.cs
index 61b128af600..fb02914c181 100644
--- a/libs/server/Resp/Vector/VectorManager.Callbacks.cs
+++ b/libs/server/Resp/Vector/VectorManager.Callbacks.cs
@@ -179,6 +179,7 @@ public void Dispose()
         private unsafe delegate* unmanaged[Cdecl]<ulong, nint, nuint, nint, nuint, byte> WriteCallbackPtr { get; } = &WriteCallbackUnmanaged;
         private unsafe delegate* unmanaged[Cdecl]<ulong, nint, nuint, byte> DeleteCallbackPtr { get; } = &DeleteCallbackUnmanaged;
         private unsafe delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> ReadModifyWriteCallbackPtr { get; } = &ReadModifyWriteCallbackUnmanaged;
+        private unsafe delegate* unmanaged[Cdecl]<ulong, uint, byte> InlineFilterCallbackPtr { get; } = &InlineFilterCandidateCallbackImpl;
 
         /// <summary>
         /// Used to thread the active <see cref="StorageSession"/> across p/invoke and reverse p/invoke boundaries into DiskANN.
diff --git a/libs/server/Resp/Vector/VectorManager.Filter.cs b/libs/server/Resp/Vector/VectorManager.Filter.cs
index 1a1eedacf9d..ad1a2825e93 100644
--- a/libs/server/Resp/Vector/VectorManager.Filter.cs
+++ b/libs/server/Resp/Vector/VectorManager.Filter.cs
@@ -3,7 +3,9 @@
 
 using System;
 using System.Buffers.Binary;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using Tsavorite.core;
 
 namespace Garnet.server
 {
@@ -215,5 +217,138 @@ internal static int GetSelectorRanges(
             }
             return count;
         }
+
+        // ── Inline filter callback infrastructure ─────
+        //
+        // These types allow the Rust DiskANN pipeline to call
+        // back into C# for per-candidate filter evaluation, avoiding the need
+        // to over-fetch candidates and filter them afterwards.
+        //
+        // The compiled filter program and scratch buffers are stored in
+        // [ThreadStatic] fields before the FFI call. The callback runs on the
+        // same thread, so it reads the pre-compiled state directly — no need
+        // to marshal pointers through the FFI boundary.
+
+        /// <summary>
+        /// Thread-static state for the inline filter callback.
+        /// Set before the FFI call into Rust, read by <see cref="InlineFilterCandidateCallbackImpl"/>.
+        /// </summary>
+        [ThreadStatic]
+        internal static InlineFilterState t_inlineFilterState;
+
+
+        /// <summary>
+        /// Per-query filter state maintained on the C# side.
+        /// Populated before calling into Rust; the callback reads it from thread-static storage.
+        /// All Span/pointer fields reference pinned scratch-buffer memory that remains
+        /// valid for the duration of the FFI call.
+        /// </summary>
+        internal unsafe struct InlineFilterState
+        {
+            /// <summary>Base Garnet context (no term bits).</summary>
+            public ulong Context;
+
+            /// <summary>Compiled instruction count.</summary>
+            public int InstrCount;
+
+            /// <summary>Compile-time tuple pool count.</summary>
+            public int TupleCount;
+
+            /// <summary>Unique selector count.</summary>
+            public int SelectorCount;
+
+            // Pointers into scratch buffer (pinned for FFI duration):
+            public ExprToken* InstrBufPtr;
+            public ExprToken* TuplePoolBufPtr;
+            public ExprToken* RuntimePoolBufPtr;
+            public ExprToken* ExtractedFieldsPtr;
+            public ExprToken* StackBufPtr;
+            public (int Start, int Length)* SelectorRangesPtr;
+
+            /// <summary>Pointer to the filter expression bytes.</summary>
+            public byte* FilterBytesPtr;
+
+            /// <summary>Length of the filter expression bytes.</summary>
+            public int FilterBytesLen;
+        }
+
+        /// <summary>
+        /// Per-candidate filter callback invoked from Rust during DiskANN inline filtered search.
+        /// Reads the candidate's external ID and attributes from Garnet storage, then evaluates
+        /// the compiled filter expression stored in <see cref="t_inlineFilterState"/>.
+        /// </summary>
+        /// <returns>1 if the candidate passes the filter, 0 otherwise.</returns>
+        [UnmanagedCallersOnly(CallConvs = [typeof(CallConvCdecl)])]
+        internal static unsafe byte InlineFilterCandidateCallbackImpl(ulong context, uint internalId)
+        {
+            return EvaluateCandidateFilter(internalId);
+        }
+
+        /// <summary>
+        /// Shared filter evaluation logic for both single and batch callbacks.
+        /// Reads the candidate's external ID and attributes, then evaluates the compiled filter.
+        /// </summary>
+        private static unsafe byte EvaluateCandidateFilter(uint internalId)
+        {
+            ref var state = ref t_inlineFilterState;
+
+            // 1. Read external ID for this internal_id via ExtMap
+            Span<byte> iidKey = stackalloc byte[sizeof(uint)];
+            BinaryPrimitives.WriteUInt32LittleEndian(iidKey, internalId);
+
+            Span<byte> eidBuf = stackalloc byte[128];
+            var eidMem = SpanByteAndMemory.FromPinnedSpan(eidBuf);
+            try
+            {
+                if (!ReadSizeUnknown(state.Context | DiskANNService.ExternalIdMap, iidKey, ref eidMem))
+                    return 0; // can't find external ID → exclude
+
+                // 2. Read attributes by external ID
+                Span<byte> attrBuf = stackalloc byte[256];
+                var attrMem = SpanByteAndMemory.FromPinnedSpan(attrBuf);
+                try
+                {
+                    if (!ReadSizeUnknown(state.Context | DiskANNService.Attributes, eidMem.AsReadOnlySpan(), ref attrMem))
+                        return 0; // no attributes → exclude
+
+                    // 3. Rebuild ExprProgram from thread-static state pointers
+                    var instrSpan = new Span<ExprToken>(state.InstrBufPtr, state.InstrCount);
+                    var tuplePool = new Span<ExprToken>(state.TuplePoolBufPtr, state.TupleCount);
+                    var runtimePool = new Span<ExprToken>(state.RuntimePoolBufPtr, MaxRuntimePool);
+                    var extractedFields = new Span<ExprToken>(state.ExtractedFieldsPtr, Math.Max(state.SelectorCount, 1));
+                    var stackBuf = new Span<ExprToken>(state.StackBufPtr, StackCapacity);
+                    var selectorRanges = new Span<(int, int)>(state.SelectorRangesPtr, state.SelectorCount);
+                    var filterBytes = new ReadOnlySpan<byte>(state.FilterBytesPtr, state.FilterBytesLen);
+
+                    var program = new ExprProgram
+                    {
+                        Instructions = instrSpan,
+                        Length = state.InstrCount,
+                        TuplePool = tuplePool,
+                        TuplePoolLength = state.TupleCount,
+                        RuntimePool = runtimePool,
+                        RuntimePoolLength = 0,
+                    };
+
+                    program.ResetRuntimePool();
+
+                    AttributeExtractor.ExtractFields(attrMem.AsReadOnlySpan(), filterBytes, selectorRanges, extractedFields, ref program);
+
+                    var stack = new ExprStack(stackBuf);
+                    var pass = ExprRunner.Run(ref program, attrMem.AsReadOnlySpan(), filterBytes, selectorRanges, extractedFields, ref stack);
+
+                    return pass ? (byte)1 : (byte)0;
+                }
+                finally
+                {
+                    attrMem.Memory?.Dispose();
+                }
+            }
+            finally
+            {
+                eidMem.Memory?.Dispose();
+            }
+        }
+
     }
 }
\ No newline at end of file
diff --git a/libs/server/Resp/Vector/VectorManager.Locking.cs b/libs/server/Resp/Vector/VectorManager.Locking.cs
index f74c8c69134..5233da62780 100644
--- a/libs/server/Resp/Vector/VectorManager.Locking.cs
+++ b/libs/server/Resp/Vector/VectorManager.Locking.cs
@@ -168,7 +168,7 @@ internal ReadVectorLock ReadVectorIndex(StorageSession storageSession, ref SpanB
                     nint newlyAllocatedIndex;
                     unsafe
                     {
-                        newlyAllocatedIndex = Service.RecreateIndex(indexContext, dims, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr);
+                        newlyAllocatedIndex = Service.RecreateIndex(indexContext, dims, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, InlineFilterCallbackPtr);
                     }
 
                     input.header.cmd = RespCommand.VADD;
@@ -313,7 +313,13 @@ out GarnetStatus status
 
                         unsafe
                         {
-                            newlyAllocatedIndex = Service.RecreateIndex(indexContext, dims, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr);
+                            newlyAllocatedIndex = Service.RecreateIndex(indexContext, dims, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, InlineFilterCallbackPtr);
+                        }
+
+                        if (newlyAllocatedIndex == 0)
+                        {
+                            vectorSetLocks.ReleaseExclusiveLock(exclusiveLockToken);
+                            throw new InvalidOperationException($"DiskANN RecreateIndex returned null pointer (dims={dims}, quantType={quantType}, metric={distanceMetric})");
                         }
 
                         input.parseState.EnsureCapacity(12);
@@ -345,7 +351,13 @@ out GarnetStatus status
 
                         unsafe
                         {
-                            newlyAllocatedIndex = Service.CreateIndex(indexContext, dims, reduceDims, quantizer, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr);
+                            newlyAllocatedIndex = Service.CreateIndex(indexContext, dims, reduceDims, quantizer, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, InlineFilterCallbackPtr);
+                        }
+
+                        if (newlyAllocatedIndex == 0)
+                        {
+                            vectorSetLocks.ReleaseExclusiveLock(exclusiveLockToken);
+                            throw new InvalidOperationException($"DiskANN CreateIndex returned null pointer (dims={dims}, quantType={quantizer}, metric={distanceMetric})");
                         }
 
                         input.parseState.EnsureCapacity(12);
diff --git a/libs/server/Resp/Vector/VectorManager.Migration.cs b/libs/server/Resp/Vector/VectorManager.Migration.cs
index 69235d18aca..c4728fc0c5b 100644
--- a/libs/server/Resp/Vector/VectorManager.Migration.cs
+++ b/libs/server/Resp/Vector/VectorManager.Migration.cs
@@ -175,7 +175,7 @@ public void HandleMigratedIndexKey(
                 nint newlyAllocatedIndex;
                 unsafe
                 {
-                    newlyAllocatedIndex = Service.RecreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr);
+                    newlyAllocatedIndex = Service.RecreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetric, ReadCallbackPtr, WriteCallbackPtr, DeleteCallbackPtr, ReadModifyWriteCallbackPtr, InlineFilterCallbackPtr);
                 }
 
                 var ctxArg = ArgSlice.FromPinnedSpan(MemoryMarshal.Cast<ulong, byte>(MemoryMarshal.CreateSpan(ref context, 1)));
diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs
index 28952996a8e..9a024a4d2b0 100644
--- a/libs/server/Resp/Vector/VectorManager.cs
+++ b/libs/server/Resp/Vector/VectorManager.cs
@@ -281,7 +281,7 @@ public void ResumePostRecovery()
         public void Dispose()
         {
             // We must drain all these before disposing, otherwise we'll leave replicationBlockEvent unset
-            _ = replicationReplayChannel.Writer.TryComplete();
+            replicationReplayChannel.Writer.Complete();
             replicationReplayChannel.Reader.Completion.Wait();
 
             Task.WhenAll(replicationReplayTasks).Wait();
@@ -477,7 +477,7 @@ internal Status TryDeleteVectorSet(StorageSession storageSession, ref SpanByte k
         /// <summary>
         /// Perform a similarity search given a vector to compare against.
         /// </summary>
-        internal VectorManagerResult ValueSimilarity(
+        internal unsafe VectorManagerResult ValueSimilarity(
             ReadOnlySpan<byte> indexValue,
             VectorValueType valueType,
             ReadOnlySpan<byte> values,
@@ -505,120 +505,218 @@ ref SpanByteAndMemory filterBitmap
                 return VectorManagerResult.BadParams;
             }
 
-            // When a filter is present, over-retrieve candidates from DiskANN so that
-            // post-filtering has enough results to fill the requested count.
-            //
-            // FILTER-EF controls both the graph exploration breadth and the output
-            // buffer size when a filter is active, allowing it to be tuned independently
-            // from EF (which is used for unfiltered searches).
-            var retrieveCount = !filter.IsEmpty ? maxFilteringEffort : count;
-            var effectiveEF = !filter.IsEmpty
-                ? Math.Max(searchExplorationFactor, maxFilteringEffort)
-                : searchExplorationFactor;
-
-            // No point in asking for more data than the effort we'll put in
-            if (retrieveCount > effectiveEF)
-            {
-                retrieveCount = effectiveEF;
-            }
+            var effectiveEF = searchExplorationFactor;
 
-            // Make sure enough space in distances for requested count
-            if (retrieveCount > outputDistances.Length)
+            if (!filter.IsEmpty)
             {
-                if (!outputDistances.IsSpanByte)
+                // ── Inline filtered search path ─────────
+                // Compile the filter, set up callback state, and let Rust
+                // evaluate per-candidate via InlineFilterCandidateCallbackImpl.
+                // Only passing candidates are written to the output buffer,
+                // so we size it for the desired count, not the overfetch.
+
+                // Size output buffers for desired result count
+                if (count * sizeof(float) > outputDistances.Length)
                 {
-                    outputDistances.Memory.Dispose();
+                    if (!outputDistances.IsSpanByte)
+                        outputDistances.Memory.Dispose();
+                    outputDistances = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(count * sizeof(float)), count * sizeof(float));
                 }
+                outputDistances.Length = count * sizeof(float);
 
-                outputDistances = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(retrieveCount * sizeof(float)), retrieveCount * sizeof(float));
-            }
+                if (count * MinimumSpacePerId > outputIds.Length)
+                {
+                    if (!outputIds.IsSpanByte)
+                        outputIds.Memory.Dispose();
+                    outputIds = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(count * MinimumSpacePerId), count * MinimumSpacePerId);
+                }
 
-            // Indicate requested # of matches
-            outputDistances.Length = retrieveCount * sizeof(float);
+                // Borrow scratch space for compiled filter program
+                var bufferSlice = ActiveThreadSession.scratchBufferBuilder.CreateArgSlice(
+                    TotalPoolTokens * ExprToken.Size + MaxSelectors * 2 * sizeof(int));
+                var span = MemoryMarshal.Cast<byte, ExprToken>(bufferSlice.Span);
+                var selectorBuf = MemoryMarshal.Cast<byte, (int Start, int Length)>(
+                    bufferSlice.Span.Slice(TotalPoolTokens * ExprToken.Size));
 
-            // If we're fairly sure the ids won't fit, go ahead and grab more memory now
-            //
-            // If we're still wrong, we'll end up using continuation callbacks which have more overhead
-            if (retrieveCount * MinimumSpacePerId > outputIds.Length)
-            {
-                if (!outputIds.IsSpanByte)
+                try
                 {
-                    outputIds.Memory.Dispose();
-                }
+                    span.Clear();
+
+                    var offset = 0;
+                    var instrBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var tuplePoolBuf = span.Slice(offset, MaxTuplePool); offset += MaxTuplePool;
+                    var tokensBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var opsStackBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var runtimePoolBuf = span.Slice(offset, MaxRuntimePool); offset += MaxRuntimePool;
+                    var extractedFields = span.Slice(offset, MaxSelectors); offset += MaxSelectors;
+                    var stackBuf = span.Slice(offset, StackCapacity);
+
+                    var instrCount = ExprCompiler.TryCompile(filter, instrBuf, tuplePoolBuf, tokensBuf, opsStackBuf, out var tupleCount, out _);
+                    if (instrCount < 0)
+                    {
+                        // Compile failed — return zero results
+                        outputDistances.Length = 0;
+                        filterBitmap.Length = 0;
+                        outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                        return VectorManagerResult.OK;
+                    }
 
-                outputIds = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(retrieveCount * MinimumSpacePerId), retrieveCount * MinimumSpacePerId);
-            }
+                    var selectorCount = GetSelectorRanges(instrBuf[..instrCount], instrCount, filter, selectorBuf);
 
-            var found =
-                Service.SearchVector(
-                    context,
-                    indexPtr,
-                    valueType,
-                    values,
-                    delta,
-                    effectiveEF,
-                    filter,
-                    maxFilteringEffort,
-                    outputIds,
-                    outputDistances,
-                    out var continuation
-                );
+                    // Pin filter bytes and scratch buffer pointers, then populate thread-static state
+                    fixed (byte* filterPtr = filter)
+                    fixed (ExprToken* instrPtr = instrBuf, tuplePtr = tuplePoolBuf, runtimePtr = runtimePoolBuf, fieldsPtr = extractedFields, stackPtr = stackBuf)
+                    fixed ((int, int)* selPtr = selectorBuf)
+                    {
+                        t_inlineFilterState = new InlineFilterState
+                        {
+                            Context = context,
+                            InstrCount = instrCount,
+                            TupleCount = tupleCount,
+                            SelectorCount = selectorCount,
+                            InstrBufPtr = instrPtr,
+                            TuplePoolBufPtr = tuplePtr,
+                            RuntimePoolBufPtr = runtimePtr,
+                            ExtractedFieldsPtr = fieldsPtr,
+                            StackBufPtr = stackPtr,
+                            SelectorRangesPtr = selPtr,
+                            FilterBytesPtr = filterPtr,
+                            FilterBytesLen = filter.Length,
+                        };
+
+                        var found = Service.SearchVector(
+                            context,
+                            indexPtr,
+                            valueType,
+                            values,
+                            delta,
+                            effectiveEF,
+                            filter,
+                            maxFilteringEffort,
+                            outputIds,
+                            outputDistances,
+                            out var continuation
+                        );
+
+                        if (found < 0)
+                        {
+                            logger?.LogWarning("Error indicating response from vector service {found}", found);
+                            outputIdFormat = VectorIdFormat.Invalid;
+                            return VectorManagerResult.BadParams;
+                        }
 
-            if (found < 0)
-            {
-                logger?.LogWarning("Error indicating response from vector service {found}", found);
-                outputIdFormat = VectorIdFormat.Invalid;
-                return VectorManagerResult.BadParams;
-            }
+                        if (includeAttributes)
+                        {
+                            FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes);
+                        }
 
-            if (includeAttributes || !filter.IsEmpty)
-            {
-                FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes);
-            }
+                        if (continuation != 0)
+                        {
+                            throw new NotImplementedException();
+                        }
 
-            // Apply post-filtering if filter is specified
-            if (!filter.IsEmpty)
+                        outputDistances.Length = sizeof(float) * found;
+                        filterBitmap.Length = 0; // No bitmap needed — results are already filtered
+
+                        outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                        if (quantType == VectorQuantType.XPreQ8)
+                        {
+                            outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                        }
+
+                        return VectorManagerResult.OK;
+                    }
+                }
+                finally
+                {
+                    ActiveThreadSession.scratchBufferBuilder.RewindScratchBuffer(ref bufferSlice);
+                }
+            }
+            else
             {
-                // Ensure bitmap is large enough for the over-retrieved result set
-                var requiredBitmapBytes = (found + 7) >> 3;
-                if (requiredBitmapBytes > filterBitmap.Length)
+                // ── Unfiltered search path (unchanged) ───────────────────
+                var retrieveCount = count;
+
+                // Make sure enough space in distances for requested count
+                if (retrieveCount > outputDistances.Length)
                 {
-                    if (!filterBitmap.IsSpanByte)
+                    if (!outputDistances.IsSpanByte)
                     {
-                        filterBitmap.Memory.Dispose();
+                        outputDistances.Memory.Dispose();
                     }
 
-                    filterBitmap = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(requiredBitmapBytes), requiredBitmapBytes);
+                    outputDistances = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(retrieveCount * sizeof(float)), retrieveCount * sizeof(float));
                 }
 
-                ApplyPostFilter(filter, found, outputAttributes.AsReadOnlySpan(), filterBitmap.AsSpan(), ActiveThreadSession.scratchBufferBuilder);
-            }
+                // Indicate requested # of matches
+                outputDistances.Length = retrieveCount * sizeof(float);
 
-            if (continuation != 0)
-            {
-                // TODO: paged results!
-                throw new NotImplementedException();
-            }
+                // If we're fairly sure the ids won't fit, go ahead and grab more memory now
+                //
+                // If we're still wrong, we'll end up using continuation callbacks which have more overhead
+                if (retrieveCount * MinimumSpacePerId > outputIds.Length)
+                {
+                    if (!outputIds.IsSpanByte)
+                    {
+                        outputIds.Memory.Dispose();
+                    }
 
-            outputDistances.Length = sizeof(float) * found;
+                    outputIds = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(retrieveCount * MinimumSpacePerId), retrieveCount * MinimumSpacePerId);
+                }
 
-            // Default assumption is length prefixed
-            outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                var found =
+                    Service.SearchVector(
+                        context,
+                        indexPtr,
+                        valueType,
+                        values,
+                        delta,
+                        effectiveEF,
+                        filter,
+                        0,
+                        outputIds,
+                        outputDistances,
+                        out var continuation
+                    );
+
+                if (found < 0)
+                {
+                    logger?.LogWarning("Error indicating response from vector service {found}", found);
+                    outputIdFormat = VectorIdFormat.Invalid;
+                    return VectorManagerResult.BadParams;
+                }
 
-            if (quantType == VectorQuantType.XPreQ8)
-            {
-                // But in this special case, we force them to be 4-byte ids
-                //outputIdFormat = VectorIdFormat.FixedI32;
+                if (includeAttributes)
+                {
+                    FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes);
+                }
+
+                if (continuation != 0)
+                {
+                    // TODO: paged results!
+                    throw new NotImplementedException();
+                }
+
+                outputDistances.Length = sizeof(float) * found;
+
+                // Default assumption is length prefixed
                 outputIdFormat = VectorIdFormat.I32LengthPrefixed;
-            }
 
-            return VectorManagerResult.OK;
+                if (quantType == VectorQuantType.XPreQ8)
+                {
+                    // But in this special case, we force them to be 4-byte ids
+                    //outputIdFormat = VectorIdFormat.FixedI32;
+                    outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                }
+
+                return VectorManagerResult.OK;
+            }
         }
 
         /// <summary>
         /// Perform a similarity search given a vector to compare against.
         /// </summary>
-        internal VectorManagerResult ElementSimilarity(
+        internal unsafe VectorManagerResult ElementSimilarity(
             ReadOnlySpan<byte> indexValue,
             ReadOnlySpan<byte> element,
             int count,
@@ -638,108 +736,203 @@ ref SpanByteAndMemory filterBitmap
 
             ReadIndex(indexValue, out var context, out _, out _, out var quantType, out _, out _, out _, out var indexPtr, out _);
 
-            // When a filter is present, over-retrieve candidates from DiskANN
-            var retrieveCount = !filter.IsEmpty ? maxFilteringEffort : count;
-            var effectiveEF = !filter.IsEmpty
-                ? Math.Max(searchExplorationFactor, maxFilteringEffort)
-                : searchExplorationFactor;
+            var effectiveEF = searchExplorationFactor;
 
-            // No point in asking for more data than the effort we'll put in
-            if (retrieveCount > effectiveEF)
+            if (!filter.IsEmpty)
             {
-                retrieveCount = effectiveEF;
-            }
+                // ── Inline-filtered search path ──────────────────────────
+                // Size output buffers for desired result count
+                if (count * sizeof(float) > outputDistances.Length)
+                {
+                    if (!outputDistances.IsSpanByte)
+                        outputDistances.Memory.Dispose();
+                    outputDistances = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(count * sizeof(float)), count * sizeof(float));
+                }
+                outputDistances.Length = count * sizeof(float);
 
-            // Make sure enough space in distances for requested count
-            if (retrieveCount * sizeof(float) > outputDistances.Length)
-            {
-                if (!outputDistances.IsSpanByte)
+                if (count * MinimumSpacePerId > outputIds.Length)
                 {
-                    outputDistances.Memory.Dispose();
+                    if (!outputIds.IsSpanByte)
+                        outputIds.Memory.Dispose();
+                    outputIds = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(count * MinimumSpacePerId), count * MinimumSpacePerId);
                 }
 
-                outputDistances = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(retrieveCount * sizeof(float)), retrieveCount * sizeof(float));
-            }
+                // Borrow scratch space for compiled filter program
+                var bufferSlice = ActiveThreadSession.scratchBufferBuilder.CreateArgSlice(
+                    TotalPoolTokens * ExprToken.Size + MaxSelectors * 2 * sizeof(int));
+                var span = MemoryMarshal.Cast<byte, ExprToken>(bufferSlice.Span);
+                var selectorBuf = MemoryMarshal.Cast<byte, (int Start, int Length)>(
+                    bufferSlice.Span.Slice(TotalPoolTokens * ExprToken.Size));
+
+                try
+                {
+                    span.Clear();
+
+                    var offset = 0;
+                    var instrBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var tuplePoolBuf = span.Slice(offset, MaxTuplePool); offset += MaxTuplePool;
+                    var tokensBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var opsStackBuf = span.Slice(offset, MaxInstructions); offset += MaxInstructions;
+                    var runtimePoolBuf = span.Slice(offset, MaxRuntimePool); offset += MaxRuntimePool;
+                    var extractedFields = span.Slice(offset, MaxSelectors); offset += MaxSelectors;
+                    var stackBuf = span.Slice(offset, StackCapacity);
+
+                    var instrCount = ExprCompiler.TryCompile(filter, instrBuf, tuplePoolBuf, tokensBuf, opsStackBuf, out var tupleCount, out _);
+                    if (instrCount < 0)
+                    {
+                        outputDistances.Length = 0;
+                        filterBitmap.Length = 0;
+                        outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                        return VectorManagerResult.OK;
+                    }
 
-            // Indicate requested # of matches
-            outputDistances.Length = retrieveCount * sizeof(float);
+                    var selectorCount = GetSelectorRanges(instrBuf[..instrCount], instrCount, filter, selectorBuf);
 
-            // If we're fairly sure the ids won't fit, go ahead and grab more memory now
-            //
-            // If we're still wrong, we'll end up using continuation callbacks which have more overhead
-            if (retrieveCount * MinimumSpacePerId > outputIds.Length)
-            {
-                if (!outputIds.IsSpanByte)
+                    fixed (byte* filterPtr = filter)
+                    fixed (ExprToken* instrPtr = instrBuf, tuplePtr = tuplePoolBuf, runtimePtr = runtimePoolBuf, fieldsPtr = extractedFields, stackPtr = stackBuf)
+                    fixed ((int, int)* selPtr = selectorBuf)
+                    {
+                        t_inlineFilterState = new InlineFilterState
+                        {
+                            Context = context,
+                            InstrCount = instrCount,
+                            TupleCount = tupleCount,
+                            SelectorCount = selectorCount,
+                            InstrBufPtr = instrPtr,
+                            TuplePoolBufPtr = tuplePtr,
+                            RuntimePoolBufPtr = runtimePtr,
+                            ExtractedFieldsPtr = fieldsPtr,
+                            StackBufPtr = stackPtr,
+                            SelectorRangesPtr = selPtr,
+                            FilterBytesPtr = filterPtr,
+                            FilterBytesLen = filter.Length,
+                        };
+
+                        var found = Service.SearchElement(
+                            context,
+                            indexPtr,
+                            element,
+                            delta,
+                            effectiveEF,
+                            filter,
+                            maxFilteringEffort,
+                            outputIds,
+                            outputDistances,
+                            out var continuation
+                        );
+
+                        if (found < 0)
+                        {
+                            logger?.LogWarning("Error indicating response from vector service {found}", found);
+                            outputIdFormat = VectorIdFormat.Invalid;
+                            return VectorManagerResult.BadParams;
+                        }
+
+                        if (includeAttributes)
+                        {
+                            FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes);
+                        }
+
+                        if (continuation != 0)
+                        {
+                            throw new NotImplementedException();
+                        }
+
+                        outputDistances.Length = sizeof(float) * found;
+                        filterBitmap.Length = 0;
+
+                        outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                        if (quantType == VectorQuantType.XPreQ8)
+                        {
+                            outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                        }
+
+                        return VectorManagerResult.OK;
+                    }
+                }
+                finally
                 {
-                    outputIds.Memory.Dispose();
+                    ActiveThreadSession.scratchBufferBuilder.RewindScratchBuffer(ref bufferSlice);
                 }
-
-                outputIds = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(retrieveCount * MinimumSpacePerId), retrieveCount * MinimumSpacePerId);
             }
+            else
+            {
+                // ── Unfiltered search path (unchanged) ───────────────────
+                var retrieveCount = count;
 
-            var found =
-                Service.SearchElement(
-                    context,
-                    indexPtr,
-                    element,
-                    delta,
-                    effectiveEF,
-                    filter,
-                    maxFilteringEffort,
-                    outputIds,
-                    outputDistances,
-                    out var continuation
-                );
+                // Make sure enough space in distances for requested count
+                if (retrieveCount * sizeof(float) > outputDistances.Length)
+                {
+                    if (!outputDistances.IsSpanByte)
+                    {
+                        outputDistances.Memory.Dispose();
+                    }
 
-            if (found < 0)
-            {
-                logger?.LogWarning("Error indicating response from vector service {found}", found);
-                outputIdFormat = VectorIdFormat.Invalid;
-                return VectorManagerResult.BadParams;
-            }
+                    outputDistances = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(retrieveCount * sizeof(float)), retrieveCount * sizeof(float));
+                }
 
-            if (includeAttributes || !filter.IsEmpty)
-            {
-                FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes);
-            }
+                // Indicate requested # of matches
+                outputDistances.Length = retrieveCount * sizeof(float);
 
-            // Apply post-filtering if filter is specified
-            if (!filter.IsEmpty)
-            {
-                // Ensure bitmap is large enough for the over-retrieved result set
-                var requiredBitmapBytes = (found + 7) >> 3;
-                if (requiredBitmapBytes > filterBitmap.Length)
+                // If we're fairly sure the ids won't fit, go ahead and grab more memory now
+                //
+                // If we're still wrong, we'll end up using continuation callbacks which have more overhead
+                if (retrieveCount * MinimumSpacePerId > outputIds.Length)
                 {
-                    if (!filterBitmap.IsSpanByte)
+                    if (!outputIds.IsSpanByte)
                     {
-                        filterBitmap.Memory.Dispose();
+                        outputIds.Memory.Dispose();
                     }
 
-                    filterBitmap = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(requiredBitmapBytes), requiredBitmapBytes);
+                    outputIds = new SpanByteAndMemory(MemoryPool<byte>.Shared.Rent(retrieveCount * MinimumSpacePerId), retrieveCount * MinimumSpacePerId);
                 }
 
-                ApplyPostFilter(filter, found, outputAttributes.AsReadOnlySpan(), filterBitmap.AsSpan(), ActiveThreadSession.scratchBufferBuilder);
-            }
+                var found =
+                    Service.SearchElement(
+                        context,
+                        indexPtr,
+                        element,
+                        delta,
+                        effectiveEF,
+                        filter,
+                        0,
+                        outputIds,
+                        outputDistances,
+                        out var continuation
+                    );
+
+                if (found < 0)
+                {
+                    logger?.LogWarning("Error indicating response from vector service {found}", found);
+                    outputIdFormat = VectorIdFormat.Invalid;
+                    return VectorManagerResult.BadParams;
+                }
 
-            if (continuation != 0)
-            {
-                // TODO: paged results!
-                throw new NotImplementedException();
-            }
+                if (includeAttributes)
+                {
+                    FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes);
+                }
 
-            outputDistances.Length = sizeof(float) * found;
+                if (continuation != 0)
+                {
+                    // TODO: paged results!
+                    throw new NotImplementedException();
+                }
 
-            // Default assumption is length prefixed
-            outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                outputDistances.Length = sizeof(float) * found;
 
-            if (quantType == VectorQuantType.XPreQ8)
-            {
-                // But in this special case, we force them to be 4-byte ids
-                //outputIdFormat = VectorIdFormat.FixedI32;
+                // Default assumption is length prefixed
                 outputIdFormat = VectorIdFormat.I32LengthPrefixed;
-            }
 
-            return VectorManagerResult.OK;
+                if (quantType == VectorQuantType.XPreQ8)
+                {
+                    // But in this special case, we force them to be 4-byte ids
+                    //outputIdFormat = VectorIdFormat.FixedI32;
+                    outputIdFormat = VectorIdFormat.I32LengthPrefixed;
+                }
+
+                return VectorManagerResult.OK;
+            }
         }
 
         /// <summary>
diff --git a/website/docs/dev/filtered-search-design.md b/website/docs/dev/filtered-search-design.md
new file mode 100644
index 00000000000..a77b49eeac1
--- /dev/null
+++ b/website/docs/dev/filtered-search-design.md
@@ -0,0 +1,417 @@
+# Filtered Vector Search — End-to-End Design Document
+
+## 1. Motivation
+
+Garnet's vector search (`VSIM` command family) supports similarity search over DiskANN graph indexes. Users frequently need to combine similarity search with metadata filtering (e.g., "find the 10 nearest images where `year > 2020 AND genre IN ['action', 'comedy']`").
+
+### Problem with post-filtering
+
+The naive approach — fetch K results, then discard non-matching ones — suffers from two issues:
+
+1. **Overfetch waste**: To return K filtered results, you must fetch K×(1/selectivity) candidates. At 1% selectivity, that's 100× overfetch.
+2. **Recall loss**: Even with overfetch, the final result set may contain fewer than K results or miss closer matches that were pruned before the filter was applied.
+
+### Solution: Inline filtering
+
+Evaluate the filter predicate *during* graph traversal so that non-matching candidates never occupy result slots. This eliminates overfetch and improves recall for selective filters. This requires changes on both the Garnet side (attribute storage design) and the DiskANN library side (search algorithm).
+
+---
+
+## 2. Garnet-Side: Attribute Storage Design for Inline Filtering (Current Change)
+
+### Existing Attribute Store
+
+The existing Garnet attribute store was designed for general-purpose access — attributes are stored as **raw JSON keyed by external (user-facing) ID**. This is the natural choice for a key-value store: the user inserts a vector with key `"doc:42"` and attributes `{"year": 2021, "genre": "action"}`, so the attributes are stored under that same key. This store serves RESP command operations (e.g., `VGETATTR`) and remains unchanged.
+
+However, this store creates a mismatch with how DiskANN's graph traversal operates during inline filtering. DiskANN works entirely in **internal ID space** — every candidate is a `uint32` internal ID. To evaluate a filter using only the existing store, the callback must:
+
+1. **Read `ExternalIdMap[internal_id]`** → translate the internal ID to the external key (one Garnet store read)
+2. **Read `Attributes[external_key]`** → fetch the raw JSON payload (second Garnet store read)
+3. **Parse JSON at query time** → `ExtractFields()` runs a JSON tokenizer to locate and parse the fields referenced by the filter expression
+
+With inline filtering, this callback runs on **every candidate the graph traversal considers** (potentially thousands per query). The two store reads and JSON parsing per candidate become the dominant cost on the hot path.
+
+### Solution: Add a second attribute store optimized for query-time filter evaluation
+
+The current change **adds a new attribute store** alongside the existing one. The two stores serve different purposes:
+
+| Store | Keyed by | Format | Purpose |
+|-------|----------|--------|---------|
+| Existing | External ID (user key) | Raw JSON | RESP command operations (`VGETATTR`, `VSETATTR`, etc.) |
+| **New** | Internal ID (DiskANN ID) | Binary | Inline filter evaluation at query time |
+
+The existing external ID keyed JSON store is untouched — it continues to serve all RESP command operations. The new internal ID keyed binary store is a **write-time derived projection** of the same data, optimized purely for the inline filter callback's access pattern.
+
+### Why key by internal ID
+
+DiskANN hands the callback an internal ID; the existing attribute store expects an external key. Bridging this gap requires reading the `ExternalIdMap` — a store read that exists purely because of the keying mismatch. By adding a store keyed by internal ID, the filter callback can look up attributes directly without any ID translation. This eliminates the `ExternalIdMap` read entirely — one fewer store read per candidate.
+
+### Why store in binary format
+
+Raw JSON forces parsing on every candidate at query time. Extracting a numeric field like `.year` requires scanning for the key, skipping whitespace, and parsing a number string into a double. This work is repeated identically for every candidate, every query. The JSON structure does not change between queries — this is wasted work.
+
+The binary store **shifts the cost of JSON parsing from query time to ingestion time:**
+
+- **At ingestion** (vector insert/update): JSON is parsed once and converted to binary via `ConvertJsonToBinary()`. The binary format is `[0xFF marker][field count][per-field: name_len, name, type_tag, value_len, value_bytes]`, with numbers pre-converted to 8-byte LE f64. This is a one-time cost, written to the new store alongside the existing JSON store.
+- **At query time** (per-candidate): `ExtractFieldsBinary()` performs a direct scan over length-prefixed fields. No JSON tokenizer. Field names compared as raw byte spans. Numbers read directly as f64 — no string parsing. ~10× faster than JSON extraction.
+
+Since each vector is inserted once but may be evaluated as a candidate across thousands of queries, this tradeoff — pay more at write, pay less at read — is the correct one for a read-heavy similarity search workload.
+
+### Per-candidate callback comparison
+
+```
+Without binary attribute store (2 store reads + JSON parse per candidate):
+  1. Read ExternalIdMap[internal_id] → external key       ← ID translation
+  2. Read Attributes[external_key] → JSON bytes           ← existing JSON store
+  3. ExtractFields(json, selectors) → field values         ← JSON parse at query time
+  4. ExprRunner.Run(program) → bool
+
+With binary attribute store (1 store read + binary scan per candidate):
+  1. Read BinaryAttributes[internal_id] → binary bytes     ← new store, direct lookup
+  2. ExtractFieldsBinary(binary, selectors) → field values ← pre-parsed, ~10× faster
+  3. ExprRunner.Run(program) → bool
+```
+
+### Summary of inline filter per-candidate cost
+
+| Aspect | Only external ID keyed JSON attribute store | Current change (internal ID keyed binary attribute) | Further optimization (co-locate binary attribute with vector data) |
+|--------|---------------------------------------------|---------------------------------------|----------------------------------------------|
+| Store reads per candidate | 2 (ExternalIdMap + Attributes) | 1 (Attributes only) | 0 (already accessible during traversal) |
+| ID translation | Required (internal → external) | Eliminated (keyed by internal ID) | Eliminated |
+| Field extraction | JSON parse at query time | Binary scan (~10× faster) | Binary scan (~10× faster) |
+| Parse cost paid at | Query time (per candidate, per query) | Ingestion time (once per insert) | Ingestion time (once per insert) |
+| Total per-candidate overhead | 2 reads + JSON parse + eval | 1 read + binary scan + eval | Binary scan + eval |
+
+### Further optimization: Co-locate attributes with vector data
+
+The current change still requires one Garnet store read per candidate to fetch the binary attributes by internal ID. A further optimization is to **co-locate the binary attribute payload directly after the vector data** in the same Garnet record.
+
+During graph traversal, DiskANN already accesses the vector record for each candidate to compute distances. If the binary attributes are stored as trailing bytes in the same record, the callback can read them from the data DiskANN already has a reference to — no additional store read required.
+
+```
+Current change (1 store read per candidate):
+  1. Read Attributes[internal_id] → binary bytes           ← still a separate read
+  2. ExtractFieldsBinary(binary, selectors) → field values
+  3. ExprRunner.Run(program) → bool
+
+Co-located (0 extra store reads per candidate):
+  1. Read trailing bytes from vector record[internal_id]   ← already accessible during traversal
+  2. ExtractFieldsBinary(binary, selectors) → field values
+  3. ExprRunner.Run(program) → bool
+```
+
+This would reduce the per-candidate cost to **zero extra store reads** — the only remaining overhead is the binary field scan and expression evaluation.
+
+### Further with attibute index: Pre-built attribute index to replace per-candidate filter evaluation
+
+If an attribute index is available (e.g., inverted indexes or roaring bitmaps built over attribute values), the filter predicate can be evaluated **at query planning time** rather than per-candidate during graph traversal. The index would produce a pre-computed set of matching internal IDs (e.g., a bitmap), which can be fed directly into DiskANN as a `GarnetFilter::Bitmap`. This replaces the per-candidate FFI callback entirely — DiskANN checks the bitmap with a single bit lookup instead of reading attributes and running the expression evaluator.
+
+This would shift the filter cost from O(candidates_visited) callback invocations to a single O(matching_vectors) bitmap construction at query start, eliminating per-candidate attribute reads and expression evaluation altogether.
+
+---
+
+## 3. DiskANN-Side: Filtered Search Algorithms
+
+The DiskANN library provides multiple search algorithms for filtered queries. All receive a filter predicate via the `QueryLabelProvider` trait and differ in how they integrate filtering into graph traversal.
+
+### 3.1 Comparison of DiskANN Filtered Search Algorithms
+
+| Aspect | MultihopSearch | BetaFilter | TwoQueueSearch |
+|--------|---------------|------------|----------------|
+| Filter integration | Evaluate during standard single-queue search | Scale distances by beta factor for non-matching nodes | Separate explore queue (unfiltered) and result queue (filtered only) |
+| Data structures | `NeighborPriorityQueue` (sorted array) | Wraps any search strategy | `candidates` min-heap + `filtered_results` max-heap |
+| Exploration breadth at low selectivity | Limited — non-matching nodes occupy result slots | Moderate — non-matching nodes appear farther but still compete | Broad — all neighbors enter explore queue regardless of filter |
+| Convergence | Standard greedy convergence | Standard greedy convergence | Converges only when closest unexplored candidate is farther than worst *filtered* result |
+| Adaptive budget | No | No | Yes — doubles hop budget when fewer than K results found |
+
+#### Performance Comparison (TBD)
+
+Benchmark results on the 100K YFCC dataset comparing recall and latency across MultihopSearch, BetaFilter, and TwoQueueSearch at various selectivity levels are pending.
+
+### 3.2 TwoQueueSearch Algorithm (Current Choice)
+
+**File**: `DiskANN/diskann/src/graph/search/two_queue_search.rs`
+
+#### Data Structures
+
+| Queue | Type | Purpose |
+|-------|------|---------|
+| `candidates` | `BinaryHeap<Reverse<Neighbor>>` (min-heap) | Exploration frontier — all neighbors regardless of filter |
+| `filtered_results` | `BinaryHeap<Neighbor>` (max-heap) | Result accumulator — only filter-passing neighbors |
+
+#### Algorithm
+
+```
+Initialize: insert start_point into candidates and visited set
+
+while candidates is not empty AND hops < max_candidates:
+    Pop up to beam_width closest candidates
+
+    Convergence check:
+        if |filtered_results| >= result_cap
+        AND closest_candidate.distance > worst_filtered_result.distance:
+            → Converged, stop
+
+    For each popped candidate:
+        Expand neighbors via graph adjacency
+        For each neighbor not yet visited:
+            Compute distance to query
+            if |filtered_results| < result_cap OR distance < worst_filtered.distance:
+                Insert into candidates
+
+            Call filter_provider.on_visit(neighbor):
+                Accept → insert into filtered_results
+                Reject → skip
+                Terminate → abort immediately
+
+    Prune filtered_results to result_cap (= k × RESULT_SIZE_FACTOR)
+
+    Adaptive budget: if |filtered_results| < k after budget exhausted:
+        Double budget to 2 × max_candidates
+
+Return filtered_results sorted by distance, truncated to k
+```
+
+#### Key Parameters
+
+| Parameter | Source | Description |
+|-----------|--------|-------------|
+| `beam_width` | `search_l` (ef) | Number of candidates to expand per iteration |
+| `max_candidates` | `max(ef, maxFilteringEffort)` | Hop budget before stopping |
+| `result_cap` | `k × RESULT_SIZE_FACTOR` | Max size of filtered_results before pruning |
+| `RESULT_SIZE_FACTOR` | Constant | Overallocation factor for result queue |
+
+#### Termination Modes
+
+- **Exhausted**: candidates queue empty
+- **MaxCandidates**: hop budget reached
+- **Converged**: closest unexplored candidate is farther than worst result
+- **FilterTerminated**: filter callback returned `Terminate`
+
+#### Why TwoQueueSearch over MultihopSearch
+
+The key advantage of TwoQueueSearch is the **separation of exploration from result collection**. In MultihopSearch, non-matching candidates occupy slots in the single priority queue, limiting how far the search can explore. At low selectivity (e.g., 1% match rate), the queue fills with non-matching nodes and the search converges prematurely, missing closer matches that lie further in the graph.
+
+TwoQueueSearch solves this by maintaining two separate heaps: all neighbors enter the explore queue (keeping exploration broad), but only matching neighbors enter the result queue. The convergence check compares against the worst *filtered* result, not the worst candidate overall. This allows the search to keep exploring through non-matching regions of the graph until it finds enough filtered results.
+
+### 3.3 Filter Mode Dispatch (Rust)
+
+**File**: `DiskANN/diskann-garnet/src/labels.rs`, `dyn_index.rs`
+
+```rust
+enum GarnetFilter {
+    Bitmap(GarnetQueryLabelProvider, f32),  // pre-computed bitmap + beta factor
+    Callback(GarnetFilterProvider, u32),    // per-candidate FFI callback + max_effort
+    None,
+}
+```
+
+| Filter Mode | Search Algorithm | When Used |
+|-------------|-----------------|-----------|
+| `None` | Standard greedy KNN | No filter specified |
+| `Bitmap` | BetaFilter (scale distances) | Pre-computed bitmap available (future/alternative path) |
+| `Callback` | **TwoQueueSearch** | Filter expression provided in VSIM command |
+
+The `Callback` variant creates a `GarnetFilterProvider` that wraps the FFI callback. The `TwoQueueSearch` calls `on_visit()` which invokes the callback for each candidate.
+
+---
+
+## 4. Architecture Overview
+
+```
+┌──────────────────────────────────────────────────────┐
+│  Client (RESP)                                       │
+│  VSIM key 10 VALUES vec... FILTER ".year > 2020"     │
+│         MAXFILTERINGEFFORT 2000                       │
+└──────────┬───────────────────────────────────────────┘
+           │
+           ▼
+┌──────────────────────────────────────────────────────┐
+│  Garnet Server (C#)                                  │
+│                                                      │
+│  VectorManager.ValueSimilarity()                     │
+│    ├─ ExprCompiler.TryCompile(filter) → postfix pgm  │
+│    ├─ Pin scratch buffers, set t_inlineFilterState   │
+│    └─ DiskANNService.SearchVector(                   │
+│         ..., filterData, filterLen, maxFilterEffort)  │
+└──────────┬───────────────────────────────────────────┘
+           │  P/Invoke (FFI)
+           ▼
+┌──────────────────────────────────────────────────────┐
+│  DiskANN (Rust, diskann-garnet)                      │
+│                                                      │
+│  search_vector()                                     │
+│    ├─ GarnetFilter::Callback → TwoQueueSearch        │
+│    │    ├─ candidates: min-heap (explore)             │
+│    │    └─ filtered_results: max-heap (results)       │
+│    │                                                  │
+│    │  For each candidate node:                        │
+│    │    ├─ Insert into candidates (unfiltered)        │
+│    │    ├─ Call filterCallback(ctx, internal_id)──┐   │
+│    │    │                    ┌────────────────────┘   │
+│    │    │                    ▼                        │
+│    │    │  ┌─────────────────────────────────────┐    │
+│    │    │  │ C# InlineFilterCandidateCallback     │    │
+│    │    │  │  ├─ Read BinaryAttrs[internal_id]   │    │
+│    │    │  │  ├─ ExtractFieldsBinary(selectors)   │    │
+│    │    │  │  └─ ExprRunner.Run(program)→0/1      │    │
+│    │    │  └─────────────────────────────────────┘    │
+│    │    │                                             │
+│    │    └─ If pass: insert into filtered_results      │
+│    └─ Return top-K from filtered_results              │
+└──────────────────────────────────────────────────────┘
+```
+
+---
+
+## 5. Filter Compilation (C#)
+
+**File**: `libs/server/Resp/Vector/VectorManager.Filter.cs`
+
+### Expression Language
+
+Supports boolean expressions over JSON attributes:
+
+```
+.year > 2020 AND .genre IN ["action", "comedy"] AND NOT .archived
+```
+
+Operators: `=`, `!=`, `<`, `<=`, `>`, `>=`, `IN`, `NOT IN`, `AND`, `OR`, `NOT`
+
+### Compilation Pipeline
+
+1. **Tokenize** — extract field selectors (`.field`), operators, literals
+2. **Shunting-yard** — convert infix to postfix via `ExprCompiler.TryCompile`
+3. **Output** — array of `ExprToken` (instruction stream) + selector ranges (unique field names referenced)
+
+### Zero-Allocation Design
+
+All compilation and evaluation buffers come from a session-local `ScratchBufferBuilder` with a fixed ~9 KB layout:
+
+| Buffer | Size | Purpose |
+|--------|------|---------|
+| `instrBuf` | 2048 B | Compiled instructions |
+| `tuplePoolBuf` | 2048 B | Tuple literal storage |
+| `tokensBuf` | 1024 B | Tokenizer workspace |
+| `opsStackBuf` | 512 B | Shunting-yard operator stack |
+| `runtimePoolBuf` | 1024 B | IN-operator array expansion |
+| `extractedFields` | 1024 B | Field extraction output |
+| `stackBuf` | 1024 B | Expression evaluation stack |
+
+No heap allocations occur during filter compilation or evaluation.
+
+---
+
+## 6. FFI Callback Protocol
+
+### Registration
+
+At index creation (`CreateIndex` / `RecreateIndex`), C# passes `InlineFilterCallbackPtr` to Rust:
+
+```csharp
+delegate* unmanaged[Cdecl]<ulong, uint, byte> InlineFilterCallbackPtr
+    = &InlineFilterCandidateCallbackImpl;
+```
+
+Rust stores this in its `Callbacks` struct alongside read/write/delete callbacks.
+
+### Per-Search Setup (C# side)
+
+Before each FFI search call:
+
+1. Compile filter expression
+2. Pin all scratch buffers
+3. Populate `[ThreadStatic] t_inlineFilterState` with pointers to:
+   - Compiled instructions
+   - Tuple pool
+   - Selector ranges
+   - Filter bytes
+   - Garnet storage context
+4. Call `Service.SearchVector(...)` with `filter_data`, `filter_len`, `max_filtering_effort`
+
+### Per-Candidate Callback (Rust → C#)
+
+```
+Rust calls: filterCallback(context: u64, internal_id: u32) → u8
+                                                            └─ 1 = pass, 0 = reject
+
+C# InlineFilterCandidateCallbackImpl:
+  1. Read BinaryAttributes[internal_id] → binary bytes (via ReadSizeUnknown)
+  2. ExtractFieldsBinary(binary, selectors) → field values
+  3. ExprRunner.Run(instructions, fields) → bool
+  4. Return 1 or 0
+```
+
+### Thread Safety
+
+- DiskANN search is single-threaded per query
+- `[ThreadStatic]` state ensures no cross-query interference
+- `ActiveThreadSession` is set before FFI and cleared on lock release
+
+---
+
+## 7. Attribute Extraction
+
+**File**: `libs/server/Resp/Vector/AttributeExtractor.cs`
+
+Two storage formats are supported:
+
+### JSON Format
+
+Default format for the existing external ID keyed store. Attributes stored as raw JSON (e.g., `{"year": 2021, "genre": "action"}`). `ExtractFields()` performs a single-pass scan, matching field names against selectors and parsing values into `ExprToken`.
+
+### Binary Format
+
+Used by the new internal ID keyed store. Pre-extracted binary layout: `[0xFF marker][field count][per-field: name_len, name, type_tag, value_len, value_bytes]`. Numbers stored as 8-byte LE f64. `ExtractFieldsBinary()` is ~10× faster than JSON extraction. Conversion via `ConvertJsonToBinary()`.
+
+Both paths are zero-allocation, operating on `ReadOnlySpan<byte>`.
+
+---
+
+## 8. End-to-End Data Flow
+
+```
+1. VSIM command parsed → filter bytes + maxFilteringEffort extracted
+
+2. VectorManager.ValueSimilarity()
+   ├─ filter non-empty → inline filtered path
+   ├─ ExprCompiler.TryCompile(filter) → postfix program
+   ├─ Pin buffers, populate t_inlineFilterState
+   └─ DiskANNService.SearchVector(query, k, ef, filterData, filterLen, maxEffort)
+
+3. P/Invoke → Rust search_vector()
+   ├─ Detect GarnetFilter::Callback
+   ├─ Create TwoQueueSearch with GarnetFilterProvider
+   └─ Run two-queue algorithm:
+       For each candidate:
+         ├─ Compute distance
+         ├─ Insert into candidates min-heap
+         ├─ FFI callback → C# evaluates filter → accept/reject
+         └─ If accepted → insert into filtered_results max-heap
+
+4. Return top-K internal IDs + distances (only matching candidates)
+
+5. Back in C# VectorManager:
+   ├─ Map internal IDs → external keys via ExternalIdMap
+   ├─ Optionally fetch attributes for results
+   └─ Serialize RESP response to client
+```
+
+---
+
+## 9. Performance Characteristics
+
+### Compared to Post-Filtering
+
+| Aspect | Post-Filter | Two-Queue Inline |
+|--------|-------------|------------------|
+| Overfetch required | Yes (K/selectivity) | No |
+| Recall at low selectivity | Poor (misses nearby matches) | High (explores broadly) |
+| Per-candidate cost | Distance only | Distance + FFI callback + attribute read + filter eval |
+| Memory | Large result buffers | Fixed-size heaps |
+
+### Tuning
+
+- **`maxFilteringEffort`** — Controls the hop budget. Higher values improve recall for selective filters at the cost of latency. Recommended: 2-10× the `ef` (search_l) parameter.
+- **`RESULT_SIZE_FACTOR`** — Overallocates the result queue to improve result quality during pruning.
+- **Adaptive budget doubling** — When fewer than K results are found within the initial budget, the algorithm automatically doubles exploration depth.