diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 000000000..74ff0bb58 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,38 @@ +name: Python + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Prepare + run: bash scripts/install_dependency.sh + + - name: Build main libCacheSim project + run: | + cmake -G Ninja -B build + ninja -C build + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + + - name: Build libCacheSim-python + run: | + cd libCacheSim-python + pip install -e . + + - name: Run tests + run: | + cd libCacheSim-python + pytest tests/ diff --git a/.gitignore b/.gitignore index 9913f147a..620e8536b 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ sftp-config.json # Clangd cache *.cache/ .lint-logs/ +# Python wheels +*.whl diff --git a/CMakeLists.txt b/CMakeLists.txt index 89513c28f..a2623b470 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,6 +247,8 @@ else() message(STATUS "Building without test") endif() +# Export variables for scikit-build -> build/export_vars.cmake +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) # libCacheSim unified library compilation and installation # Create a single library that combines all modular libraries diff --git a/libCacheSim-python/.gitignore b/libCacheSim-python/.gitignore new file mode 100644 index 000000000..34712f29d --- /dev/null +++ b/libCacheSim-python/.gitignore @@ -0,0 +1,59 @@ +# Automatically generated by `hgimportsvn` +.svn +.hgsvn + +# Ignore local virtualenvs +lib/ +bin/ +include/ +.Python/ + +# These lines are suggested according to the svn:ignore property +# Feel free to enable them by uncommenting them +*.pyc +*.pyo +*.swp +*.class +*.orig +*~ +.hypothesis/ + +# 
autogenerated +src/_pytest/_version.py +# setuptools +.eggs/ + +doc/*/_build +doc/*/.doctrees +build/ +dist/ +*.egg-info +htmlcov/ +issue/ +env/ +.env/ +.venv/ +/pythonenv*/ +3rdparty/ +.tox +.cache +.pytest_cache +.mypy_cache +.coverage +.coverage.* +coverage.xml +.ropeproject +.idea +.hypothesis +.pydevproject +.project +.settings +.vscode +__pycache__/ +.python-version + +# generated by pip +pip-wheel-metadata/ + +# pytest debug logs generated via --debug +pytestdebug.log \ No newline at end of file diff --git a/libCacheSim-python/CMakeLists.txt b/libCacheSim-python/CMakeLists.txt new file mode 100644 index 000000000..f3e1c5d6b --- /dev/null +++ b/libCacheSim-python/CMakeLists.txt @@ -0,0 +1,103 @@ +cmake_minimum_required(VERSION 3.15...3.27) + +# Include exported variables from cache +if(DEFINED LIBCB_BUILD_DIR) + set(PARENT_BUILD_DIR "${LIBCB_BUILD_DIR}") + message(STATUS "Using provided LIBCB_BUILD_DIR: ${LIBCB_BUILD_DIR}") +else() + set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +endif() +set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") + +if(EXISTS "${EXPORT_FILE}") + include("${EXPORT_FILE}") + message(STATUS "Loaded variables from export_vars.cmake") +else() + message(FATAL_ERROR "export_vars.cmake not found at ${EXPORT_FILE}. Please build the main project first (e.g. cd .. 
&& cmake -G Ninja -B build)") +endif() + +# Force enable -fPIC +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + +project(libCacheSim-python VERSION "${LIBCACHESIM_VERSION}") + +if(LOG_LEVEL_LOWER STREQUAL "default") + if(CMAKE_BUILD_TYPE_LOWER MATCHES "debug") + add_compile_definitions(LOGLEVEL=6) + else() + add_compile_definitions(LOGLEVEL=7) + endif() +elseif(LOG_LEVEL_LOWER STREQUAL "verbose") + add_compile_definitions(LOGLEVEL=5) +elseif(LOG_LEVEL_LOWER STREQUAL "debug") + add_compile_definitions(LOGLEVEL=6) +elseif(LOG_LEVEL_LOWER STREQUAL "info") + add_compile_definitions(LOGLEVEL=7) +elseif(LOG_LEVEL_LOWER STREQUAL "warn") + add_compile_definitions(LOGLEVEL=8) +elseif(LOG_LEVEL_LOWER STREQUAL "error") + add_compile_definitions(LOGLEVEL=9) +else() + add_compile_definitions(LOGLEVEL=7) +endif() + +# Find python and pybind11 +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +find_package(pybind11 CONFIG REQUIRED) + +# Include directories for dependencies +include_directories(${GLib_INCLUDE_DIRS}) +include_directories(${GLib_CONFIG_INCLUDE_DIR}) +include_directories(${XGBOOST_INCLUDE_DIR}) +include_directories(${LIGHTGBM_PATH}) +include_directories(${ZSTD_INCLUDE_DIR}) +include_directories(${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin) + +# Find the main libCacheSim library +set(MAIN_PROJECT_BUILD_DIR "${PARENT_BUILD_DIR}") +set(MAIN_PROJECT_LIB_PATH "${MAIN_PROJECT_BUILD_DIR}/liblibCacheSim.a") + +if(EXISTS "${MAIN_PROJECT_LIB_PATH}") + message(STATUS "Found pre-built libCacheSim library at ${MAIN_PROJECT_LIB_PATH}") + + # Import the main library as an imported target + add_library(libCacheSim_main STATIC IMPORTED) + set_target_properties(libCacheSim_main PROPERTIES + IMPORTED_LOCATION "${MAIN_PROJECT_LIB_PATH}" + INTERFACE_INCLUDE_DIRECTORIES 
"${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/utils/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim" + ) + + # Link dependencies that the main library needs + target_link_libraries(libCacheSim_main INTERFACE ${dependency_libs}) + set(LIBCACHESIM_TARGET libCacheSim_main) + +else() + message(FATAL_ERROR "Pre-built libCacheSim library not found. Please build the main project first: cd .. && cmake -G Ninja -B build && ninja -C build") +endif() + +python_add_library(_libcachesim MODULE + src/pylibcachesim.cpp + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/cli_reader_utils.c + WITH_SOABI +) + +set_target_properties(_libcachesim PROPERTIES + POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH_USE_LINK_PATH TRUE + BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH "$ORIGIN" +) + +target_compile_definitions(_libcachesim PRIVATE VERSION_INFO=${PROJECT_VERSION}) + +target_link_libraries(_libcachesim PRIVATE + ${LIBCACHESIM_TARGET} + pybind11::headers + pybind11::module + -Wl,--no-as-needed -ldl +) + +# install to wheel directory +install(TARGETS _libcachesim LIBRARY DESTINATION libcachesim) diff --git a/libCacheSim-python/MAINFEST.in b/libCacheSim-python/MAINFEST.in new file mode 100644 index 000000000..e69de29bb diff --git a/libCacheSim-python/README.md b/libCacheSim-python/README.md new file mode 100644 index 000000000..db46af6a1 --- /dev/null +++ b/libCacheSim-python/README.md @@ -0,0 +1,522 @@ +# libCacheSim Python Binding + +Python bindings for libCacheSim, a high-performance cache simulator and analysis library. + +## Installation + +### Quick Install (Recommended) +```bash +# From the libCacheSim root directory +bash scripts/install_python.sh +``` + +### Manual Install +```bash +# Build the main libCacheSim library first +cmake -G Ninja -B build +ninja -C build + +# Install Python binding +cd libCacheSim-python +pip install -e . -v +``` + +### Testing +```bash +# Run all tests +python -m pytest . 
+ +# Test import +python -c "import libcachesim; print('Success!')" +``` + +## Quick Start + +### Basic Usage + +```python +import libcachesim as lcs + +# Create a cache +cache = lcs.LRU(cache_size=1024*1024) # 1MB cache + +# Process requests +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 + +hit = cache.get(req) # False (first access) +hit = cache.get(req) # True (second access) + +# Check statistics +print(f"Hit rate: {(cache.n_req - cache.n_miss)/cache.n_req:.2%}") +``` + +### Trace Processing + +```python +import libcachesim as lcs + +# Open trace and process efficiently +reader = lcs.open_trace("trace.bin", lcs.TraceType.ORACLE_GENERAL_TRACE.value) +cache = lcs.S3FIFO(cache_size=1024*1024) + +# Process entire trace efficiently (C++ backend) +miss_ratio = cache.process_trace(reader) +print(f"Miss ratio: {miss_ratio:.4f}") + +# Process with limits and time ranges +miss_ratio = cache.process_trace( + reader, + max_req=10000, # Process max 10K requests + max_sec=3600, # Process max 1 hour + start_time=1000, # Start from timestamp 1000 + end_time=5000 # End at timestamp 5000 +) +``` + +## Custom Cache Policies + +Implement custom cache replacement algorithms using pure Python functions - no C/C++ compilation required. + +### Python Hook Cache Overview + +The `PythonHookCachePolicy` allows you to define custom caching behavior through Python callback functions. 
This is perfect for: +- Prototyping new cache algorithms +- Educational purposes and learning +- Research and experimentation +- Custom business logic implementation + +### Hook Functions + +You need to implement these callback functions: + +- **`init_hook(cache_size: int) -> Any`**: Initialize your data structure +- **`hit_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits +- **`miss_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses +- **`eviction_hook(data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict +- **`remove_hook(data: Any, obj_id: int) -> None`**: Clean up when object removed +- **`free_hook(data: Any) -> None`**: [Optional] Final cleanup + +### Example: Custom LRU Implementation + +```python +import libcachesim as lcs +from collections import OrderedDict + +# Create a Python hook-based cache +cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="MyLRU") + +# Define LRU policy hooks +def init_hook(cache_size): + return OrderedDict() # Track access order + +def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) # Move to most recent + +def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True # Add to end + +def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) # Return least recent + +def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + +# Set the hooks +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + +# Use it like any other cache +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = cache.get(req) +``` + +### Example: Custom FIFO Implementation + +```python +import libcachesim as lcs +from collections import deque + +# Create a custom FIFO cache +cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO") + +def init_hook(cache_size): + return deque() # Use deque for FIFO order + +def hit_hook(fifo_queue, obj_id, obj_size): + pass # FIFO doesn't 
reorder on hit + +def miss_hook(fifo_queue, obj_id, obj_size): + fifo_queue.append(obj_id) # Add to end of queue + +def eviction_hook(fifo_queue, obj_id, obj_size): + return fifo_queue[0] # Return first item (oldest) + +def remove_hook(fifo_queue, obj_id): + if fifo_queue and fifo_queue[0] == obj_id: + fifo_queue.popleft() + +# Set the hooks and test +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = cache.get(req) +print(f"Cache hit: {hit}") # Should be False (miss) +``` + +## Available Algorithms + +### Built-in Cache Algorithms + +#### Basic Algorithms +- **FIFO**: First-In-First-Out +- **LRU**: Least Recently Used +- **LFU**: Least Frequently Used +- **Clock**: Clock/Second-chance algorithm + +#### Advanced Algorithms +- **S3FIFO**: Simple, Fast, Fair FIFO (recommended for most workloads) +- **Sieve**: High-performance eviction algorithm +- **ARC**: Adaptive Replacement Cache +- **TwoQ**: Two-Queue algorithm +- **TinyLFU**: TinyLFU with window +- **SLRU**: Segmented LRU + +#### Research/ML Algorithms +- **LRB**: Learning-based cache (if enabled) +- **GLCache**: Machine learning-based cache +- **ThreeLCache**: Three-level cache hierarchy (if enabled) + +```python +import libcachesim as lcs + +# All algorithms use the same unified interface +cache_size = 1024 * 1024 # 1MB + +lru_cache = lcs.LRU(cache_size) +s3fifo_cache = lcs.S3FIFO(cache_size) # Recommended +sieve_cache = lcs.Sieve(cache_size) +arc_cache = lcs.ARC(cache_size) + +# All caches work identically +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = lru_cache.get(req) +``` + +## Examples and Testing + +### Algorithm Comparison +```python +import libcachesim as lcs + +def compare_algorithms(trace_path): + reader = lcs.open_trace(trace_path, lcs.TraceType.VSCSI_TRACE.value) + algorithms = ['LRU', 'S3FIFO', 'Sieve', 'ARC'] + + print("Algorithm\tMiss Ratio") + print("-" * 25) + for algo_name in 
algorithms: + cache = getattr(lcs, algo_name)(cache_size=1024*1024) + miss_ratio = cache.process_trace(reader) + print(f"{algo_name}\t\t{miss_ratio:.4f}") + +compare_algorithms("workload.vscsi") +``` + +### Performance Benchmarking +```python +import time + +def benchmark_cache(cache, num_requests=100000): + """Benchmark cache performance""" + start_time = time.time() + + for i in range(num_requests): + req = lcs.Request() + req.obj_id = i % 1000 # Working set of 1000 objects + req.obj_size = 100 + cache.get(req) + + end_time = time.time() + throughput = num_requests / (end_time - start_time) + + print(f"Processed {num_requests} requests in {end_time - start_time:.2f}s") + print(f"Throughput: {throughput:.0f} requests/sec") + print(f"Miss ratio: {cache.n_miss / cache.n_req:.4f}") + +# Compare performance +lru_cache = lcs.LRU(cache_size=1024*1024) +s3fifo_cache = lcs.S3FIFO(cache_size=1024*1024) + +print("LRU Performance:") +benchmark_cache(lru_cache) + +print("\nS3-FIFO Performance:") +benchmark_cache(s3fifo_cache) +``` + +### Validate Custom Implementation +```python +def test_custom_vs_builtin(): + """Test custom cache against built-in implementation""" + cache_size = 1024 + + # Your custom LRU implementation + custom_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") + # ... set up your LRU hooks here ... 
+ + # Built-in LRU for comparison + builtin_cache = lcs.LRU(cache_size) + + # Test with same request sequence + test_requests = [(1, 100), (2, 100), (3, 100), (1, 100)] + + for obj_id, obj_size in test_requests: + req1 = lcs.Request() + req1.obj_id = obj_id + req1.obj_size = obj_size + + req2 = lcs.Request() + req2.obj_id = obj_id + req2.obj_size = obj_size + + custom_result = custom_cache.get(req1) + builtin_result = builtin_cache.get(req2) + + assert custom_result == builtin_result, f"Mismatch at obj_id {obj_id}" + print(f"obj_id {obj_id}: {'HIT' if custom_result else 'MISS'} ✓") +``` + +## Advanced Usage + +### Multi-Format Trace Processing + +```python +import libcachesim as lcs + +# Supported trace types +trace_types = { + "oracle": lcs.TraceType.ORACLE_GENERAL_TRACE.value, + "csv": lcs.TraceType.CSV_TRACE.value, + "vscsi": lcs.TraceType.VSCSI_TRACE.value, + "txt": lcs.TraceType.TXT_TRACE.value +} + +# Open different trace formats +oracle_reader = lcs.open_trace("trace.bin", trace_types["oracle"]) +csv_reader = lcs.open_trace("trace.csv", trace_types["csv"], + "time-col=1,obj-id-col=2,obj-size-col=3,delimiter=,") + +# Process traces with different caches +caches = [ + lcs.LRU(cache_size=1024*1024), + lcs.S3FIFO(cache_size=1024*1024), + lcs.Sieve(cache_size=1024*1024) +] + +for i, cache in enumerate(caches): + miss_ratio = cache.process_trace(oracle_reader) + print(f"Cache {i} miss ratio: {miss_ratio:.4f}") +``` + +### Cache Hierarchy Simulation + +```python +def simulate_cache_hierarchy(): + """Simulate a two-level cache hierarchy""" + + # L1 cache (small, fast) + l1_cache = lcs.LRU(cache_size=64*1024) # 64KB + + # L2 cache (larger, slower) + l2_cache = lcs.LRU(cache_size=1024*1024) # 1MB + + # Simulate requests + total_requests = 0 + l1_hits = 0 + l2_hits = 0 + + for obj_id in range(1000): + req = lcs.Request() + req.obj_id = obj_id % 100 # Working set of 100 objects + req.obj_size = 1024 + + total_requests += 1 + + # Check L1 first + if l1_cache.get(req): + 
l1_hits += 1 + # Check L2 on L1 miss + elif l2_cache.get(req): + l2_hits += 1 + # Promote to L1 + l1_cache.get(req) + + print(f"L1 hit rate: {l1_hits/total_requests:.2%}") + print(f"L2 hit rate: {l2_hits/total_requests:.2%}") + print(f"Overall hit rate: {(l1_hits+l2_hits)/total_requests:.2%}") + +simulate_cache_hierarchy() +``` + +### Cache Statistics Monitoring + +```python +def analyze_cache_behavior(): + """Detailed cache statistics analysis""" + cache = lcs.S3FIFO(cache_size=1024*1024) + + # Process some requests + for i in range(1000): + req = lcs.Request() + req.obj_id = i % 100 + req.obj_size = 1024 + cache.get(req) + + # Access detailed statistics + print("=== Cache Statistics ===") + print(f"Cache size: {cache.cache_size:,} bytes") + print(f"Occupied space: {cache.occupied_byte:,} bytes") + print(f"Utilization: {cache.occupied_byte/cache.cache_size:.2%}") + print(f"Objects stored: {cache.n_obj:,}") + print(f"Total requests: {cache.n_req:,}") + print(f"Cache hits: {cache.n_req - cache.n_miss:,}") + print(f"Cache misses: {cache.n_miss:,}") + print(f"Hit rate: {(cache.n_req - cache.n_miss)/cache.n_req:.2%}") + print(f"Miss rate: {cache.n_miss/cache.n_req:.2%}") + +analyze_cache_behavior() +``` + +## API Reference + +### Unified Cache Interface + +All cache policies (built-in and Python hook-based) share the same interface: + +```python +import libcachesim as lcs + +# All cache policies work the same way +cache = lcs.LRU(cache_size=1024*1024) +# or +cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="Custom") + +# Unified methods for all caches: +req = lcs.Request() +req.obj_id = 123 # Object identifier (required) +req.obj_size = 1024 # Object size in bytes (required) +req.timestamp = 1000 # Request timestamp (optional) +req.op = 1 # Operation type (optional, default=1) + +hit = cache.get(req) # Process single request - returns True if hit, False if miss + +# Batch processing (faster for large traces) +reader = lcs.open_trace("trace.bin", 
lcs.TraceType.ORACLE_GENERAL_TRACE.value) +miss_ratio = cache.process_trace(reader, max_req=10000) + +# Unified properties for all caches: +print(f"Cache size: {cache.cache_size}") +print(f"Objects: {cache.n_obj}") +print(f"Occupied bytes: {cache.occupied_byte}") +print(f"Total requests: {cache.n_req}") +print(f"Cache misses: {cache.n_miss}") +print(f"Hit rate: {(cache.n_req - cache.n_miss) / cache.n_req:.2%}") +``` + +### Trace Reader + +```python +# Open trace with specific format +reader = lcs.open_trace( + trace_path="trace.csv", + trace_type=lcs.TraceType.CSV_TRACE.value, + trace_type_params="time-col=1,obj-id-col=2,obj-size-col=3,delimiter=," +) + +# Process trace with options +miss_ratio = cache.process_trace( + reader, + max_req=10000, # Process max requests + max_sec=3600, # Process max seconds of trace + start_time=1000, # Start from timestamp + end_time=5000 # End at timestamp +) +``` + +### Supported Trace Formats +```python +# Oracle format (binary, fastest) +reader = lcs.open_trace("trace.bin", lcs.TraceType.ORACLE_GENERAL_TRACE.value) + +# CSV format with custom parameters +reader = lcs.open_trace("trace.csv", lcs.TraceType.CSV_TRACE.value, + "time-col=1,obj-id-col=2,obj-size-col=3,delimiter=,") + +# VSCSI format +reader = lcs.open_trace("trace.vscsi", lcs.TraceType.VSCSI_TRACE.value) + +# Plain text format +reader = lcs.open_trace("trace.txt", lcs.TraceType.TXT_TRACE.value) +``` + +### Python Hook Cache Reference + +When implementing `PythonHookCachePolicy`, provide these hook functions: + +```python +def init_hook(cache_size: int) -> Any: + """Initialize and return plugin data structure""" + return {} # Can be any Python object + +def hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + """Handle cache hits - update your data structure""" + pass + +def miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + """Handle cache misses - add object to your data structure""" + pass + +def eviction_hook(plugin_data: Any, obj_id: 
int, obj_size: int) -> int: + """Return object ID to evict when cache is full""" + return victim_obj_id + +def remove_hook(plugin_data: Any, obj_id: int) -> None: + """Clean up when object is removed from cache""" + pass + +def free_hook(plugin_data: Any) -> None: + """[Optional] Final cleanup when cache is destroyed""" + pass + +# Set hooks +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) +``` + +## Troubleshooting + +### Common Issues + +**Import Error**: Make sure libCacheSim C++ library is built first: +```bash +cmake -G Ninja -B build && ninja -C build +``` + +**Performance Issues**: Use `process_trace()` for large workloads instead of individual `get()` calls for better performance. + +**Memory Usage**: Monitor cache statistics (`cache.occupied_byte`) and ensure proper cache size limits for your system. + +**Custom Cache Issues**: Validate your custom implementation against built-in algorithms using the test functions above. + +### Getting Help + +- Check the [main documentation](/doc/) for detailed guides +- Run tests: `python -m pytest libCacheSim-python/` +- Open issues on [GitHub](https://github.com/1a1a11a/libCacheSim/issues) +- Review [examples](/example) in the main repository diff --git a/libCacheSim-python/examples/demo_unified_interface.py b/libCacheSim-python/examples/demo_unified_interface.py new file mode 100644 index 000000000..c51c3e344 --- /dev/null +++ b/libCacheSim-python/examples/demo_unified_interface.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Demo script showing the unified interface for all cache policies. +This demonstrates how to use both native and Python hook-based caches +with the same API for seamless algorithm comparison and switching. 
+""" + +import sys +import os + +# Add parent directory for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader.""" + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + print(f"Warning: Trace file not found at {data_file}") + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value) + + +def create_demo_lru_hooks(): + """Create demo LRU hooks for Python-based cache policy.""" + + def init_hook(cache_size): + print(f" Initializing custom LRU with {cache_size} bytes") + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +def demo_unified_interface(): + """Demonstrate the unified interface across different cache policies.""" + print("libCacheSim Python Binding - Unified Interface Demo") + print("=" * 60) + + cache_size = 1024 * 1024 # 1MB + + # Create different cache policies + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": lcs.ARC(cache_size), + } + + # Create Python hook-based LRU + python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks() + 
python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Custom Python LRU"] = python_cache + + print(f"Testing {len(caches)} different cache policies with unified interface:") + + # Demo 1: Single request interface + print("1. Single Request Interface:") + print(" All caches use: cache.get(request)") + + test_req = lcs.Request() + test_req.obj_id = 1 + test_req.obj_size = 1024 + + for name, cache in caches.items(): + result = cache.get(test_req) + print(f" {name:20s}: {'HIT' if result else 'MISS'}") + + # Demo 2: Unified properties interface + print("\n2. Unified Properties Interface:") + print(" All caches provide: cache_size, n_obj, occupied_byte, n_req") + + for name, cache in caches.items(): + print(f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, " + f"bytes={cache.occupied_byte}, reqs={cache.n_req}") + + # Demo 3: Efficient trace processing + print("\n3. Efficient Trace Processing Interface:") + print(" All caches use: cache.process_trace(reader, max_req=N)") + + max_requests = 1000 + + for name, cache in caches.items(): + # Create fresh reader for each cache + reader = create_trace_reader() + if not reader: + print(f" {name:20s}: trace file not available") + continue + + miss_ratio = cache.process_trace(reader, max_req=max_requests) + print(f" {name:20s}: miss_ratio={miss_ratio:.4f}") + + print("\nKey Benefits of Unified Interface:") + print(" • Same API for all cache policies (built-in + custom)") + print(" • Easy to switch between different algorithms") + print(" • Efficient trace processing in C++ (no Python overhead)") + print(" • Consistent properties and statistics") + print(" • Type-safe and well-documented") + + print("\nDemo completed! 
All cache policies work with the same interface.") + + +if __name__ == "__main__": + demo_unified_interface() diff --git a/libCacheSim-python/examples/python_hook_cache_example.py b/libCacheSim-python/examples/python_hook_cache_example.py new file mode 100644 index 000000000..daef56a73 --- /dev/null +++ b/libCacheSim-python/examples/python_hook_cache_example.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +""" +Example demonstrating how to create custom cache policies using Python hooks. + +This example shows how to implement LRU and FIFO cache policies using the +PythonHookCachePolicy class, which allows users to define cache behavior using +pure Python functions instead of C/C++ plugins. +""" + +import libcachesim as lcs +from collections import OrderedDict, deque + + +class LRUPolicy: + """LRU (Least Recently Used) cache policy implementation.""" + + def __init__(self, cache_size): + self.cache_size = cache_size + self.access_order = OrderedDict() # obj_id -> True (for ordering) + + def on_hit(self, obj_id, obj_size): + """Move accessed object to end (most recent).""" + if obj_id in self.access_order: + # Move to end (most recent) + self.access_order.move_to_end(obj_id) + + def on_miss(self, obj_id, obj_size): + """Add new object to end (most recent).""" + self.access_order[obj_id] = True + + def evict(self, obj_id, obj_size): + """Return the least recently used object ID.""" + if self.access_order: + # Return first item (least recent) + victim_id = next(iter(self.access_order)) + return victim_id + raise RuntimeError("No objects to evict") + + def on_remove(self, obj_id): + """Remove object from tracking.""" + self.access_order.pop(obj_id, None) + + +class FIFOPolicy: + """FIFO (First In First Out) cache policy implementation.""" + + def __init__(self, cache_size): + self.cache_size = cache_size + self.insertion_order = deque() # obj_id queue + + def on_hit(self, obj_id, obj_size): + """FIFO doesn't change order on hits.""" + pass + + def on_miss(self, obj_id, 
obj_size): + """Add new object to end of queue.""" + self.insertion_order.append(obj_id) + + def evict(self, obj_id, obj_size): + """Return the first inserted object ID.""" + if self.insertion_order: + victim_id = self.insertion_order.popleft() + return victim_id + raise RuntimeError("No objects to evict") + + def on_remove(self, obj_id): + """Remove object from tracking.""" + try: + self.insertion_order.remove(obj_id) + except ValueError: + pass # Object not in queue + + +def create_lru_cache(cache_size): + """Create an LRU cache using Python hooks.""" + cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU") + + def init_hook(cache_size): + return LRUPolicy(cache_size) + + def hit_hook(policy, obj_id, obj_size): + policy.on_hit(obj_id, obj_size) + + def miss_hook(policy, obj_id, obj_size): + policy.on_miss(obj_id, obj_size) + + def eviction_hook(policy, obj_id, obj_size): + return policy.evict(obj_id, obj_size) + + def remove_hook(policy, obj_id): + policy.on_remove(obj_id) + + def free_hook(policy): + # Python garbage collection handles cleanup + pass + + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) + return cache + + +def create_fifo_cache(cache_size): + """Create a FIFO cache using Python hooks.""" + cache = lcs.PythonHookCachePolicy(cache_size, "PythonFIFO") + + def init_hook(cache_size): + return FIFOPolicy(cache_size) + + def hit_hook(policy, obj_id, obj_size): + policy.on_hit(obj_id, obj_size) + + def miss_hook(policy, obj_id, obj_size): + policy.on_miss(obj_id, obj_size) + + def eviction_hook(policy, obj_id, obj_size): + return policy.evict(obj_id, obj_size) + + def remove_hook(policy, obj_id): + policy.on_remove(obj_id) + + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + return cache + + +def test_cache_policy(cache, name): + """Test a cache policy with sample requests.""" + print(f"\n=== Testing {name} Cache ===") + + # Test requests: obj_id, obj_size + test_requests = [ + (1, 
100), (2, 100), (3, 100), (4, 100), (5, 100), # Fill cache + (1, 100), # Hit + (6, 100), # Miss, should evict something + (2, 100), # Hit or miss depending on policy + (7, 100), # Miss, should evict something + ] + + hits = 0 + misses = 0 + + for obj_id, obj_size in test_requests: + req = lcs.Request() + req.obj_id = obj_id + req.obj_size = obj_size + + hit = cache.get(req) + if hit: + hits += 1 + print(f"Request {obj_id}: HIT") + else: + misses += 1 + print(f"Request {obj_id}: MISS") + + print(f"Total: {hits} hits, {misses} misses") + print(f"Cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes occupied") + + +def main(): + """Main example function.""" + cache_size = 400 # Bytes (can hold 4 objects of size 100 each) + + # Test LRU cache + lru_cache = create_lru_cache(cache_size) + test_cache_policy(lru_cache, "LRU") + + # Test FIFO cache + fifo_cache = create_fifo_cache(cache_size) + test_cache_policy(fifo_cache, "FIFO") + + print("\n=== Comparison ===") + print("LRU keeps recently accessed items, evicting least recently used") + print("FIFO keeps items in insertion order, evicting oldest inserted") + + +if __name__ == "__main__": + main() diff --git a/libCacheSim-python/export/CMakeLists.txt b/libCacheSim-python/export/CMakeLists.txt new file mode 100644 index 000000000..06a3566ac --- /dev/null +++ b/libCacheSim-python/export/CMakeLists.txt @@ -0,0 +1,33 @@ +# Helper functions are removed since we don't export source files anymore + +set(EXPORT_FILE "${CMAKE_BINARY_DIR}/export_vars.cmake") +file(WRITE "${EXPORT_FILE}" "") + +get_filename_component(MAIN_PROJECT_SOURCE_DIR ${CMAKE_SOURCE_DIR} ABSOLUTE) +file(WRITE ${CMAKE_BINARY_DIR}/export_vars.cmake "set(MAIN_PROJECT_SOURCE_DIR \"${MAIN_PROJECT_SOURCE_DIR}\")\n") +file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(dependency_libs \"${dependency_libs}\")\n") +file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(LIBCACHESIM_VERSION \"${LIBCACHESIM_VERSION}\")\n") + +# 
============================================================================== +# Export project metadata +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(LIBCACHESIM_VERSION \"${${PROJECT_NAME}_VERSION}\")\n") + +# ============================================================================== +# Export essential include directory variables +# ============================================================================== +foreach(var IN ITEMS GLib_INCLUDE_DIRS GLib_CONFIG_INCLUDE_DIR XGBOOST_INCLUDE_DIR LIGHTGBM_PATH ZSTD_INCLUDE_DIR) + file(APPEND "${EXPORT_FILE}" "set(${var} \"${${var}}\")\n") +endforeach() + +# ============================================================================== +# Export dependency library variables +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(dependency_libs \"${dependency_libs}\")\n") + +# ============================================================================== +# Export essential build option variables +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(LOG_LEVEL_LOWER \"${LOG_LEVEL_LOWER}\")\n") + +message(STATUS "Exported essential variables to ${EXPORT_FILE}") diff --git a/libCacheSim-python/export/README.md b/libCacheSim-python/export/README.md new file mode 100644 index 000000000..976b1daa8 --- /dev/null +++ b/libCacheSim-python/export/README.md @@ -0,0 +1,47 @@ +# Python Binding Export System + +Build system bridge for sharing CMake variables between the main libCacheSim project and Python binding. + +## Purpose + +The `export/CMakeLists.txt` exports all necessary build variables (source files, include directories, compiler flags, etc.) from the main project to the Python binding, enabling consistent builds without duplicating configuration. + +## How It Works + +1. 
**Export**: Main project writes variables to `export_vars.cmake` +2. **Import**: Python binding includes this file during CMake configuration +3. **Build**: Python binding uses shared variables for consistent compilation + +## Key Exported Variables + +### Project Metadata +- Library version (`LIBCACHESIM_VERSION`) + +### Build Configuration +- Include directories (GLib, ZSTD, XGBoost, LightGBM) +- Dependency libraries (`dependency_libs`) +- Build options (log level) + +Note: source files and compiler flags are no longer exported; the Python binding links against the prebuilt main library from the parent build directory. + +## Usage + +**Main Project** (`CMakeLists.txt`): +```cmake +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) +``` + +**Python Binding** (`libCacheSim-python/CMakeLists.txt`): +```cmake +set(EXPORT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../build/export_vars.cmake") +include("${EXPORT_FILE}") +``` + +## For Developers + +This system ensures the Python binding automatically picks up changes to: +- Updated include directories or dependency libraries in the main project +- Modified build options (e.g. log level) + +No manual synchronization is needed between the main project and the Python binding builds. 
diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py new file mode 100644 index 000000000..5cac3c360 --- /dev/null +++ b/libCacheSim-python/libcachesim/__init__.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from ._libcachesim import ( + Cache, + Reader, + Request, + __doc__, + __version__, + open_trace, + process_trace, + process_trace_python_hook, +) +from .const import TraceType +from .eviction import ( + ARC, + FIFO, + LRB, + LRU, + S3FIFO, + Clock, + Sieve, + ThreeLCache, + TinyLFU, + TwoQ, + PythonHookCachePolicy, +) + +__all__ = [ + "ARC", + "FIFO", + "LRB", + "LRU", + "S3FIFO", + "Cache", + "Clock", + "Reader", + "Request", + "Sieve", + "ThreeLCache", + "TinyLFU", + "TraceType", + "TwoQ", + "PythonHookCachePolicy", + "__doc__", + "__version__", + "open_trace", + "process_trace", + "process_trace_python_hook", + # TODO(haocheng): add more eviction policies +] diff --git a/libCacheSim-python/libcachesim/__init__.pyi b/libCacheSim-python/libcachesim/__init__.pyi new file mode 100644 index 000000000..4148ddc84 --- /dev/null +++ b/libCacheSim-python/libcachesim/__init__.pyi @@ -0,0 +1,139 @@ +""" +libCacheSim Python bindings +-------------------------- + +.. currentmodule:: libcachesim + +.. autosummary:: + :toctree: _generate + + open_trace + ARC_init + Clock_init + FIFO_init + LRB_init + LRU_init + S3FIFO_init + Sieve_init + ThreeLCache_init + TinyLFU_init + TwoQ_init + Cache + Request + Reader + reader_init_param_t + TraceType +""" + +from .const import TraceType + +def open_trace( + trace_path: str, + type: TraceType, + reader_init_param: dict | reader_init_param_t | None = None +) -> Reader: ... + + +def FIFO_init(cache_size: int) -> Cache: + """ + Create a FIFO cache instance. + """ + + +def ARC_init(cache_size: int) -> Cache: + """ + Create a ARC cache instance. + """ + + +def Clock_init(cache_size: int, n_bit_counter: int = 1, init_freq: int = 0) -> Cache: + """ + Create a Clock cache instance. 
+ """ + + +def LRB_init(cache_size: int, objective: str = "byte-miss-ratio") -> Cache: + """ + Create a LRB cache instance. + """ + + +def LRU_init(cache_size: int) -> Cache: + """ + Create a LRU cache instance. + """ + + +def S3FIFO_init( + cache_size: int, + fifo_size_ratio: float = 0.10, + ghost_size_ratio: float = 0.90, + move_to_main_threshold: int = 2 +) -> Cache: + """ + Create a S3FIFO cache instance. + """ + + +def Sieve_init(cache_size: int) -> Cache: + """ + Create a Sieve cache instance. + """ + + +def ThreeLCache_init(cache_size: int, objective: str = "byte-miss-ratio") -> Cache: + """ + Create a ThreeLCache cache instance. + """ + + +def TinyLFU_init( + cache_size: int, + main_cache: str = "SLRU", + window_size: float = 0.01 +) -> Cache: + """ + Create a TinyLFU cache instance. + """ + + +def TwoQ_init( + cache_size: int, + Ain_size_ratio: float = 0.25, + Aout_size_ratio: float = 0.5 +) -> Cache: + """ + Create a TwoQ cache instance. + """ + +class reader_init_param_t: + time_field: int + obj_id_field: int + obj_size_field: int + delimiter: str + has_header: bool + + +class Cache: + n_req: int + n_obj: int + occupied_byte: int + cache_size: int + def get(self, req: Request) -> bool: ... + + +class Request: + clock_time: int + hv: int + obj_id: int + obj_size: int + + +class Reader: + n_read_req: int + n_total_req: int + trace_path: str + file_size: int + def get_wss(self, ignore_obj_size: bool = False) -> int: ... + def __iter__(self) -> Reader: ... + def __next__(self) -> Request: ... 
diff --git a/libCacheSim-python/libcachesim/const.py b/libCacheSim-python/libcachesim/const.py new file mode 100644 index 000000000..142f3cccb --- /dev/null +++ b/libCacheSim-python/libcachesim/const.py @@ -0,0 +1,4 @@ +from __future__ import annotations + +# Import TraceType directly from the C++ binding to avoid duplication +from ._libcachesim import TraceType diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py new file mode 100644 index 000000000..fa1cfb836 --- /dev/null +++ b/libCacheSim-python/libcachesim/eviction.py @@ -0,0 +1,512 @@ +"""Registry of eviction policies.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod + +from ._libcachesim import ( + ARC_init, + Cache, + Clock_init, + FIFO_init, + LRB_init, + LRU_init, + Request, + S3FIFO_init, + Sieve_init, + ThreeLCache_init, + TinyLFU_init, + TwoQ_init, + PythonHookCache, +) + + +class EvictionPolicyBase(ABC): + """Abstract base class for all eviction policies.""" + @abstractmethod + def get(self, req: Request) -> bool: + pass + + @abstractmethod + def __repr__(self) -> str: + pass + + @abstractmethod + def process_trace(self, reader, max_req: int = -1, max_sec: int = -1, start_time: int = -1, end_time: int = -1) -> float: + """Process a trace with this cache and return miss ratio. + + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. 
+ + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + """ + pass + + +class EvictionPolicy(EvictionPolicyBase): + """Base class for all eviction policies.""" + def __init__(self, cache_size: int, **kwargs) -> None: + self.cache: Cache = self.init_cache(cache_size, **kwargs) + + @abstractmethod + def init_cache(self, cache_size: int, **kwargs) -> Cache: + pass + + def get(self, req: Request) -> bool: + return self.cache.get(req) + + def process_trace(self, reader, max_req: int = -1, max_sec: int = -1, start_time: int = -1, end_time: int = -1) -> float: + """Process a trace with this cache and return miss ratio. + + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. + + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + + Example: + >>> cache = LRU(1024*1024) + >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) + >>> miss_ratio = cache.process_trace(reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + """ + from ._libcachesim import process_trace + return process_trace(self.cache, reader, max_req, max_sec, start_time, end_time) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})" + + @property + def n_req(self): + """Number of requests processed.""" + return self.cache.n_req + + @property + def n_obj(self): + """Number of objects currently in cache.""" + return self.cache.n_obj + + @property + def occupied_byte(self): + """Number of bytes 
currently occupied in cache.""" + return self.cache.occupied_byte + + @property + def cache_size(self): + """Total cache size in bytes.""" + return self.cache.cache_size + + +class FIFO(EvictionPolicy): + """First In First Out replacement policy. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs) -> Cache: # noqa: ARG002 + return FIFO_init(cache_size) + + +class Clock(EvictionPolicy): + """Clock (Second Chance or FIFO-Reinsertion) replacement policy. + + Args: + cache_size: Size of the cache + n_bit_counter: Number of bits for counter (default: 1) + init_freq: Initial frequency value (default: 0) + """ + def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0): + super().__init__(cache_size, n_bit_counter=n_bit_counter, init_freq=init_freq) + + def init_cache(self, cache_size: int, **kwargs): + init_freq = kwargs.get('init_freq', 0) + n_bit_counter = kwargs.get('n_bit_counter', 1) + + if n_bit_counter < 1 or n_bit_counter > 32: + msg = "n_bit_counter must be between 1 and 32" + raise ValueError(msg) + if init_freq < 0 or init_freq > 2**n_bit_counter - 1: + msg = "init_freq must be between 0 and 2^n_bit_counter - 1" + raise ValueError(msg) + + self.init_freq = init_freq + self.n_bit_counter = n_bit_counter + + return Clock_init(cache_size, n_bit_counter, init_freq) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"n_bit_counter={self.n_bit_counter}, " + f"init_freq={self.init_freq})") + + +class TwoQ(EvictionPolicy): + """2Q replacement policy. + + 2Q has three queues: Ain, Aout, Am. When a obj hits in Aout, it will be + inserted into Am otherwise it will be inserted into Ain. 
+ + Args: + cache_size: Total size of the cache + ain_size_ratio: Size ratio for Ain queue (default: 0.25) + aout_size_ratio: Size ratio for Aout queue (default: 0.5) + """ + def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5): + super().__init__(cache_size, ain_size_ratio=ain_size_ratio, aout_size_ratio=aout_size_ratio) + + def init_cache(self, cache_size: int, **kwargs): + ain_size_ratio = kwargs.get('ain_size_ratio', 0.25) + aout_size_ratio = kwargs.get('aout_size_ratio', 0.5) + + if ain_size_ratio <= 0 or aout_size_ratio <= 0: + msg = "ain_size_ratio and aout_size_ratio must be greater than 0" + raise ValueError(msg) + + self.ain_size_ratio = ain_size_ratio + self.aout_size_ratio = aout_size_ratio + + return TwoQ_init(cache_size, ain_size_ratio, aout_size_ratio) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"ain_size_ratio={self.ain_size_ratio}, " + f"aout_size_ratio={self.aout_size_ratio})") + + +class LRB(EvictionPolicy): + """LRB (Learning Relaxed Belady) replacement policy. + + LRB is a learning-based replacement policy that uses a neural network to + predict the future access patterns of the cache, randomly select one obj + outside the Belady boundary to evict. 
+ + Args: + cache_size: Size of the cache + objective: Objective function to optimize (default: "byte-miss-ratio") + """ + def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): + super().__init__(cache_size, objective=objective) + + def init_cache(self, cache_size: int, **kwargs) -> Cache: + objective = kwargs.get('objective', "byte-miss-ratio") + + if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: + msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" + raise ValueError(msg) + + self.objective = objective + + return LRB_init(cache_size, objective) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"objective={self.objective})") + + +class LRU(EvictionPolicy): + """Least Recently Used replacement policy. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return LRU_init(cache_size) + + +class ARC(EvictionPolicy): + """Adaptive Replacement Cache policy. + + ARC is a two-tiered cache with two LRU caches (T1 and T2) and two ghost + lists (B1 and B2). T1 records the obj accessed only once, T2 records + the obj accessed more than once. ARC has an internal parameter `p` to + learn and dynamically control the size of T1 and T2. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return ARC_init(cache_size) + + +class S3FIFO(EvictionPolicy): + """S3FIFO replacement policy. + + S3FIFO consists of three FIFO queues: Small, Main, and Ghost. Small + queue gets the obj and records the freq. + When small queue is full, if the obj to evict satisfies the threshold, + it will be moved to main queue. Otherwise, it will be evicted from small + queue and inserted into ghost queue. + When main queue is full, the obj to evict will be evicted and reinserted + like Clock. + If obj hits in the ghost queue, it will be moved to main queue. 
+ + Args: + cache_size: Size of the cache + fifo_size_ratio: Size ratio for FIFO queue (default: 0.1) + ghost_size_ratio: Size ratio for ghost queue (default: 0.9) + move_to_main_threshold: Threshold for moving obj from ghost to main (default: 2) + """ + def __init__(self, cache_size: int, fifo_size_ratio: float = 0.1, + ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2): + super().__init__(cache_size, fifo_size_ratio=fifo_size_ratio, + ghost_size_ratio=ghost_size_ratio, + move_to_main_threshold=move_to_main_threshold) + + def init_cache(self, cache_size: int, **kwargs): + fifo_size_ratio = kwargs.get('fifo_size_ratio', 0.1) + ghost_size_ratio = kwargs.get('ghost_size_ratio', 0.9) + move_to_main_threshold = kwargs.get('move_to_main_threshold', 2) + + if fifo_size_ratio <= 0 or ghost_size_ratio <= 0: + msg = "fifo_size_ratio and ghost_size_ratio must be greater than 0" + raise ValueError(msg) + if move_to_main_threshold < 0: + msg = "move_to_main_threshold must be greater or equal to 0" + raise ValueError(msg) + + self.fifo_size_ratio = fifo_size_ratio + self.ghost_size_ratio = ghost_size_ratio + self.move_to_main_threshold = move_to_main_threshold + + return S3FIFO_init(cache_size, fifo_size_ratio, ghost_size_ratio, move_to_main_threshold) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"fifo_size_ratio={self.fifo_size_ratio}, " + f"ghost_size_ratio={self.ghost_size_ratio}, " + f"move_to_main_threshold={self.move_to_main_threshold})") + + +class Sieve(EvictionPolicy): + """Sieve replacement policy. + + FIFO-Reinsertion with check pointer. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return Sieve_init(cache_size) + + +class ThreeLCache(EvictionPolicy): + """3L-Cache replacement policy. 
+ + Args: + cache_size: Size of the cache + objective: Objective function to optimize (default: "byte-miss-ratio") + """ + def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): + super().__init__(cache_size, objective=objective) + + def init_cache(self, cache_size: int, **kwargs): + objective = kwargs.get('objective', "byte-miss-ratio") + + if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: + msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" + raise ValueError(msg) + + self.objective = objective + + return ThreeLCache_init(cache_size, objective) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"objective={self.objective})") + + +class TinyLFU(EvictionPolicy): + """TinyLFU replacement policy. + + Args: + cache_size: Size of the cache + main_cache: Main cache to use (default: "SLRU") + window_size: Window size for TinyLFU (default: 0.01) + """ + def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01): + super().__init__(cache_size, main_cache=main_cache, window_size=window_size) + + def init_cache(self, cache_size: int, **kwargs): + main_cache = kwargs.get('main_cache', "SLRU") + window_size = kwargs.get('window_size', 0.01) + + if window_size <= 0: + msg = "window_size must be greater than 0" + raise ValueError(msg) + + self.main_cache = main_cache + self.window_size = window_size + + return TinyLFU_init(cache_size, main_cache, window_size) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"main_cache={self.main_cache}, " + f"window_size={self.window_size})") + + + +class PythonHookCachePolicy(EvictionPolicyBase): + """Python hook-based cache that allows defining custom policies using Python functions. + + This cache implementation allows users to define custom cache replacement algorithms + using pure Python functions instead of compiling C/C++ plugins. 
Users provide hook + functions for cache initialization, hit handling, miss handling, eviction decisions, + and cleanup. + + Args: + cache_size: Size of the cache in bytes + cache_name: Optional name for the cache (default: "PythonHookCache") + + Hook Functions Required: + init_hook(cache_size: int) -> Any: + Initialize plugin data structures. Return any object to be passed to other hooks. + + hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + Handle cache hit events. Update internal state as needed. + + miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + Handle cache miss events. Update internal state for new object. + + eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int: + Determine which object to evict. Return the object ID to be evicted. + + remove_hook(plugin_data: Any, obj_id: int) -> None: + Clean up when objects are removed from cache. + + free_hook(plugin_data: Any) -> None: [Optional] + Clean up plugin resources when cache is destroyed. + + Example: + >>> from collections import OrderedDict + >>> + >>> cache = PythonHookCachePolicy(1024) + >>> + >>> def init_hook(cache_size): + ... return OrderedDict() # LRU tracking + >>> + >>> def hit_hook(lru_dict, obj_id, obj_size): + ... lru_dict.move_to_end(obj_id) # Move to end (most recent) + >>> + >>> def miss_hook(lru_dict, obj_id, obj_size): + ... lru_dict[obj_id] = True # Add to end + >>> + >>> def eviction_hook(lru_dict, obj_id, obj_size): + ... return next(iter(lru_dict)) # Return least recent + >>> + >>> def remove_hook(lru_dict, obj_id): + ... 
lru_dict.pop(obj_id, None) + >>> + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> + >>> req = Request() + >>> req.obj_id = 1 + >>> req.obj_size = 100 + >>> hit = cache.get(req) + """ + def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"): + self._cache_size = cache_size + self.cache_name = cache_name + self.cache = PythonHookCache(cache_size, cache_name) + self._hooks_set = False + + def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): + """Set the hook functions for the cache. + + Args: + init_hook: Function called during cache initialization + hit_hook: Function called on cache hit + miss_hook: Function called on cache miss + eviction_hook: Function called to select eviction candidate + remove_hook: Function called when object is removed + free_hook: Optional function called during cache cleanup + """ + self.cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) + self._hooks_set = True + + def get(self, req: Request) -> bool: + """Process a cache request. + + Args: + req: The cache request to process + + Returns: + True if cache hit, False if cache miss + + Raises: + RuntimeError: If hooks have not been set + """ + if not self._hooks_set: + raise RuntimeError("Hooks must be set before using the cache. Call set_hooks() first.") + return self.cache.get(req) + + def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1): + """Process a trace with this cache and return miss ratio. + + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. 
+ + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + + Raises: + RuntimeError: If hooks have not been set + + Example: + >>> cache = PythonHookCachePolicy(1024*1024) + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) + >>> miss_ratio = cache.process_trace(reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + """ + if not self._hooks_set: + raise RuntimeError("Hooks must be set before processing trace. Call set_hooks() first.") + + from ._libcachesim import process_trace_python_hook + return process_trace_python_hook(self.cache, reader, max_req, max_sec, start_time, end_time) + + @property + def n_req(self): + """Number of requests processed.""" + return self.cache.n_req + + @property + def n_obj(self): + """Number of objects currently in cache.""" + return self.cache.n_obj + + @property + def occupied_byte(self): + """Number of bytes currently occupied in cache.""" + return self.cache.occupied_byte + + @property + def cache_size(self): + """Total cache size in bytes.""" + return self.cache.cache_size + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self._cache_size}, " + f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})") diff --git a/libCacheSim-python/pyproject.toml b/libCacheSim-python/pyproject.toml new file mode 100644 index 000000000..3bf6c66e8 --- /dev/null +++ b/libCacheSim-python/pyproject.toml @@ -0,0 +1,82 @@ +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11"] +build-backend = "scikit_build_core.build" + + +[project] +name = "libcachesim" +version = "0.3.1" +description="Python bindings for libCacheSim" +readme = "README.md" +requires-python = ">=3.9" + 
+[project.optional-dependencies] +test = ["pytest"] + + +[tool.scikit-build] +wheel.expand-macos-universal-tags = true + +[tool.pytest.ini_options] +minversion = "8.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +log_cli_level = "INFO" +filterwarnings = [ + "error", + "ignore::pytest.PytestCacheWarning", +] +testpaths = ["tests"] + + +[tool.cibuildwheel] +build-frontend = "build" +manylinux-x86_64-image = "quay.io/pypa/manylinux_2_34_x86_64" +# Only build for x86_64 architectures +build = "*-manylinux_x86_64" +# Install build dependencies using dnf. This runs once per container. +before-all = "dnf install -y ninja-build cmake libzstd-devel glib2-devel" +# We add 'rm -rf build' to ensure a clean build inside the container. +# The C++ core is built first, then the wheel build will use it. +# Use absolute paths to avoid issues with the working directory. +before-build = "rm -rf /project/build && cmake -S /project -B /project/build -G Ninja && cmake --build /project/build" +# Set the environment variable for the wheel build step. 
+environment = { LCS_BUILD_DIR = "/project/build" } +# test-requires = "pytest" +# test-command = "pytest {project}/tests" + +# [tool.cibuildwheel.pyodide] +# build-frontend = {name = "build", args = ["--exports", "whole_archive"]} + +[tool.ruff.lint] +extend-select = [ + "B", # flake8-bugbear + "I", # isort + "ARG", # flake8-unused-arguments + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "ICN", # flake8-import-conventions + "G", # flake8-logging-format + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 + "EXE", # flake8-executable + "NPY", # NumPy specific rules + "PD", # pandas-vet +] +ignore = [ + "PLR09", # Too many X + "PLR2004", # Magic comparison +] +isort.required-imports = ["from __future__ import annotations"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["T20"] diff --git a/libCacheSim-python/requirements.txt b/libCacheSim-python/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp new file mode 100644 index 000000000..43d875788 --- /dev/null +++ b/libCacheSim-python/src/pylibcachesim.cpp @@ -0,0 +1,842 @@ +#include +#include +#include + +// Suppress visibility warnings for pybind11 types +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattributes" + +#include +#include +#include + +#include "config.h" +#include "libCacheSim/cache.h" +#include "libCacheSim/cacheObj.h" +#include "libCacheSim/const.h" +#include "libCacheSim/enum.h" +#include "libCacheSim/logging.h" +#include "libCacheSim/macro.h" +#include "libCacheSim/reader.h" +#include "libCacheSim/request.h" +#include "libCacheSim/sampling.h" +#include "mystr.h" + +/* admission */ +#include "libCacheSim/admissionAlgo.h" + +/* eviction */ +#include 
"libCacheSim/evictionAlgo.h" + +/* cache simulator */ +#include "libCacheSim/profilerLRU.h" +#include "libCacheSim/simulator.h" + +/* bin */ +#include "cachesim/cache_init.h" +#include "cli_reader_utils.h" + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace py = pybind11; + +// Python Hook Cache Implementation +class PythonHookCache { + private: + uint64_t cache_size_; + std::string cache_name_; + std::unordered_map objects_; // obj_id -> obj_size + py::object plugin_data_; + + // Hook functions + py::function init_hook_; + py::function hit_hook_; + py::function miss_hook_; + py::function eviction_hook_; + py::function remove_hook_; + py::object free_hook_; // Changed to py::object to allow py::none() + + public: + uint64_t n_req = 0; + uint64_t n_obj = 0; + uint64_t occupied_byte = 0; + uint64_t cache_size; + + PythonHookCache(uint64_t cache_size, + const std::string& cache_name = "PythonHookCache") + : cache_size_(cache_size), + cache_name_(cache_name), + cache_size(cache_size), + free_hook_(py::none()) {} + + void set_hooks(py::function init_hook, py::function hit_hook, + py::function miss_hook, py::function eviction_hook, + py::function remove_hook, py::object free_hook = py::none()) { + init_hook_ = init_hook; + hit_hook_ = hit_hook; + miss_hook_ = miss_hook; + eviction_hook_ = eviction_hook; + remove_hook_ = remove_hook; + + // Handle free_hook properly + if (!free_hook.is_none()) { + free_hook_ = free_hook; + } else { + free_hook_ = py::none(); + } + + // Initialize plugin data + plugin_data_ = init_hook_(cache_size_); + } + + bool get(const request_t& req) { + n_req++; + + auto it = objects_.find(req.obj_id); + if (it != objects_.end()) { + // Cache hit + hit_hook_(plugin_data_, req.obj_id, req.obj_size); + return true; + } else { + // Cache miss - call miss hook first + miss_hook_(plugin_data_, req.obj_id, req.obj_size); + + // Check if eviction is needed + while (occupied_byte + req.obj_size > cache_size_ && !objects_.empty()) 
{ + // Need to evict + uint64_t victim_id = + eviction_hook_(plugin_data_, req.obj_id, req.obj_size) + .cast(); + auto victim_it = objects_.find(victim_id); + if (victim_it != objects_.end()) { + occupied_byte -= victim_it->second; + objects_.erase(victim_it); + n_obj--; + remove_hook_(plugin_data_, victim_id); + } else { + // Safety check: if eviction hook returns invalid ID, break to avoid + // infinite loop + break; + } + } + + // Insert new object if there's space + if (occupied_byte + req.obj_size <= cache_size_) { + objects_[req.obj_id] = req.obj_size; + occupied_byte += req.obj_size; + n_obj++; + } + + return false; + } + } + + ~PythonHookCache() { + if (!free_hook_.is_none()) { + py::function free_func = free_hook_.cast(); + free_func(plugin_data_); + } + } +}; + +// Restore visibility warnings +#pragma GCC diagnostic pop + +struct CacheDeleter { + void operator()(cache_t* ptr) const { + if (ptr != nullptr) ptr->cache_free(ptr); + } +}; + +struct RequestDeleter { + void operator()(request_t* ptr) const { + if (ptr != nullptr) free_request(ptr); + } +}; + +struct ReaderDeleter { + void operator()(reader_t* ptr) const { + if (ptr != nullptr) close_trace(ptr); + } +}; + +PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) + m.doc() = R"pbdoc( + libCacheSim Python bindings + -------------------------- + + .. currentmodule:: libcachesim + + .. 
autosummary:: + :toctree: _generate + + TODO(haocheng): add meaningful methods + )pbdoc"; + + py::enum_(m, "TraceType") + .value("CSV_TRACE", trace_type_e::CSV_TRACE) + .value("BIN_TRACE", trace_type_e::BIN_TRACE) + .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) + .value("ORACLE_GENERAL_TRACE", trace_type_e::ORACLE_GENERAL_TRACE) + .value("LCS_TRACE", trace_type_e::LCS_TRACE) + .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE) + .value("TWR_TRACE", trace_type_e::TWR_TRACE) + .value("TWRNS_TRACE", trace_type_e::TWRNS_TRACE) + .value("ORACLE_SIM_TWR_TRACE", trace_type_e::ORACLE_SIM_TWR_TRACE) + .value("ORACLE_SYS_TWR_TRACE", trace_type_e::ORACLE_SYS_TWR_TRACE) + .value("ORACLE_SIM_TWRNS_TRACE", trace_type_e::ORACLE_SIM_TWRNS_TRACE) + .value("ORACLE_SYS_TWRNS_TRACE", trace_type_e::ORACLE_SYS_TWRNS_TRACE) + .value("VALPIN_TRACE", trace_type_e::VALPIN_TRACE) + .value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE) + .export_values(); + + // *************** structs *************** + /** + * @brief Cache structure + */ + py::class_>(m, "Cache") + .def_readwrite("n_req", &cache_t::n_req) + .def_readwrite("n_obj", &cache_t::n_obj) + .def_readwrite("occupied_byte", &cache_t::occupied_byte) + .def_readwrite("cache_size", &cache_t::cache_size) + // methods + .def("get", [](cache_t& self, const request_t& req) { + return self.get(&self, &req); + }); + + /** + * @brief Request structure + */ + py::class_>(m, + "Request") + .def(py::init([]() { return new_request(); })) + .def_readwrite("clock_time", &request_t::clock_time) + .def_readwrite("hv", &request_t::hv) + .def_readwrite("obj_id", &request_t::obj_id) + .def_readwrite("obj_size", &request_t::obj_size) + .def_readwrite("op", &request_t::op); + + /** + * @brief Reader structure + */ + py::class_>(m, "Reader") + .def_readwrite("n_read_req", &reader_t::n_read_req) + .def_readwrite("n_total_req", &reader_t::n_total_req) + .def_readwrite("trace_path", &reader_t::trace_path) + .def_readwrite("file_size", 
&reader_t::file_size) + // methods + .def( + "get_wss", + [](reader_t& self, bool ignore_obj_size) { + int64_t wss_obj = 0, wss_byte = 0; + cal_working_set_size(&self, &wss_obj, &wss_byte); + return ignore_obj_size ? wss_obj : wss_byte; + }, + py::arg("ignore_obj_size") = false, + R"pbdoc( + Get the working set size of the trace. + + Args: + ignore_obj_size (bool): Whether to ignore the object size. + + Returns: + int: The working set size of the trace. + )pbdoc") + .def("__iter__", [](reader_t& self) -> reader_t& { return self; }) + .def("__next__", [](reader_t& self) { + auto req = std::unique_ptr(new_request()); + int ret = read_one_req(&self, req.get()); + if (ret != 0) { + throw py::stop_iteration(); + } + // std::cout << "Read request: " << req->obj_id + // << ", size: " << req->obj_size << std::endl; + return req; + }); + + py::class_(m, "reader_init_param_t") + .def(py::init<>()) + .def_readwrite("time_field", &reader_init_param_t::time_field) + .def_readwrite("obj_id_field", &reader_init_param_t::obj_id_field) + .def_readwrite("obj_size_field", &reader_init_param_t::obj_size_field) + .def_readwrite("delimiter", &reader_init_param_t::delimiter) + .def_readwrite("has_header", &reader_init_param_t::has_header) + .def_property( + "binary_fmt_str", + // Getter: C char* to Python string (returns copy) + [](const reader_init_param_t& self) { + return self.binary_fmt_str ? 
std::string(self.binary_fmt_str) : ""; + }, + // Setter: Python string to C char* (handles deep copy and old memory) + [](reader_init_param_t& self, const std::string& value) { + // Free existing memory if any + if (self.binary_fmt_str != nullptr) { + free(self.binary_fmt_str); // Use free() since it was + // strdup'd/malloc'd + } + // Deep copy the new string + self.binary_fmt_str = strdup(value.c_str()); + if (self.binary_fmt_str == nullptr && !value.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str"); + } + }); + + // *************** functions *************** + /** + * @brief Open a trace file for reading + */ + m.def( + "open_trace", + [](const std::string& trace_path, int type, const py::object& params) { + // Create an init_param instance, it will be populated from Python + reader_init_param_t init_param = {}; + + // === IMPORTANT: Initialize binary_fmt_str to nullptr === + // This is crucial if it's not always set from Python, + // so that free() won't be called on uninitialized memory if not set + // later. 
+ init_param.binary_fmt_str = nullptr; + + // Populate other fields from Python dict or object + if (py::isinstance(params)) { + py::dict dict_params = params.cast(); + init_param.time_field = dict_params["time_field"].cast(); + init_param.obj_id_field = dict_params["obj_id_field"].cast(); + init_param.obj_size_field = dict_params["obj_size_field"].cast(); + init_param.delimiter = + dict_params["delimiter"].cast()[0]; + init_param.has_header = dict_params["has_header"].cast(); + // If binary_fmt_str is in dict_params, set it via property setter + if (dict_params.contains("binary_fmt_str") && + !dict_params["binary_fmt_str"].is_none()) { + std::string bfs_val = + dict_params["binary_fmt_str"].cast(); + if (init_param.binary_fmt_str != nullptr) + free(init_param.binary_fmt_str); + init_param.binary_fmt_str = strdup(bfs_val.c_str()); + if (init_param.binary_fmt_str == nullptr && !bfs_val.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str from dict"); + } + } + } else if (!params.is_none()) { + // If using a reader_init_param_t object from Python, its members are + // already set via def_property (No need to copy here, just ensure + // it's reader_init_param_t object) If `params` is a + // `reader_init_param_t` object, Pybind11 will pass its fields + // directly We need to ensure that the `binary_fmt_str` member of + // `params` is correctly handled. The direct `getattr` below is for + // other fields, for binary_fmt_str, the `def_property` takes care. 
+ init_param.time_field = py::getattr(params, "time_field").cast(); + init_param.obj_id_field = + py::getattr(params, "obj_id_field").cast(); + init_param.obj_size_field = + py::getattr(params, "obj_size_field").cast(); + init_param.delimiter = + py::getattr(params, "delimiter").cast()[0]; + init_param.has_header = + py::getattr(params, "has_header").cast(); + // Handle binary_fmt_str if it's set on the Python object + if (py::hasattr(params, "binary_fmt_str") && + !py::getattr(params, "binary_fmt_str").is_none()) { + std::string bfs_val = + py::getattr(params, "binary_fmt_str").cast(); + if (init_param.binary_fmt_str != nullptr) + free(init_param.binary_fmt_str); + init_param.binary_fmt_str = strdup(bfs_val.c_str()); + if (init_param.binary_fmt_str == nullptr && !bfs_val.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str from object"); + } + } + } + // ... (rest of open_trace function) ... + reader_t* ptr = open_trace( + trace_path.c_str(), static_cast(type), &init_param); + return std::unique_ptr(ptr); + }, + py::arg("trace_path"), py::arg("type"), + py::arg("reader_init_param") = py::none(), + R"pbdoc( + Open a trace file for reading. + + Args: + trace_path (str): Path to the trace file. + type (int): Type of the trace (e.g., CSV_TRACE). + reader_init_param (Union[dict, reader_init_param_t, None]): Initialization parameters for the reader. + + Returns: + Reader: A new reader instance for the trace. + )pbdoc"); + + /** + * @brief Generic function to create a cache instance. + */ + m.def( + "create_cache", + [](const std::string& eviction_algo, const uint64_t cache_size, + const std::string& eviction_params, + bool consider_obj_metadata) { return nullptr; }, + py::arg("eviction_algo"), py::arg("cache_size"), + py::arg("eviction_params"), py::arg("consider_obj_metadata"), + R"pbdoc( + Create a cache instance. + + Args: + eviction_algo (str): Eviction algorithm to use (e.g., "LRU", "FIFO", "Random"). 
+ cache_size (int): Size of the cache in bytes. + eviction_params (str): Additional parameters for the eviction algorithm. + consider_obj_metadata (bool): Whether to consider object metadata in eviction decisions. + + Returns: + Cache: A new cache instance. + )pbdoc"); + + /* TODO(haocheng): should we support all parameters in the + * common_cache_params_t? (hash_power, etc.) */ + + // Currently supported eviction algorithms with direct initialization: + // - "ARC" + // - "Clock" + // - "FIFO" + // - "LRB" + // - "LRU" + // - "S3FIFO" + // - "Sieve" + // - "ThreeLCache" + // - "TinyLFU" + // - "TwoQ" + + /** + * @brief Create a ARC cache instance. + */ + m.def( + "ARC_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = ARC_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a ARC cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + )pbdoc"); + + /** + * @brief Create a Clock cache instance. + */ + m.def( + "Clock_init", + [](uint64_t cache_size, long int n_bit_counter, long int init_freq) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + // assemble the cache specific parameters + std::string cache_specific_params = + "n-bit-counter=" + std::to_string(n_bit_counter) + "," + + "init-freq=" + std::to_string(init_freq); + + cache_t* ptr = Clock_init(cc_params, cache_specific_params.c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("n_bit_counter") = 1, + py::arg("init_freq") = 0, + R"pbdoc( + Create a Clock cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + n_bit_counter (int): Number of bits for counter (default: 1). + init_freq (int): Initial frequency value (default: 0). + + Returns: + Cache: A new Clock cache instance. + )pbdoc"); + + /** + * @brief Create a FIFO cache instance. 
+ */ + m.def( + "FIFO_init", + [](uint64_t cache_size) { + // Construct common cache parameters + common_cache_params_t cc_params = {.cache_size = cache_size}; + // FIFO no specific parameters, so we pass nullptr + cache_t* ptr = FIFO_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a FIFO cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new FIFO cache instance. + )pbdoc"); + +#ifdef ENABLE_LRB + /** + * @brief Create a LRB cache instance. + */ + m.def( + "LRB_init", + [](uint64_t cache_size, std::string objective) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = LRB_init(cc_params, ("objective=" + objective).c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", + R"pbdoc( + Create a LRB cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + objective (str): Objective function to optimize (default: "byte-miss-ratio"). + + Returns: + Cache: A new LRB cache instance. + )pbdoc"); +#else + // TODO(haocheng): add a dummy function to avoid the error when LRB is not + // enabled + m.def( + "LRB_init", + [](uint64_t cache_size, std::string objective) { + throw std::runtime_error("LRB is not enabled"); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); +#endif + + /** + * @brief Create a LRU cache instance. + */ + m.def( + "LRU_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = LRU_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a LRU cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new LRU cache instance. + )pbdoc"); + + /** + * @brief Create a S3FIFO cache instance. 
+ */ + m.def( + "S3FIFO_init", + [](uint64_t cache_size, double fifo_size_ratio, double ghost_size_ratio, + int move_to_main_threshold) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = S3FIFO_init( + cc_params, + ("fifo-size-ratio=" + std::to_string(fifo_size_ratio) + "," + + "ghost-size-ratio=" + std::to_string(ghost_size_ratio) + "," + + "move-to-main-threshold=" + std::to_string(move_to_main_threshold)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("fifo_size_ratio") = 0.10, + py::arg("ghost_size_ratio") = 0.90, py::arg("move_to_main_threshold") = 2, + R"pbdoc( + Create a S3FIFO cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + fifo_size_ratio (float): Ratio of FIFO size to cache size (default: 0.10). + ghost_size_ratio (float): Ratio of ghost size to cache size (default: 0.90). + move_to_main_threshold (int): Threshold for moving to main queue (default: 2). + + Returns: + Cache: A new S3FIFO cache instance. + )pbdoc"); + + /** + * @brief Create a Sieve cache instance. + */ + m.def( + "Sieve_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = Sieve_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a Sieve cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new Sieve cache instance. + )pbdoc"); + +#ifdef ENABLE_3L_CACHE + /** + * @brief Create a ThreeL cache instance. + */ + m.def( + "ThreeLCache_init", + [](uint64_t cache_size, std::string objective) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = + ThreeLCache_init(cc_params, ("objective=" + objective).c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", + R"pbdoc( + Create a ThreeL cache instance. + + Args: + cache_size (int): Size of the cache in bytes. 
+ objective (str): Objective function to optimize (default: "byte-miss-ratio"). + + Returns: + Cache: A new ThreeL cache instance. + )pbdoc"); +#else + // TODO(haocheng): add a dummy function to avoid the error when ThreeLCache is + // not enabled + m.def( + "ThreeLCache_init", + [](uint64_t cache_size, std::string objective) { + throw std::runtime_error("ThreeLCache is not enabled"); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); +#endif + + /** + * @brief Create a TinyLFU cache instance. + */ + // mark evivtion parsing need change + m.def( + "TinyLFU_init", + [](uint64_t cache_size, std::string main_cache, double window_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = WTinyLFU_init( + cc_params, ("main-cache=" + main_cache + "," + + "window-size=" + std::to_string(window_size)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("main_cache") = "SLRU", + py::arg("window_size") = 0.01, + R"pbdoc( + Create a TinyLFU cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + main_cache (str): Main cache to use (default: "SLRU"). + window_size (float): Window size for TinyLFU (default: 0.01). + + Returns: + Cache: A new TinyLFU cache instance. + )pbdoc"); + + /** + * @brief Create a TwoQ cache instance. + */ + m.def( + "TwoQ_init", + [](uint64_t cache_size, double Ain_size_ratio, double Aout_size_ratio) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = TwoQ_init( + cc_params, + ("Ain-size-ratio=" + std::to_string(Ain_size_ratio) + "," + + "Aout-size-ratio=" + std::to_string(Aout_size_ratio)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("Ain_size_ratio") = 0.25, + py::arg("Aout_size_ratio") = 0.5, + R"pbdoc( + Create a TwoQ cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + Ain_size_ratio (float): Ratio of A-in size to cache size (default: 0.25). 
+ Aout_size_ratio (float): Ratio of A-out size to cache size (default: 0.5). + + Returns: + Cache: A new TwoQ cache instance. + )pbdoc"); + + /** + * @brief Create a Python hook-based cache instance. + */ + py::class_(m, "PythonHookCache") + .def(py::init(), py::arg("cache_size"), + py::arg("cache_name") = "PythonHookCache") + .def("set_hooks", &PythonHookCache::set_hooks, py::arg("init_hook"), + py::arg("hit_hook"), py::arg("miss_hook"), py::arg("eviction_hook"), + py::arg("remove_hook"), py::arg("free_hook") = py::none(), + R"pbdoc( + Set the hook functions for the cache. + + Args: + init_hook (callable): Function called during cache initialization. + Signature: init_hook(cache_size: int) -> Any + hit_hook (callable): Function called on cache hit. + Signature: hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None + miss_hook (callable): Function called on cache miss. + Signature: miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None + eviction_hook (callable): Function called to select eviction candidate. + Signature: eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int + remove_hook (callable): Function called when object is removed. + Signature: remove_hook(plugin_data: Any, obj_id: int) -> None + free_hook (callable, optional): Function called during cache cleanup. + Signature: free_hook(plugin_data: Any) -> None + )pbdoc") + .def("get", &PythonHookCache::get, py::arg("req"), + R"pbdoc( + Process a cache request. + + Args: + req (Request): The cache request to process. + + Returns: + bool: True if cache hit, False if cache miss. + )pbdoc") + .def_readwrite("n_req", &PythonHookCache::n_req) + .def_readwrite("n_obj", &PythonHookCache::n_obj) + .def_readwrite("occupied_byte", &PythonHookCache::occupied_byte) + .def_readwrite("cache_size", &PythonHookCache::cache_size); + + /** + * @brief Process a trace with a cache and return miss ratio. 
+ */ + m.def( + "process_trace", + [](cache_t& cache, reader_t& reader, int max_req = -1, int max_sec = -1, + int64_t start_time = -1, int64_t end_time = -1) { + request_t* req = new_request(); + int n_req = 0, n_hit = 0; + bool hit; + + read_one_req(&reader, req); + while (req->valid) { + // Check limits + if (max_req != -1 && n_req >= max_req) break; + if (max_sec != -1 && req->clock_time >= end_time) break; + if (start_time != -1 && req->clock_time < start_time) { + read_one_req(&reader, req); + continue; + } + + n_req += 1; + hit = cache.get(&cache, req); + if (hit) n_hit += 1; + read_one_req(&reader, req); + } + + free_request(req); + // return the miss ratio + return n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; + }, + py::arg("cache"), py::arg("reader"), py::arg("max_req") = -1, + py::arg("max_sec") = -1, py::arg("start_time") = -1, + py::arg("end_time") = -1, + R"pbdoc( + Process a trace with a cache and return miss ratio. + + This function processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. + + Args: + cache (Cache): The cache instance to use for processing. + reader (Reader): The trace reader instance. + max_req (int): Maximum number of requests to process (-1 for no limit). + max_sec (int): Maximum seconds to process (-1 for no limit). + start_time (int): Start time filter (-1 for no filter). + end_time (int): End time filter (-1 for no filter). + + Returns: + float: Miss ratio (0.0 to 1.0). + + Example: + >>> cache = libcachesim.LRU(1024*1024) + >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) + >>> miss_ratio = libcachesim.process_trace(cache, reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + )pbdoc"); + + /** + * @brief Process a trace with a Python hook cache and return miss ratio. 
+ */ + m.def( + "process_trace_python_hook", + [](PythonHookCache& cache, reader_t& reader, int max_req = -1, + int max_sec = -1, int64_t start_time = -1, int64_t end_time = -1) { + request_t* req = new_request(); + int n_req = 0, n_hit = 0; + bool hit; + + read_one_req(&reader, req); + while (req->valid) { + // Check limits + if (max_req != -1 && n_req >= max_req) break; + if (max_sec != -1 && req->clock_time >= end_time) break; + if (start_time != -1 && req->clock_time < start_time) { + read_one_req(&reader, req); + continue; + } + + n_req += 1; + hit = cache.get(*req); + if (hit) n_hit += 1; + read_one_req(&reader, req); + } + + free_request(req); + // return the miss ratio + return n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; + }, + py::arg("cache"), py::arg("reader"), py::arg("max_req") = -1, + py::arg("max_sec") = -1, py::arg("start_time") = -1, + py::arg("end_time") = -1, + R"pbdoc( + Process a trace with a Python hook cache and return miss ratio. + + This function processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. Specifically designed + for PythonHookCache instances. + + Args: + cache (PythonHookCache): The Python hook cache instance to use. + reader (Reader): The trace reader instance. + max_req (int): Maximum number of requests to process (-1 for no limit). + max_sec (int): Maximum seconds to process (-1 for no limit). + start_time (int): Start time filter (-1 for no filter). + end_time (int): End time filter (-1 for no filter). + + Returns: + float: Miss ratio (0.0 to 1.0). 
+ + Example: + >>> cache = libcachesim.PythonHookCachePolicy(1024*1024) + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) + >>> miss_ratio = libcachesim.process_trace_python_hook(cache.cache, reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + )pbdoc"); + +#ifdef VERSION_INFO + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); +#else + m.attr("__version__") = "dev"; +#endif +} diff --git a/libCacheSim-python/tests/conftest.py b/libCacheSim-python/tests/conftest.py new file mode 100644 index 000000000..5335134b1 --- /dev/null +++ b/libCacheSim-python/tests/conftest.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import os +import gc + +import pytest + +from libcachesim import Reader, TraceType, open_trace + + +@pytest.fixture +def mock_reader(): + data_file = os.path.join( # noqa: PTH118 + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + reader: Reader = open_trace( + data_file, + type=TraceType.ORACLE_GENERAL_TRACE.value, + ) + try: + yield reader + finally: + # More careful cleanup + try: + if hasattr(reader, 'close'): + reader.close() + except Exception: # Be specific about exception type + pass + # Don't explicitly del reader here, let Python handle it + gc.collect() diff --git a/libCacheSim-python/tests/pytest.ini b/libCacheSim-python/tests/pytest.ini new file mode 100644 index 000000000..561da0177 --- /dev/null +++ b/libCacheSim-python/tests/pytest.ini @@ -0,0 +1,9 @@ +[pytest] +addopts = -ra --strict-markers -m "not optional" + +markers = + optional: mark test as optional + +python_files = test.py test_*.py *_test.py +python_classes = Test* +python_functions = test_* \ No newline at end of file diff --git a/libCacheSim-python/tests/reference.csv b/libCacheSim-python/tests/reference.csv new file mode 100644 index 000000000..cb569d0c9 --- /dev/null 
+++ b/libCacheSim-python/tests/reference.csv @@ -0,0 +1,20 @@ +FIFO,0.01,0.8368 +ARC,0.01,0.8222 +Clock,0.01,0.8328 +LRB,0.01,0.8339 +LRU,0.01,0.8339 +S3FIFO,0.01,0.8235 +Sieve,0.01,0.8231 +3LCache,0.01,0.8339 +TinyLFU,0.01,0.8262 +TwoQ,0.01,0.8276 +FIFO,0.1,0.8075 +ARC,0.1,0.7688 +Clock,0.1,0.8086 +LRB,0.1,0.8097 +LRU,0.1,0.8097 +S3FIFO,0.1,0.7542 +Sieve,0.1,0.7903 +3LCache,0.1,0.8097 +TinyLFU,0.1,0.7666 +TwoQ,0.1,0.7695 diff --git a/libCacheSim-python/tests/test_eviction.py b/libCacheSim-python/tests/test_eviction.py new file mode 100644 index 000000000..1de462a84 --- /dev/null +++ b/libCacheSim-python/tests/test_eviction.py @@ -0,0 +1,61 @@ +import pytest +import gc +import sys +import os + +from libcachesim import ( + ARC, + FIFO, + LRU, + S3FIFO, + Clock, + Sieve, + TinyLFU, + TwoQ, +) +from tests.utils import get_reference_data + + +@pytest.mark.parametrize("eviction_algo", [ + FIFO, + ARC, + Clock, + LRU, + S3FIFO, + Sieve, + TinyLFU, + TwoQ, +]) +@pytest.mark.parametrize("cache_size_ratio", [0.01]) +def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader): + cache = None + try: + # create a cache with the eviction policy + cache = eviction_algo(cache_size=int(mock_reader.get_wss()*cache_size_ratio)) + req_count = 0 + miss_count = 0 + + # Limit the number of requests to avoid long test times + # max_requests = 1000 + for i, req in enumerate(mock_reader): + # if i >= max_requests: + # break + hit = cache.get(req) + if not hit: + miss_count += 1 + req_count += 1 + + if req_count == 0: + pytest.skip("No requests processed") + + miss_ratio = miss_count / req_count + reference_miss_ratio = get_reference_data(eviction_algo.__name__, cache_size_ratio) + if reference_miss_ratio is None: + pytest.skip(f"No reference data for {eviction_algo.__name__} with cache size ratio {cache_size_ratio}") + assert abs(miss_ratio - reference_miss_ratio) < 0.01, f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}" + + except Exception as e: + 
print(f"Error in test_eviction_algo: {e}") + raise + finally: + pass diff --git a/libCacheSim-python/tests/test_process_trace.py b/libCacheSim-python/tests/test_process_trace.py new file mode 100644 index 000000000..0d08edeab --- /dev/null +++ b/libCacheSim-python/tests/test_process_trace.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +""" +Test file for process_trace functionality. +""" + +import sys +import os +import pytest + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader with binary trace file.""" + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) + + +def test_process_trace_native(): + """Test process_trace with native LRU cache.""" + print("Testing process_trace with native LRU...") + + # Open trace + reader = create_trace_reader() + if reader is None: + pytest.skip("Test trace file not found, skipping test") + + # Create LRU cache + cache = lcs.LRU(1024*1024) # 1MB cache + + # Process trace and get miss ratio + miss_ratio = cache.process_trace(reader, max_req=1000) + + print(f"Native LRU miss ratio (first 1000 requests): {miss_ratio:.4f}") + + # Verify miss ratio is reasonable (should be between 0 and 1) + assert 0.0 <= miss_ratio <= 1.0, f"Invalid miss ratio: {miss_ratio}" + print("PASS: Native LRU process_trace test PASSED") + + +def test_process_trace_python_hook(): + """Test process_trace with Python hook cache.""" + print("\nTesting process_trace with Python hook 
cache...") + + # Open trace + reader = create_trace_reader() + if reader is None: + pytest.skip("Test trace file not found, skipping test") + + # Create Python hook LRU cache + cache = lcs.PythonHookCachePolicy(1024*1024, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test both methods + # Method 1: Direct function call + miss_ratio1 = lcs.process_trace_python_hook(cache.cache, reader, max_req=1000) + + # Need to reopen the trace for second test + reader2 = create_trace_reader() + if reader2 is None: + print("Warning: Cannot reopen trace file, skipping second test") + # Continue with just the first test result + assert miss_ratio1 is not None and 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio: {miss_ratio1}" + return + + # Reset cache for fair comparison + cache2 = lcs.PythonHookCachePolicy(1024*1024, "TestLRU2") + cache2.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Method 2: Convenience method + miss_ratio2 = cache2.process_trace(reader2, max_req=1000) + + print(f"Python hook LRU miss ratio (method 1): {miss_ratio1:.4f}") + print(f"Python hook LRU miss ratio (method 2): {miss_ratio2:.4f}") + + # Verify both methods give the same result and miss ratios are reasonable + assert 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio 1: {miss_ratio1}" + assert 0.0 <= miss_ratio2 <= 1.0, f"Invalid miss ratio 2: {miss_ratio2}" + assert abs(miss_ratio1 - miss_ratio2) < 0.001, f"Different results from the two methods: {miss_ratio1} vs {miss_ratio2}" + print("PASS: Python hook process_trace test PASSED") + + +def 
test_compare_native_vs_python_hook(): + """Compare native LRU vs Python hook LRU using process_trace.""" + print("\nComparing native LRU vs Python hook LRU using process_trace...") + + cache_size = 512*1024 # 512KB cache + max_requests = 500 + + # Test native LRU + native_cache = lcs.LRU(cache_size) + reader1 = create_trace_reader() + if reader1 is None: + pytest.skip("Test trace file not found, skipping test") + + native_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) + + # Test Python hook LRU + hook_cache = lcs.PythonHookCachePolicy(cache_size, "HookLRU") + + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + reader2 = create_trace_reader() + if reader2 is None: + print("Warning: Cannot reopen trace file, skipping comparison") + return # Skip test + + hook_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) + + print(f"Native LRU miss ratio: {native_miss_ratio:.4f}") + print(f"Python hook LRU miss ratio: {hook_miss_ratio:.4f}") + print(f"Difference: {abs(native_miss_ratio - hook_miss_ratio):.4f}") + + # They should be very similar (allowing for some small differences due to implementation details) + assert abs(native_miss_ratio - hook_miss_ratio) < 0.05, f"Too much difference: {abs(native_miss_ratio - hook_miss_ratio):.4f}" + print("PASS: Native vs Python hook comparison test PASSED") + + +def test_error_handling(): + """Test error handling for process_trace.""" + print("\nTesting error handling...") + + cache = lcs.PythonHookCachePolicy(1024) + + reader = create_trace_reader() + if reader is None: + pytest.skip("Test trace file not found, 
skipping error test") + + # Try to process trace without setting hooks + try: + cache.process_trace(reader) + assert False, "Should have raised RuntimeError" + except RuntimeError as e: + print(f"Correctly caught error: {e}") + print("PASS: Error handling test PASSED") + + +def test_lru_implementation_accuracy(): + """Test that Python hook LRU implementation matches native LRU closely.""" + print("Testing LRU implementation accuracy...") + + cache_size = 1024 * 1024 # 1MB + max_requests = 100 + + # Create readers + reader1 = create_trace_reader() + reader2 = create_trace_reader() + + if not reader1 or not reader2: + pytest.skip("Cannot open trace files for LRU accuracy test") + + # Test native LRU + native_cache = lcs.LRU(cache_size) + native_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) + + # Test Python hook LRU + hook_cache = lcs.PythonHookCachePolicy(cache_size, "AccuracyTestLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_optimized_lru_hooks() + hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + hook_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) + + # Calculate difference + difference = abs(native_miss_ratio - hook_miss_ratio) + percentage_diff = (difference / native_miss_ratio) * 100 if native_miss_ratio > 0 else 0 + + print(f"Native LRU miss ratio: {native_miss_ratio:.6f}") + print(f"Hook LRU miss ratio: {hook_miss_ratio:.6f}") + print(f"Percentage difference: {percentage_diff:.4f}%") + + # Assert that the difference is small (< 5%) + assert percentage_diff < 5.0, f"LRU implementation difference too large: {percentage_diff:.4f}%" + print("PASS: LRU implementation accuracy test passed") + + +def create_optimized_lru_hooks(): + """Create optimized LRU hooks that closely match native LRU behavior.""" + + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) 
+ + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +if __name__ == "__main__": + tests = [ + test_process_trace_native, + test_process_trace_python_hook, + test_compare_native_vs_python_hook, + test_error_handling, + test_lru_implementation_accuracy, + ] + + all_passed = True + for test in tests: + try: + test() # Just call the test, don't check return value + print(f"PASS: {test.__name__} passed") + except Exception as e: + print(f"FAIL: {test.__name__} failed with exception: {e}") + all_passed = False + + if all_passed: + print("\nAll process_trace tests PASSED!") + else: + print("\nSome process_trace tests FAILED!") diff --git a/libCacheSim-python/tests/test_python_hook_cache.py b/libCacheSim-python/tests/test_python_hook_cache.py new file mode 100644 index 000000000..c84c03cbb --- /dev/null +++ b/libCacheSim-python/tests/test_python_hook_cache.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 +""" +Test file for PythonHookCachePolicy functionality. 
+""" + +import sys +import os +import pytest + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def test_python_hook_cache(): + """Test the Python hook cache implementation.""" + print("Testing PythonHookCachePolicy...") + + # Create cache + cache_size = 300 # 3 objects of size 100 each + cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + print(f"Initializing LRU cache with size {cache_size}") + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + print(f"Hit: object {obj_id}") + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + print(f"Miss: object {obj_id}, size {obj_size}") + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + victim = next(iter(lru_dict)) + print(f"Evicting object {victim} to make room for {obj_id}") + return victim + + def remove_hook(lru_dict, obj_id): + print(f"Removing object {obj_id}") + lru_dict.pop(obj_id, None) + + # Set hooks + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test sequence + test_requests = [ + (1, 100), # Miss - insert 1 + (2, 100), # Miss - insert 2 + (3, 100), # Miss - insert 3 (cache full) + (1, 100), # Hit - move 1 to end + (4, 100), # Miss - should evict 2 (LRU), insert 4 + (2, 100), # Miss - should evict 3, insert 2 + (1, 100), # Hit - move 1 to end + ] + + print("\n--- Starting cache simulation ---") + for obj_id, obj_size in test_requests: + req = lcs.Request() + req.obj_id = obj_id + req.obj_size = obj_size + + result = cache.get(req) + print(f"Request {obj_id}: {'HIT' if result else 'MISS'}") + print(f" Cache stats: {cache.n_obj} objects, 
{cache.occupied_byte} bytes\n") + + print("Test completed successfully!") + + +def test_error_handling(): + """Test error handling.""" + print("\nTesting error handling...") + + cache = lcs.PythonHookCachePolicy(1000) + + # Try to use cache without setting hooks + req = lcs.Request() + req.obj_id = 1 + req.obj_size = 100 + + with pytest.raises(RuntimeError): + cache.get(req) + + print("Error handling test passed!") + + +def test_lru_comparison(): + """Test Python hook LRU against native LRU to verify identical behavior.""" + print("\nTesting Python hook LRU vs Native LRU comparison...") + + cache_size = 300 # 3 objects of size 100 each + + # Create native LRU cache + native_lru = lcs.LRU(cache_size) + + # Create Python hook LRU cache + hook_lru = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test sequence with various access patterns + test_requests = [ + (1, 100), # Miss - insert 1 + (2, 100), # Miss - insert 2 + (3, 100), # Miss - insert 3 (cache full) + (1, 100), # Hit - move 1 to end + (4, 100), # Miss - should evict 2 (LRU), insert 4 + (2, 100), # Miss - should evict 3, insert 2 + (1, 100), # Hit - move 1 to end + (3, 100), # Miss - should evict 4, insert 3 + (5, 100), # Miss - should evict 2, insert 5 + (1, 100), # Hit - move 1 to end + (3, 100), # Hit - move 3 to end + (6, 100), # Miss - should evict 5, insert 6 + ] + + print("\n--- Comparing LRU implementations ---") + hit_rate_matches = 0 + total_requests = len(test_requests) + + for i, (obj_id, obj_size) in enumerate(test_requests): + 
# Test native LRU + req_native = lcs.Request() + req_native.obj_id = obj_id + req_native.obj_size = obj_size + native_result = native_lru.get(req_native) + + # Test hook LRU + req_hook = lcs.Request() + req_hook.obj_id = obj_id + req_hook.obj_size = obj_size + hook_result = hook_lru.get(req_hook) + + # Compare results + match = native_result == hook_result + if match: + hit_rate_matches += 1 + + print(f"Request {i+1}: obj_id={obj_id}") + print(f" Native LRU: {'HIT' if native_result else 'MISS'}") + print(f" Hook LRU: {'HIT' if hook_result else 'MISS'}") + print(f" Match: {'PASS' if match else 'FAIL'}") + + # Compare cache statistics + stats_match = (native_lru.cache.n_obj == hook_lru.n_obj and + native_lru.cache.occupied_byte == hook_lru.occupied_byte) + print(f" Native stats: {native_lru.cache.n_obj} objects, {native_lru.cache.occupied_byte} bytes") + print(f" Hook stats: {hook_lru.n_obj} objects, {hook_lru.occupied_byte} bytes") + print(f" Stats match: {'PASS' if stats_match else 'FAIL'}") + print() + + if not match: + print(f"ERROR: Hit/miss mismatch at request {i+1}") + return False + + if not stats_match: + print(f"ERROR: Cache statistics mismatch at request {i+1}") + return False + + accuracy = (hit_rate_matches / total_requests) * 100 + print(f"LRU comparison test results:") + print(f" Total requests: {total_requests}") + print(f" Matching results: {hit_rate_matches}") + print(f" Accuracy: {accuracy:.1f}%") + + assert accuracy == 100.0, f"LRU implementations differ! 
Accuracy: {accuracy:.1f}%" + print("PASS: LRU comparison test PASSED - Both implementations behave identically!") + + +def test_lru_comparison_variable_sizes(): + """Test Python hook LRU vs Native LRU with variable object sizes.""" + print("\nTesting Python hook LRU vs Native LRU with variable object sizes...") + + cache_size = 1000 # Total cache capacity + + # Create native LRU cache + native_lru = lcs.LRU(cache_size) + + # Create Python hook LRU cache + hook_lru = lcs.PythonHookCachePolicy(cache_size, "VariableSizeLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test sequence with variable object sizes + test_requests = [ + (1, 200), # Miss - insert 1 (200 bytes) + (2, 300), # Miss - insert 2 (300 bytes) + (3, 400), # Miss - insert 3 (400 bytes) - total 900 bytes + (4, 200), # Miss - should evict 1, insert 4 (total would be 1100, over limit) + (1, 200), # Miss - should evict 2, insert 1 + (5, 100), # Miss - should evict 3, insert 5 + (4, 200), # Hit - access 4 + (6, 500), # Miss - should evict multiple objects to fit + (4, 200), # Miss - 4 was evicted + ] + + print("\n--- Comparing LRU implementations with variable sizes ---") + all_match = True + + for i, (obj_id, obj_size) in enumerate(test_requests): + # Test native LRU + req_native = lcs.Request() + req_native.obj_id = obj_id + req_native.obj_size = obj_size + native_result = native_lru.get(req_native) + + # Test hook LRU + req_hook = lcs.Request() + req_hook.obj_id = obj_id + req_hook.obj_size = obj_size + hook_result = hook_lru.get(req_hook) + + # Compare results + result_match = 
native_result == hook_result + stats_match = (native_lru.cache.n_obj == hook_lru.n_obj and + native_lru.cache.occupied_byte == hook_lru.occupied_byte) + + print(f"Request {i+1}: obj_id={obj_id}, size={obj_size}") + print(f" Native LRU: {'HIT' if native_result else 'MISS'}") + print(f" Hook LRU: {'HIT' if hook_result else 'MISS'}") + print(f" Result match: {'PASS' if result_match else 'FAIL'}") + print(f" Native stats: {native_lru.cache.n_obj} objects, {native_lru.cache.occupied_byte} bytes") + print(f" Hook stats: {hook_lru.n_obj} objects, {hook_lru.occupied_byte} bytes") + print(f" Stats match: {'PASS' if stats_match else 'FAIL'}") + print() + + if not result_match or not stats_match: + all_match = False + print(f"ERROR: Mismatch at request {i+1}") + + assert all_match, "Variable size LRU comparison failed - implementations differ!" + print("PASS: Variable size LRU comparison test PASSED!") + + +if __name__ == "__main__": + test_python_hook_cache() + test_error_handling() + test_lru_comparison() + test_lru_comparison_variable_sizes() diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py new file mode 100644 index 000000000..48d3751de --- /dev/null +++ b/libCacheSim-python/tests/test_unified_interface.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +""" +Test the unified interface for all cache policies. +""" + +import sys +import os +import pytest + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader. + + Returns: + Reader or None: A trace reader instance, or None if trace file not found. 
+ """ + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) + + +def create_test_lru_hooks(): + """Create LRU hooks for testing. + + Returns: + tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + """ + + def init_hook(cache_size): + """Initialize LRU data structure.""" + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + """Handle cache hit by moving to end (most recently used).""" + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + """Handle cache miss by adding new object.""" + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + """Return the least recently used object ID for eviction.""" + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + """Remove object from LRU structure.""" + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +def test_unified_process_trace_interface(): + """Test that all cache policies have the same process_trace interface.""" + print("Testing unified process_trace interface...") + + cache_size = 1024 * 1024 # 1MB + max_requests = 100 + + # Create trace reader + reader = create_trace_reader() + if not reader: + pytest.skip("Skipping test: Trace file not available") + + # Test different cache policies + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": lcs.ARC(cache_size), + } + + # Add Python hook cache + python_cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() + python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Python Hook LRU"] = python_cache 
+ + print("\n--- Testing unified process_trace interface ---") + + results = {} + for name, cache in caches.items(): + # Create fresh reader for each test + test_reader = create_trace_reader() + if not test_reader: + pytest.skip(f"Cannot create reader for {name} test") + + # Test process_trace method exists + assert hasattr(cache, 'process_trace'), f"{name} missing process_trace method" + + # Test process_trace functionality + miss_ratio = cache.process_trace(test_reader, max_req=max_requests) + results[name] = miss_ratio + + print(f"{name:15s}: miss_ratio = {miss_ratio:.4f}") + print(f" cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes") + + # Verify miss_ratio is valid + assert 0.0 <= miss_ratio <= 1.0, f"{name} returned invalid miss_ratio: {miss_ratio}" + + print(f"\nPASS: All {len(caches)} cache policies support unified process_trace interface!") + + # Verify we got results for all caches + assert len(results) == len(caches), "Not all caches were tested" + + +def test_unified_properties_interface(): + """Test that all cache policies have the same properties interface.""" + print("\nTesting unified properties interface...") + + cache_size = 1024 * 1024 + + # Create different cache types + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "Python Hook": lcs.PythonHookCachePolicy(cache_size, "TestCache"), + } + + print("\n--- Testing unified properties interface ---") + + required_properties = ['cache_size', 'n_req', 'n_obj', 'occupied_byte'] + + for name, cache in caches.items(): + print(f"{name:15s}:") + + # Test all required properties exist + for prop in required_properties: + assert hasattr(cache, prop), f"{name} missing {prop} property" + value = getattr(cache, prop) + print(f" {prop} = {value}") + + # Test cache_size is correct + assert cache.cache_size == cache_size, f"{name} cache_size mismatch" + + print("PASS: All cache policies support unified properties interface!") + + +def test_get_interface_consistency(): + 
"""Test that get() method works consistently across all cache policies.""" + print("\nTesting get() interface consistency...") + + cache_size = 1024 * 1024 + + # Create caches + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + } + + # Add Python hook cache + python_cache = lcs.PythonHookCachePolicy(cache_size, "ConsistencyTest") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() + python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Python Hook"] = python_cache + + # Create a test request using the proper request class + test_req = lcs.Request() + test_req.obj_id = 1 + test_req.obj_size = 1024 + + print("Testing get() method with test request...") + + for name, cache in caches.items(): + # Reset cache state for consistent testing + initial_n_req = cache.n_req + initial_n_obj = cache.n_obj + initial_occupied = cache.occupied_byte + + # Test get method exists + assert hasattr(cache, 'get'), f"{name} missing get method" + + # Test first access (should be miss for new object) + result = cache.get(test_req) + print(f"{name:15s}: first access = {'HIT' if result else 'MISS'}") + + # Test properties updated correctly + assert cache.n_req > initial_n_req, f"{name} n_req not updated" + if not result: # If it was a miss, object should be added + assert cache.n_obj > initial_n_obj, f"{name} n_obj not updated after miss" + assert cache.occupied_byte > initial_occupied, f"{name} occupied_byte not updated after miss" + + # Test second access to same object (should be hit) + second_result = cache.get(test_req) + print(f"{name:15s}: second access = {'HIT' if second_result else 'MISS'}") + + # Second access should be a hit (unless cache is too small) + if cache.cache_size >= test_req.obj_size: + assert second_result, f"{name} second access should be a hit" + + print("PASS: Get interface consistency test passed!") + + +if __name__ == "__main__": + tests = [ + 
test_unified_process_trace_interface, + test_unified_properties_interface, + test_get_interface_consistency, + ] + + all_passed = True + for test in tests: + try: + test() # Just call the test, don't check return value + print(f"PASS: {test.__name__} passed") + except Exception as e: + print(f"FAIL: {test.__name__} failed with exception: {e}") + all_passed = False + + if all_passed: + print("\nAll unified interface tests PASSED!") + else: + print("\nSome unified interface tests FAILED!") diff --git a/libCacheSim-python/tests/utils.py b/libCacheSim-python/tests/utils.py new file mode 100644 index 000000000..6eabbdd2a --- /dev/null +++ b/libCacheSim-python/tests/utils.py @@ -0,0 +1,16 @@ +import os + + +def get_reference_data(eviction_algo, cache_size_ratio): + data_file = os.path.join( # noqa: PTH118 + (os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 + "tests", + "reference.csv" + ) + with open(data_file, "r") as f: # noqa: PTH123 + lines = f.readlines() + key = "3LCache" if eviction_algo == "ThreeLCache" else eviction_algo + for line in lines: + if line.startswith(f"{key},{cache_size_ratio}"): + return float(line.split(",")[-1]) + return None \ No newline at end of file diff --git a/libCacheSim/dataStructure/minimalIncrementCBF.c b/libCacheSim/dataStructure/minimalIncrementCBF.c index 82967eedb..b8667eb88 100644 --- a/libCacheSim/dataStructure/minimalIncrementCBF.c +++ b/libCacheSim/dataStructure/minimalIncrementCBF.c @@ -53,7 +53,7 @@ int minimalIncrementCBF_init(struct minimalIncrementCBF *CBF, int entries, CBF->counter_num = CBF->hashes * 2; } - CBF->bf = (unsigned int *)calloc(sizeof(unsigned int), CBF->counter_num); + CBF->bf = (unsigned int *)calloc(CBF->counter_num, sizeof(unsigned int)); // TODO: check whether unsigned int is enough for the size of each counter if (CBF->bf == NULL) { diff --git a/libCacheSim/include/libCacheSim/mem.h b/libCacheSim/include/libCacheSim/mem.h index 2f587d8b6..8068f9179 100644 --- 
a/libCacheSim/include/libCacheSim/mem.h
+++ b/libCacheSim/include/libCacheSim/mem.h
@@ -22,7 +22,7 @@
 #elif HEAP_ALLOCATOR == HEAP_ALLOCATOR_MALLOC
 #include <stdlib.h>
 #define my_malloc(type) (type *)malloc(sizeof(type))
-#define my_malloc_n(type, n) (type *)calloc(sizeof(type), n)
+#define my_malloc_n(type, n) (type *)calloc(n, sizeof(type))
 #define my_free(size, addr) free(addr)
 
 #elif HEAP_ALLOCATOR == HEAP_ALLOCATOR_ALIGNED_MALLOC
diff --git a/scripts/build_pypi.sh b/scripts/build_pypi.sh
new file mode 100644
index 000000000..5f4cda97c
--- /dev/null
+++ b/scripts/build_pypi.sh
@@ -0,0 +1 @@
+python3 -m cibuildwheel --platform linux libCacheSim-python
diff --git a/scripts/install_python.sh b/scripts/install_python.sh
new file mode 100644
index 000000000..d0ff2eba6
--- /dev/null
+++ b/scripts/install_python.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -euo pipefail
+
+# Build the main libCacheSim C++ library first
+echo "Building main libCacheSim library..."
+rm -rf ./build
+cmake -G Ninja -B build # -DENABLE_3L_CACHE=ON
+ninja -C build
+
+# Now build and install the Python binding
+echo "Building Python binding..."
+echo "Sync python version..."
+python scripts/sync_python_version.py
+pushd libCacheSim-python
+pip install -e . -vvv
+popd
+
+# Test that the import works
+echo "Testing import..."
+python -c "import libcachesim"
+
+# Run tests
+echo "Running tests..."
+pushd libCacheSim-python
+pytest .
+popd diff --git a/scripts/sync_node_version.py b/scripts/sync_node_version.py index d45a391a2..7497b1c00 100755 --- a/scripts/sync_node_version.py +++ b/scripts/sync_node_version.py @@ -22,18 +22,18 @@ def read_main_version(): """Read version from version.txt.""" project_root = get_project_root() version_file = project_root / "version.txt" - + if not version_file.exists(): print(f"Error: {version_file} not found", file=sys.stderr) sys.exit(1) - + with open(version_file, 'r') as f: version = f.read().strip() - + if not version: print("Error: version.txt is empty", file=sys.stderr) sys.exit(1) - + return version @@ -41,29 +41,29 @@ def update_package_json(version): """Update package.json with the new version.""" project_root = get_project_root() package_json_path = project_root / "libCacheSim-node" / "package.json" - + if not package_json_path.exists(): print(f"Error: {package_json_path} not found", file=sys.stderr) sys.exit(1) - + # Read current package.json with open(package_json_path, 'r') as f: package_data = json.load(f) - + current_version = package_data.get('version', 'unknown') - + if current_version == version: print(f"Version already up to date: {version}") return False - + # Update version package_data['version'] = version - + # Write back to file with proper formatting with open(package_json_path, 'w') as f: json.dump(package_data, f, indent=2) f.write('\n') # Add trailing newline - + print(f"Updated Node.js binding version: {current_version} → {version}") return True @@ -74,19 +74,19 @@ def main(): # Read main project version main_version = read_main_version() print(f"Main project version: {main_version}") - + # Update Node.js binding version updated = update_package_json(main_version) - + if updated: print("✓ Node.js binding version synchronized successfully") else: print("✓ No changes needed") - + except Exception as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git 
a/scripts/sync_python_version.py b/scripts/sync_python_version.py
new file mode 100644
index 000000000..65e51a92f
--- /dev/null
+++ b/scripts/sync_python_version.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+"""
+Script to synchronize version between libCacheSim main project and Python bindings.
+
+This script reads the version from version.txt and updates the pyproject.toml
+in libCacheSim-python to match.
+"""
+
+import json
+import os
+import sys
+import re
+from pathlib import Path
+
+
+def get_project_root():
+    """Get the project root directory."""
+    script_dir = Path(__file__).parent
+    return script_dir.parent
+
+
+def read_main_version():
+    """Read version from version.txt."""
+    project_root = get_project_root()
+    version_file = project_root / "version.txt"
+
+    if not version_file.exists():
+        print(f"Error: {version_file} not found", file=sys.stderr)
+        sys.exit(1)
+
+    with open(version_file, 'r') as f:
+        version = f.read().strip()
+
+    if not version:
+        print("Error: version.txt is empty", file=sys.stderr)
+        sys.exit(1)
+
+    return version
+
+def update_pyproject_toml(version):
+    """Update pyproject.toml with the new version."""
+    project_root = get_project_root()
+    pyproject_toml_path = project_root / "libCacheSim-python" / "pyproject.toml"
+
+    if not pyproject_toml_path.exists():
+        print(f"Error: {pyproject_toml_path} not found", file=sys.stderr)
+        return False
+
+    # Read current pyproject.toml
+    with open(pyproject_toml_path, 'r') as f:
+        pyproject_data = f.read()
+
+    # Update the version line in pyproject.toml so that it can match any version in version.txt, like "0.3.1" or "dev"
+    match = re.search(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", pyproject_data)
+    if not match:
+        print("Error: Could not find a valid version line in pyproject.toml", file=sys.stderr)
+        return False
+    current_version = match.group(1)
+    if current_version == version:
+        print(f"Python binding version already up to date: {version}")
+        return False
+    # replace the version line with the new version
+    pyproject_data = re.sub(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", f"version = \"{version}\"", pyproject_data)
+
+    # Write back to file with proper formatting
+    with open(pyproject_toml_path, 'w') as f:
+        f.write(pyproject_data)
+
+    print(f"Updated Python version: {current_version} → {version}")
+    return True
+
+
+def main():
+    """Main function."""
+    try:
+        # Read main project version
+        main_version = read_main_version()
+        print(f"Main project version: {main_version}")
+
+        # Update Python binding version
+        updated = update_pyproject_toml(main_version)
+
+        if updated:
+            print("Python binding version synchronized successfully")
+        else:
+            print("No changes needed")
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()