diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 000000000..74ff0bb58 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,38 @@ +name: Python + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Prepare + run: bash scripts/install_dependency.sh + + - name: Build main libCacheSim project + run: | + cmake -G Ninja -B build + ninja -C build + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + + - name: Build libCacheSim-python + run: | + cd libCacheSim-python + pip install -e . + + - name: Run tests + run: | + cd libCacheSim-python + pytest tests/ diff --git a/.gitignore b/.gitignore index 9913f147a..620e8536b 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ sftp-config.json # Clangd cache *.cache/ .lint-logs/ +# Python wheels +*.whl diff --git a/CMakeLists.txt b/CMakeLists.txt index 89513c28f..a2623b470 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,6 +247,8 @@ else() message(STATUS "Building without test") endif() +# Export variables for scikit-build -> build/export_vars.cmake +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) # libCacheSim unified library compilation and installation # Create a single library that combines all modular libraries diff --git a/libCacheSim-python/.gitignore b/libCacheSim-python/.gitignore new file mode 100644 index 000000000..34712f29d --- /dev/null +++ b/libCacheSim-python/.gitignore @@ -0,0 +1,59 @@ +# Automatically generated by `hgimportsvn` +.svn +.hgsvn + +# Ignore local virtualenvs +lib/ +bin/ +include/ +.Python/ + +# These lines are suggested according to the svn:ignore property +# Feel free to enable them by uncommenting them +*.pyc +*.pyo +*.swp +*.class +*.orig +*~ +.hypothesis/ + +# 
autogenerated +src/_pytest/_version.py +# setuptools +.eggs/ + +doc/*/_build +doc/*/.doctrees +build/ +dist/ +*.egg-info +htmlcov/ +issue/ +env/ +.env/ +.venv/ +/pythonenv*/ +3rdparty/ +.tox +.cache +.pytest_cache +.mypy_cache +.coverage +.coverage.* +coverage.xml +.ropeproject +.idea +.hypothesis +.pydevproject +.project +.settings +.vscode +__pycache__/ +.python-version + +# generated by pip +pip-wheel-metadata/ + +# pytest debug logs generated via --debug +pytestdebug.log \ No newline at end of file diff --git a/libCacheSim-python/CMakeLists.txt b/libCacheSim-python/CMakeLists.txt new file mode 100644 index 000000000..f3e1c5d6b --- /dev/null +++ b/libCacheSim-python/CMakeLists.txt @@ -0,0 +1,103 @@ +cmake_minimum_required(VERSION 3.15...3.27) + +# Include exported variables from cache +if(DEFINED LIBCB_BUILD_DIR) + set(PARENT_BUILD_DIR "${LIBCB_BUILD_DIR}") + message(STATUS "Using provided LIBCB_BUILD_DIR: ${LIBCB_BUILD_DIR}") +else() + set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +endif() +set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") + +if(EXISTS "${EXPORT_FILE}") + include("${EXPORT_FILE}") + message(STATUS "Loaded variables from export_vars.cmake") +else() + message(FATAL_ERROR "export_vars.cmake not found at ${EXPORT_FILE}. Please build the main project first (e.g. cd .. 
&& cmake -G Ninja -B build)") +endif() + +# Force enable -fPIC +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + +project(libCacheSim-python VERSION "${LIBCACHESIM_VERSION}") + +if(LOG_LEVEL_LOWER STREQUAL "default") + if(CMAKE_BUILD_TYPE_LOWER MATCHES "debug") + add_compile_definitions(LOGLEVEL=6) + else() + add_compile_definitions(LOGLEVEL=7) + endif() +elseif(LOG_LEVEL_LOWER STREQUAL "verbose") + add_compile_definitions(LOGLEVEL=5) +elseif(LOG_LEVEL_LOWER STREQUAL "debug") + add_compile_definitions(LOGLEVEL=6) +elseif(LOG_LEVEL_LOWER STREQUAL "info") + add_compile_definitions(LOGLEVEL=7) +elseif(LOG_LEVEL_LOWER STREQUAL "warn") + add_compile_definitions(LOGLEVEL=8) +elseif(LOG_LEVEL_LOWER STREQUAL "error") + add_compile_definitions(LOGLEVEL=9) +else() + add_compile_definitions(LOGLEVEL=7) +endif() + +# Find python and pybind11 +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +find_package(pybind11 CONFIG REQUIRED) + +# Include directories for dependencies +include_directories(${GLib_INCLUDE_DIRS}) +include_directories(${GLib_CONFIG_INCLUDE_DIR}) +include_directories(${XGBOOST_INCLUDE_DIR}) +include_directories(${LIGHTGBM_PATH}) +include_directories(${ZSTD_INCLUDE_DIR}) +include_directories(${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin) + +# Find the main libCacheSim library +set(MAIN_PROJECT_BUILD_DIR "${PARENT_BUILD_DIR}") +set(MAIN_PROJECT_LIB_PATH "${MAIN_PROJECT_BUILD_DIR}/liblibCacheSim.a") + +if(EXISTS "${MAIN_PROJECT_LIB_PATH}") + message(STATUS "Found pre-built libCacheSim library at ${MAIN_PROJECT_LIB_PATH}") + + # Import the main library as an imported target + add_library(libCacheSim_main STATIC IMPORTED) + set_target_properties(libCacheSim_main PROPERTIES + IMPORTED_LOCATION "${MAIN_PROJECT_LIB_PATH}" + INTERFACE_INCLUDE_DIRECTORIES 
"${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/utils/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim" + ) + + # Link dependencies that the main library needs + target_link_libraries(libCacheSim_main INTERFACE ${dependency_libs}) + set(LIBCACHESIM_TARGET libCacheSim_main) + +else() + message(FATAL_ERROR "Pre-built libCacheSim library not found. Please build the main project first: cd .. && cmake -G Ninja -B build && ninja -C build") +endif() + +python_add_library(_libcachesim MODULE + src/pylibcachesim.cpp + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/cli_reader_utils.c + WITH_SOABI +) + +set_target_properties(_libcachesim PROPERTIES + POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH_USE_LINK_PATH TRUE + BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH "$ORIGIN" +) + +target_compile_definitions(_libcachesim PRIVATE VERSION_INFO=${PROJECT_VERSION}) + +target_link_libraries(_libcachesim PRIVATE + ${LIBCACHESIM_TARGET} + pybind11::headers + pybind11::module + -Wl,--no-as-needed -ldl +) + +# install to wheel directory +install(TARGETS _libcachesim LIBRARY DESTINATION libcachesim) diff --git a/libCacheSim-python/MAINFEST.in b/libCacheSim-python/MAINFEST.in new file mode 100644 index 000000000..e69de29bb diff --git a/libCacheSim-python/README.md b/libCacheSim-python/README.md new file mode 100644 index 000000000..db46af6a1 --- /dev/null +++ b/libCacheSim-python/README.md @@ -0,0 +1,522 @@ +# libCacheSim Python Binding + +Python bindings for libCacheSim, a high-performance cache simulator and analysis library. + +## Installation + +### Quick Install (Recommended) +```bash +# From the libCacheSim root directory +bash scripts/install_python.sh +``` + +### Manual Install +```bash +# Build the main libCacheSim library first +cmake -G Ninja -B build +ninja -C build + +# Install Python binding +cd libCacheSim-python +pip install -e . -v +``` + +### Testing +```bash +# Run all tests +python -m pytest . 
+ +# Test import +python -c "import libcachesim; print('Success!')" +``` + +## Quick Start + +### Basic Usage + +```python +import libcachesim as lcs + +# Create a cache +cache = lcs.LRU(cache_size=1024*1024) # 1MB cache + +# Process requests +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 + +hit = cache.get(req) # False (first access) +hit = cache.get(req) # True (second access) + +# Check statistics +print(f"Hit rate: {(cache.n_req - cache.n_miss)/cache.n_req:.2%}") +``` + +### Trace Processing + +```python +import libcachesim as lcs + +# Open trace and process efficiently +reader = lcs.open_trace("trace.bin", lcs.TraceType.ORACLE_GENERAL_TRACE.value) +cache = lcs.S3FIFO(cache_size=1024*1024) + +# Process entire trace efficiently (C++ backend) +miss_ratio = cache.process_trace(reader) +print(f"Miss ratio: {miss_ratio:.4f}") + +# Process with limits and time ranges +miss_ratio = cache.process_trace( + reader, + max_req=10000, # Process max 10K requests + max_sec=3600, # Process max 1 hour + start_time=1000, # Start from timestamp 1000 + end_time=5000 # End at timestamp 5000 +) +``` + +## Custom Cache Policies + +Implement custom cache replacement algorithms using pure Python functions - no C/C++ compilation required. + +### Python Hook Cache Overview + +The `PythonHookCachePolicy` allows you to define custom caching behavior through Python callback functions. 
This is perfect for: +- Prototyping new cache algorithms +- Educational purposes and learning +- Research and experimentation +- Custom business logic implementation + +### Hook Functions + +You need to implement these callback functions: + +- **`init_hook(cache_size: int) -> Any`**: Initialize your data structure +- **`hit_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits +- **`miss_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses +- **`eviction_hook(data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict +- **`remove_hook(data: Any, obj_id: int) -> None`**: Clean up when object removed +- **`free_hook(data: Any) -> None`**: [Optional] Final cleanup + +### Example: Custom LRU Implementation + +```python +import libcachesim as lcs +from collections import OrderedDict + +# Create a Python hook-based cache +cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="MyLRU") + +# Define LRU policy hooks +def init_hook(cache_size): + return OrderedDict() # Track access order + +def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) # Move to most recent + +def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True # Add to end + +def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) # Return least recent + +def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + +# Set the hooks +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + +# Use it like any other cache +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = cache.get(req) +``` + +### Example: Custom FIFO Implementation + +```python +import libcachesim as lcs +from collections import deque + +# Create a custom FIFO cache +cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO") + +def init_hook(cache_size): + return deque() # Use deque for FIFO order + +def hit_hook(fifo_queue, obj_id, obj_size): + pass # FIFO doesn't 
reorder on hit + +def miss_hook(fifo_queue, obj_id, obj_size): + fifo_queue.append(obj_id) # Add to end of queue + +def eviction_hook(fifo_queue, obj_id, obj_size): + return fifo_queue[0] # Return first item (oldest) + +def remove_hook(fifo_queue, obj_id): + if fifo_queue and fifo_queue[0] == obj_id: + fifo_queue.popleft() + +# Set the hooks and test +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = cache.get(req) +print(f"Cache hit: {hit}") # Should be False (miss) +``` + +## Available Algorithms + +### Built-in Cache Algorithms + +#### Basic Algorithms +- **FIFO**: First-In-First-Out +- **LRU**: Least Recently Used +- **LFU**: Least Frequently Used +- **Clock**: Clock/Second-chance algorithm + +#### Advanced Algorithms +- **S3FIFO**: Simple, Fast, Fair FIFO (recommended for most workloads) +- **Sieve**: High-performance eviction algorithm +- **ARC**: Adaptive Replacement Cache +- **TwoQ**: Two-Queue algorithm +- **TinyLFU**: TinyLFU with window +- **SLRU**: Segmented LRU + +#### Research/ML Algorithms +- **LRB**: Learning-based cache (if enabled) +- **GLCache**: Machine learning-based cache +- **ThreeLCache**: Three-level cache hierarchy (if enabled) + +```python +import libcachesim as lcs + +# All algorithms use the same unified interface +cache_size = 1024 * 1024 # 1MB + +lru_cache = lcs.LRU(cache_size) +s3fifo_cache = lcs.S3FIFO(cache_size) # Recommended +sieve_cache = lcs.Sieve(cache_size) +arc_cache = lcs.ARC(cache_size) + +# All caches work identically +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = lru_cache.get(req) +``` + +## Examples and Testing + +### Algorithm Comparison +```python +import libcachesim as lcs + +def compare_algorithms(trace_path): + reader = lcs.open_trace(trace_path, lcs.TraceType.VSCSI_TRACE.value) + algorithms = ['LRU', 'S3FIFO', 'Sieve', 'ARC'] + + print("Algorithm\tMiss Ratio") + print("-" * 25) + for algo_name in 
algorithms: + cache = getattr(lcs, algo_name)(cache_size=1024*1024) + miss_ratio = cache.process_trace(reader) + print(f"{algo_name}\t\t{miss_ratio:.4f}") + +compare_algorithms("workload.vscsi") +``` + +### Performance Benchmarking +```python +import time + +def benchmark_cache(cache, num_requests=100000): + """Benchmark cache performance""" + start_time = time.time() + + for i in range(num_requests): + req = lcs.Request() + req.obj_id = i % 1000 # Working set of 1000 objects + req.obj_size = 100 + cache.get(req) + + end_time = time.time() + throughput = num_requests / (end_time - start_time) + + print(f"Processed {num_requests} requests in {end_time - start_time:.2f}s") + print(f"Throughput: {throughput:.0f} requests/sec") + print(f"Miss ratio: {cache.n_miss / cache.n_req:.4f}") + +# Compare performance +lru_cache = lcs.LRU(cache_size=1024*1024) +s3fifo_cache = lcs.S3FIFO(cache_size=1024*1024) + +print("LRU Performance:") +benchmark_cache(lru_cache) + +print("\nS3-FIFO Performance:") +benchmark_cache(s3fifo_cache) +``` + +### Validate Custom Implementation +```python +def test_custom_vs_builtin(): + """Test custom cache against built-in implementation""" + cache_size = 1024 + + # Your custom LRU implementation + custom_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") + # ... set up your LRU hooks here ... 
+ + # Built-in LRU for comparison + builtin_cache = lcs.LRU(cache_size) + + # Test with same request sequence + test_requests = [(1, 100), (2, 100), (3, 100), (1, 100)] + + for obj_id, obj_size in test_requests: + req1 = lcs.Request() + req1.obj_id = obj_id + req1.obj_size = obj_size + + req2 = lcs.Request() + req2.obj_id = obj_id + req2.obj_size = obj_size + + custom_result = custom_cache.get(req1) + builtin_result = builtin_cache.get(req2) + + assert custom_result == builtin_result, f"Mismatch at obj_id {obj_id}" + print(f"obj_id {obj_id}: {'HIT' if custom_result else 'MISS'} ✓") +``` + +## Advanced Usage + +### Multi-Format Trace Processing + +```python +import libcachesim as lcs + +# Supported trace types +trace_types = { + "oracle": lcs.TraceType.ORACLE_GENERAL_TRACE.value, + "csv": lcs.TraceType.CSV_TRACE.value, + "vscsi": lcs.TraceType.VSCSI_TRACE.value, + "txt": lcs.TraceType.TXT_TRACE.value +} + +# Open different trace formats +oracle_reader = lcs.open_trace("trace.bin", trace_types["oracle"]) +csv_reader = lcs.open_trace("trace.csv", trace_types["csv"], + "time-col=1,obj-id-col=2,obj-size-col=3,delimiter=,") + +# Process traces with different caches +caches = [ + lcs.LRU(cache_size=1024*1024), + lcs.S3FIFO(cache_size=1024*1024), + lcs.Sieve(cache_size=1024*1024) +] + +for i, cache in enumerate(caches): + miss_ratio = cache.process_trace(oracle_reader) + print(f"Cache {i} miss ratio: {miss_ratio:.4f}") +``` + +### Cache Hierarchy Simulation + +```python +def simulate_cache_hierarchy(): + """Simulate a two-level cache hierarchy""" + + # L1 cache (small, fast) + l1_cache = lcs.LRU(cache_size=64*1024) # 64KB + + # L2 cache (larger, slower) + l2_cache = lcs.LRU(cache_size=1024*1024) # 1MB + + # Simulate requests + total_requests = 0 + l1_hits = 0 + l2_hits = 0 + + for obj_id in range(1000): + req = lcs.Request() + req.obj_id = obj_id % 100 # Working set of 100 objects + req.obj_size = 1024 + + total_requests += 1 + + # Check L1 first + if l1_cache.get(req): + 
l1_hits += 1 + # Check L2 on L1 miss + elif l2_cache.get(req): + l2_hits += 1 + # Promote to L1 + l1_cache.get(req) + + print(f"L1 hit rate: {l1_hits/total_requests:.2%}") + print(f"L2 hit rate: {l2_hits/total_requests:.2%}") + print(f"Overall hit rate: {(l1_hits+l2_hits)/total_requests:.2%}") + +simulate_cache_hierarchy() +``` + +### Cache Statistics Monitoring + +```python +def analyze_cache_behavior(): + """Detailed cache statistics analysis""" + cache = lcs.S3FIFO(cache_size=1024*1024) + + # Process some requests + for i in range(1000): + req = lcs.Request() + req.obj_id = i % 100 + req.obj_size = 1024 + cache.get(req) + + # Access detailed statistics + print("=== Cache Statistics ===") + print(f"Cache size: {cache.cache_size:,} bytes") + print(f"Occupied space: {cache.occupied_byte:,} bytes") + print(f"Utilization: {cache.occupied_byte/cache.cache_size:.2%}") + print(f"Objects stored: {cache.n_obj:,}") + print(f"Total requests: {cache.n_req:,}") + print(f"Cache hits: {cache.n_req - cache.n_miss:,}") + print(f"Cache misses: {cache.n_miss:,}") + print(f"Hit rate: {(cache.n_req - cache.n_miss)/cache.n_req:.2%}") + print(f"Miss rate: {cache.n_miss/cache.n_req:.2%}") + +analyze_cache_behavior() +``` + +## API Reference + +### Unified Cache Interface + +All cache policies (built-in and Python hook-based) share the same interface: + +```python +import libcachesim as lcs + +# All cache policies work the same way +cache = lcs.LRU(cache_size=1024*1024) +# or +cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="Custom") + +# Unified methods for all caches: +req = lcs.Request() +req.obj_id = 123 # Object identifier (required) +req.obj_size = 1024 # Object size in bytes (required) +req.timestamp = 1000 # Request timestamp (optional) +req.op = 1 # Operation type (optional, default=1) + +hit = cache.get(req) # Process single request - returns True if hit, False if miss + +# Batch processing (faster for large traces) +reader = lcs.open_trace("trace.bin", 
lcs.TraceType.ORACLE_GENERAL_TRACE.value) +miss_ratio = cache.process_trace(reader, max_req=10000) + +# Unified properties for all caches: +print(f"Cache size: {cache.cache_size}") +print(f"Objects: {cache.n_obj}") +print(f"Occupied bytes: {cache.occupied_byte}") +print(f"Total requests: {cache.n_req}") +print(f"Cache misses: {cache.n_miss}") +print(f"Hit rate: {(cache.n_req - cache.n_miss) / cache.n_req:.2%}") +``` + +### Trace Reader + +```python +# Open trace with specific format +reader = lcs.open_trace( + trace_path="trace.csv", + trace_type=lcs.TraceType.CSV_TRACE.value, + trace_type_params="time-col=1,obj-id-col=2,obj-size-col=3,delimiter=," +) + +# Process trace with options +miss_ratio = cache.process_trace( + reader, + max_req=10000, # Process max requests + max_sec=3600, # Process max seconds of trace + start_time=1000, # Start from timestamp + end_time=5000 # End at timestamp +) +``` + +### Supported Trace Formats +```python +# Oracle format (binary, fastest) +reader = lcs.open_trace("trace.bin", lcs.TraceType.ORACLE_GENERAL_TRACE.value) + +# CSV format with custom parameters +reader = lcs.open_trace("trace.csv", lcs.TraceType.CSV_TRACE.value, + "time-col=1,obj-id-col=2,obj-size-col=3,delimiter=,") + +# VSCSI format +reader = lcs.open_trace("trace.vscsi", lcs.TraceType.VSCSI_TRACE.value) + +# Plain text format +reader = lcs.open_trace("trace.txt", lcs.TraceType.TXT_TRACE.value) +``` + +### Python Hook Cache Reference + +When implementing `PythonHookCachePolicy`, provide these hook functions: + +```python +def init_hook(cache_size: int) -> Any: + """Initialize and return plugin data structure""" + return {} # Can be any Python object + +def hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + """Handle cache hits - update your data structure""" + pass + +def miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + """Handle cache misses - add object to your data structure""" + pass + +def eviction_hook(plugin_data: Any, obj_id: 
int, obj_size: int) -> int: + """Return object ID to evict when cache is full""" + return victim_obj_id + +def remove_hook(plugin_data: Any, obj_id: int) -> None: + """Clean up when object is removed from cache""" + pass + +def free_hook(plugin_data: Any) -> None: + """[Optional] Final cleanup when cache is destroyed""" + pass + +# Set hooks +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) +``` + +## Troubleshooting + +### Common Issues + +**Import Error**: Make sure libCacheSim C++ library is built first: +```bash +cmake -G Ninja -B build && ninja -C build +``` + +**Performance Issues**: Use `process_trace()` for large workloads instead of individual `get()` calls for better performance. + +**Memory Usage**: Monitor cache statistics (`cache.occupied_byte`) and ensure proper cache size limits for your system. + +**Custom Cache Issues**: Validate your custom implementation against built-in algorithms using the test functions above. + +### Getting Help + +- Check the [main documentation](/doc/) for detailed guides +- Run tests: `python -m pytest libCacheSim-python/` +- Open issues on [GitHub](https://github.com/1a1a11a/libCacheSim/issues) +- Review [examples](/example) in the main repository diff --git a/libCacheSim-python/examples/demo_unified_interface.py b/libCacheSim-python/examples/demo_unified_interface.py new file mode 100644 index 000000000..c51c3e344 --- /dev/null +++ b/libCacheSim-python/examples/demo_unified_interface.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Demo script showing the unified interface for all cache policies. +This demonstrates how to use both native and Python hook-based caches +with the same API for seamless algorithm comparison and switching. 
+""" + +import sys +import os + +# Add parent directory for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader.""" + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + print(f"Warning: Trace file not found at {data_file}") + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value) + + +def create_demo_lru_hooks(): + """Create demo LRU hooks for Python-based cache policy.""" + + def init_hook(cache_size): + print(f" Initializing custom LRU with {cache_size} bytes") + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +def demo_unified_interface(): + """Demonstrate the unified interface across different cache policies.""" + print("libCacheSim Python Binding - Unified Interface Demo") + print("=" * 60) + + cache_size = 1024 * 1024 # 1MB + + # Create different cache policies + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": lcs.ARC(cache_size), + } + + # Create Python hook-based LRU + python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks() + 
python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Custom Python LRU"] = python_cache + + print(f"Testing {len(caches)} different cache policies with unified interface:") + + # Demo 1: Single request interface + print("1. Single Request Interface:") + print(" All caches use: cache.get(request)") + + test_req = lcs.Request() + test_req.obj_id = 1 + test_req.obj_size = 1024 + + for name, cache in caches.items(): + result = cache.get(test_req) + print(f" {name:20s}: {'HIT' if result else 'MISS'}") + + # Demo 2: Unified properties interface + print("\n2. Unified Properties Interface:") + print(" All caches provide: cache_size, n_obj, occupied_byte, n_req") + + for name, cache in caches.items(): + print(f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, " + f"bytes={cache.occupied_byte}, reqs={cache.n_req}") + + # Demo 3: Efficient trace processing + print("\n3. Efficient Trace Processing Interface:") + print(" All caches use: cache.process_trace(reader, max_req=N)") + + max_requests = 1000 + + for name, cache in caches.items(): + # Create fresh reader for each cache + reader = create_trace_reader() + if not reader: + print(f" {name:20s}: trace file not available") + continue + + miss_ratio = cache.process_trace(reader, max_req=max_requests) + print(f" {name:20s}: miss_ratio={miss_ratio:.4f}") + + print("\nKey Benefits of Unified Interface:") + print(" • Same API for all cache policies (built-in + custom)") + print(" • Easy to switch between different algorithms") + print(" • Efficient trace processing in C++ (no Python overhead)") + print(" • Consistent properties and statistics") + print(" • Type-safe and well-documented") + + print("\nDemo completed! 
All cache policies work with the same interface.") + + +if __name__ == "__main__": + demo_unified_interface() diff --git a/libCacheSim-python/examples/python_hook_cache_example.py b/libCacheSim-python/examples/python_hook_cache_example.py new file mode 100644 index 000000000..daef56a73 --- /dev/null +++ b/libCacheSim-python/examples/python_hook_cache_example.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +""" +Example demonstrating how to create custom cache policies using Python hooks. + +This example shows how to implement LRU and FIFO cache policies using the +PythonHookCachePolicy class, which allows users to define cache behavior using +pure Python functions instead of C/C++ plugins. +""" + +import libcachesim as lcs +from collections import OrderedDict, deque + + +class LRUPolicy: + """LRU (Least Recently Used) cache policy implementation.""" + + def __init__(self, cache_size): + self.cache_size = cache_size + self.access_order = OrderedDict() # obj_id -> True (for ordering) + + def on_hit(self, obj_id, obj_size): + """Move accessed object to end (most recent).""" + if obj_id in self.access_order: + # Move to end (most recent) + self.access_order.move_to_end(obj_id) + + def on_miss(self, obj_id, obj_size): + """Add new object to end (most recent).""" + self.access_order[obj_id] = True + + def evict(self, obj_id, obj_size): + """Return the least recently used object ID.""" + if self.access_order: + # Return first item (least recent) + victim_id = next(iter(self.access_order)) + return victim_id + raise RuntimeError("No objects to evict") + + def on_remove(self, obj_id): + """Remove object from tracking.""" + self.access_order.pop(obj_id, None) + + +class FIFOPolicy: + """FIFO (First In First Out) cache policy implementation.""" + + def __init__(self, cache_size): + self.cache_size = cache_size + self.insertion_order = deque() # obj_id queue + + def on_hit(self, obj_id, obj_size): + """FIFO doesn't change order on hits.""" + pass + + def on_miss(self, obj_id, 
obj_size): + """Add new object to end of queue.""" + self.insertion_order.append(obj_id) + + def evict(self, obj_id, obj_size): + """Return the first inserted object ID.""" + if self.insertion_order: + victim_id = self.insertion_order.popleft() + return victim_id + raise RuntimeError("No objects to evict") + + def on_remove(self, obj_id): + """Remove object from tracking.""" + try: + self.insertion_order.remove(obj_id) + except ValueError: + pass # Object not in queue + + +def create_lru_cache(cache_size): + """Create an LRU cache using Python hooks.""" + cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU") + + def init_hook(cache_size): + return LRUPolicy(cache_size) + + def hit_hook(policy, obj_id, obj_size): + policy.on_hit(obj_id, obj_size) + + def miss_hook(policy, obj_id, obj_size): + policy.on_miss(obj_id, obj_size) + + def eviction_hook(policy, obj_id, obj_size): + return policy.evict(obj_id, obj_size) + + def remove_hook(policy, obj_id): + policy.on_remove(obj_id) + + def free_hook(policy): + # Python garbage collection handles cleanup + pass + + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) + return cache + + +def create_fifo_cache(cache_size): + """Create a FIFO cache using Python hooks.""" + cache = lcs.PythonHookCachePolicy(cache_size, "PythonFIFO") + + def init_hook(cache_size): + return FIFOPolicy(cache_size) + + def hit_hook(policy, obj_id, obj_size): + policy.on_hit(obj_id, obj_size) + + def miss_hook(policy, obj_id, obj_size): + policy.on_miss(obj_id, obj_size) + + def eviction_hook(policy, obj_id, obj_size): + return policy.evict(obj_id, obj_size) + + def remove_hook(policy, obj_id): + policy.on_remove(obj_id) + + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + return cache + + +def test_cache_policy(cache, name): + """Test a cache policy with sample requests.""" + print(f"\n=== Testing {name} Cache ===") + + # Test requests: obj_id, obj_size + test_requests = [ + (1, 
100), (2, 100), (3, 100), (4, 100), (5, 100), # Fill cache + (1, 100), # Hit + (6, 100), # Miss, should evict something + (2, 100), # Hit or miss depending on policy + (7, 100), # Miss, should evict something + ] + + hits = 0 + misses = 0 + + for obj_id, obj_size in test_requests: + req = lcs.Request() + req.obj_id = obj_id + req.obj_size = obj_size + + hit = cache.get(req) + if hit: + hits += 1 + print(f"Request {obj_id}: HIT") + else: + misses += 1 + print(f"Request {obj_id}: MISS") + + print(f"Total: {hits} hits, {misses} misses") + print(f"Cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes occupied") + + +def main(): + """Main example function.""" + cache_size = 400 # Bytes (can hold 4 objects of size 100 each) + + # Test LRU cache + lru_cache = create_lru_cache(cache_size) + test_cache_policy(lru_cache, "LRU") + + # Test FIFO cache + fifo_cache = create_fifo_cache(cache_size) + test_cache_policy(fifo_cache, "FIFO") + + print("\n=== Comparison ===") + print("LRU keeps recently accessed items, evicting least recently used") + print("FIFO keeps items in insertion order, evicting oldest inserted") + + +if __name__ == "__main__": + main() diff --git a/libCacheSim-python/export/CMakeLists.txt b/libCacheSim-python/export/CMakeLists.txt new file mode 100644 index 000000000..06a3566ac --- /dev/null +++ b/libCacheSim-python/export/CMakeLists.txt @@ -0,0 +1,33 @@ +# Helper functions are removed since we don't export source files anymore + +set(EXPORT_FILE "${CMAKE_BINARY_DIR}/export_vars.cmake") +file(WRITE "${EXPORT_FILE}" "") + +get_filename_component(MAIN_PROJECT_SOURCE_DIR ${CMAKE_SOURCE_DIR} ABSOLUTE) +file(WRITE ${CMAKE_BINARY_DIR}/export_vars.cmake "set(MAIN_PROJECT_SOURCE_DIR \"${MAIN_PROJECT_SOURCE_DIR}\")\n") +file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(dependency_libs \"${dependency_libs}\")\n") +file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(LIBCACHESIM_VERSION \"${LIBCACHESIM_VERSION}\")\n") + +# 
============================================================================== +# Export project metadata +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(LIBCACHESIM_VERSION \"${${PROJECT_NAME}_VERSION}\")\n") + +# ============================================================================== +# Export essential include directory variables +# ============================================================================== +foreach(var IN ITEMS GLib_INCLUDE_DIRS GLib_CONFIG_INCLUDE_DIR XGBOOST_INCLUDE_DIR LIGHTGBM_PATH ZSTD_INCLUDE_DIR) + file(APPEND "${EXPORT_FILE}" "set(${var} \"${${var}}\")\n") +endforeach() + +# ============================================================================== +# Export dependency library variables +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(dependency_libs \"${dependency_libs}\")\n") + +# ============================================================================== +# Export essential build option variables +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(LOG_LEVEL_LOWER \"${LOG_LEVEL_LOWER}\")\n") + +message(STATUS "Exported essential variables to ${EXPORT_FILE}") diff --git a/libCacheSim-python/export/README.md b/libCacheSim-python/export/README.md new file mode 100644 index 000000000..976b1daa8 --- /dev/null +++ b/libCacheSim-python/export/README.md @@ -0,0 +1,47 @@ +# Python Binding Export System + +Build system bridge for sharing CMake variables between the main libCacheSim project and Python binding. + +## Purpose + +The `export/CMakeLists.txt` exports all necessary build variables (source files, include directories, compiler flags, etc.) from the main project to the Python binding, enabling consistent builds without duplicating configuration. + +## How It Works + +1. 
**Export**: Main project writes variables to `export_vars.cmake` +2. **Import**: Python binding includes this file during CMake configuration +3. **Build**: Python binding uses shared variables for consistent compilation + +## Key Exported Variables + +### Project Metadata +- Library version (`LIBCACHESIM_VERSION`) + +### Build Configuration +- Include directories (GLib, ZSTD, XGBoost, LightGBM) +- Dependency libraries (`dependency_libs`) +- Build options (log level) + +Note: source files and compiler flags are no longer exported; the Python binding links against the prebuilt main library from the parent build directory. + +## Usage + +**Main Project** (`CMakeLists.txt`): +```cmake +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) +``` + +**Python Binding** (`libCacheSim-python/CMakeLists.txt`): +```cmake +set(EXPORT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../build/export_vars.cmake") +include("${EXPORT_FILE}") +``` + +## For Developers + +This system ensures the Python binding automatically picks up changes to: +- Updated include directories or dependency libraries in the main project +- Modified build options (e.g. log level) + +No manual synchronization is needed between the main project and the Python binding builds. 
diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py new file mode 100644 index 000000000..5cac3c360 --- /dev/null +++ b/libCacheSim-python/libcachesim/__init__.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from ._libcachesim import ( + Cache, + Reader, + Request, + __doc__, + __version__, + open_trace, + process_trace, + process_trace_python_hook, +) +from .const import TraceType +from .eviction import ( + ARC, + FIFO, + LRB, + LRU, + S3FIFO, + Clock, + Sieve, + ThreeLCache, + TinyLFU, + TwoQ, + PythonHookCachePolicy, +) + +__all__ = [ + "ARC", + "FIFO", + "LRB", + "LRU", + "S3FIFO", + "Cache", + "Clock", + "Reader", + "Request", + "Sieve", + "ThreeLCache", + "TinyLFU", + "TraceType", + "TwoQ", + "PythonHookCachePolicy", + "__doc__", + "__version__", + "open_trace", + "process_trace", + "process_trace_python_hook", + # TODO(haocheng): add more eviction policies +] diff --git a/libCacheSim-python/libcachesim/__init__.pyi b/libCacheSim-python/libcachesim/__init__.pyi new file mode 100644 index 000000000..4148ddc84 --- /dev/null +++ b/libCacheSim-python/libcachesim/__init__.pyi @@ -0,0 +1,139 @@ +""" +libCacheSim Python bindings +-------------------------- + +.. currentmodule:: libcachesim + +.. autosummary:: + :toctree: _generate + + open_trace + ARC_init + Clock_init + FIFO_init + LRB_init + LRU_init + S3FIFO_init + Sieve_init + ThreeLCache_init + TinyLFU_init + TwoQ_init + Cache + Request + Reader + reader_init_param_t + TraceType +""" + +from .const import TraceType + +def open_trace( + trace_path: str, + type: TraceType, + reader_init_param: dict | reader_init_param_t | None = None +) -> Reader: ... + + +def FIFO_init(cache_size: int) -> Cache: + """ + Create a FIFO cache instance. + """ + + +def ARC_init(cache_size: int) -> Cache: + """ + Create a ARC cache instance. + """ + + +def Clock_init(cache_size: int, n_bit_counter: int = 1, init_freq: int = 0) -> Cache: + """ + Create a Clock cache instance. 
+ """ + + +def LRB_init(cache_size: int, objective: str = "byte-miss-ratio") -> Cache: + """ + Create a LRB cache instance. + """ + + +def LRU_init(cache_size: int) -> Cache: + """ + Create a LRU cache instance. + """ + + +def S3FIFO_init( + cache_size: int, + fifo_size_ratio: float = 0.10, + ghost_size_ratio: float = 0.90, + move_to_main_threshold: int = 2 +) -> Cache: + """ + Create a S3FIFO cache instance. + """ + + +def Sieve_init(cache_size: int) -> Cache: + """ + Create a Sieve cache instance. + """ + + +def ThreeLCache_init(cache_size: int, objective: str = "byte-miss-ratio") -> Cache: + """ + Create a ThreeLCache cache instance. + """ + + +def TinyLFU_init( + cache_size: int, + main_cache: str = "SLRU", + window_size: float = 0.01 +) -> Cache: + """ + Create a TinyLFU cache instance. + """ + + +def TwoQ_init( + cache_size: int, + Ain_size_ratio: float = 0.25, + Aout_size_ratio: float = 0.5 +) -> Cache: + """ + Create a TwoQ cache instance. + """ + +class reader_init_param_t: + time_field: int + obj_id_field: int + obj_size_field: int + delimiter: str + has_header: bool + + +class Cache: + n_req: int + n_obj: int + occupied_byte: int + cache_size: int + def get(self, req: Request) -> bool: ... + + +class Request: + clock_time: int + hv: int + obj_id: int + obj_size: int + + +class Reader: + n_read_req: int + n_total_req: int + trace_path: str + file_size: int + def get_wss(self, ignore_obj_size: bool = False) -> int: ... + def __iter__(self) -> Reader: ... + def __next__(self) -> Request: ... 
diff --git a/libCacheSim-python/libcachesim/const.py b/libCacheSim-python/libcachesim/const.py new file mode 100644 index 000000000..142f3cccb --- /dev/null +++ b/libCacheSim-python/libcachesim/const.py @@ -0,0 +1,4 @@ +from __future__ import annotations + +# Import TraceType directly from the C++ binding to avoid duplication +from ._libcachesim import TraceType diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py new file mode 100644 index 000000000..fa1cfb836 --- /dev/null +++ b/libCacheSim-python/libcachesim/eviction.py @@ -0,0 +1,512 @@ +"""Registry of eviction policies.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod + +from ._libcachesim import ( + ARC_init, + Cache, + Clock_init, + FIFO_init, + LRB_init, + LRU_init, + Request, + S3FIFO_init, + Sieve_init, + ThreeLCache_init, + TinyLFU_init, + TwoQ_init, + PythonHookCache, +) + + +class EvictionPolicyBase(ABC): + """Abstract base class for all eviction policies.""" + @abstractmethod + def get(self, req: Request) -> bool: + pass + + @abstractmethod + def __repr__(self) -> str: + pass + + @abstractmethod + def process_trace(self, reader, max_req: int = -1, max_sec: int = -1, start_time: int = -1, end_time: int = -1) -> float: + """Process a trace with this cache and return miss ratio. + + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. 
+ + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + """ + pass + + +class EvictionPolicy(EvictionPolicyBase): + """Base class for all eviction policies.""" + def __init__(self, cache_size: int, **kwargs) -> None: + self.cache: Cache = self.init_cache(cache_size, **kwargs) + + @abstractmethod + def init_cache(self, cache_size: int, **kwargs) -> Cache: + pass + + def get(self, req: Request) -> bool: + return self.cache.get(req) + + def process_trace(self, reader, max_req: int = -1, max_sec: int = -1, start_time: int = -1, end_time: int = -1) -> float: + """Process a trace with this cache and return miss ratio. + + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. + + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + + Example: + >>> cache = LRU(1024*1024) + >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) + >>> miss_ratio = cache.process_trace(reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + """ + from ._libcachesim import process_trace + return process_trace(self.cache, reader, max_req, max_sec, start_time, end_time) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})" + + @property + def n_req(self): + """Number of requests processed.""" + return self.cache.n_req + + @property + def n_obj(self): + """Number of objects currently in cache.""" + return self.cache.n_obj + + @property + def occupied_byte(self): + """Number of bytes 
currently occupied in cache.""" + return self.cache.occupied_byte + + @property + def cache_size(self): + """Total cache size in bytes.""" + return self.cache.cache_size + + +class FIFO(EvictionPolicy): + """First In First Out replacement policy. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs) -> Cache: # noqa: ARG002 + return FIFO_init(cache_size) + + +class Clock(EvictionPolicy): + """Clock (Second Chance or FIFO-Reinsertion) replacement policy. + + Args: + cache_size: Size of the cache + n_bit_counter: Number of bits for counter (default: 1) + init_freq: Initial frequency value (default: 0) + """ + def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0): + super().__init__(cache_size, n_bit_counter=n_bit_counter, init_freq=init_freq) + + def init_cache(self, cache_size: int, **kwargs): + init_freq = kwargs.get('init_freq', 0) + n_bit_counter = kwargs.get('n_bit_counter', 1) + + if n_bit_counter < 1 or n_bit_counter > 32: + msg = "n_bit_counter must be between 1 and 32" + raise ValueError(msg) + if init_freq < 0 or init_freq > 2**n_bit_counter - 1: + msg = "init_freq must be between 0 and 2^n_bit_counter - 1" + raise ValueError(msg) + + self.init_freq = init_freq + self.n_bit_counter = n_bit_counter + + return Clock_init(cache_size, n_bit_counter, init_freq) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"n_bit_counter={self.n_bit_counter}, " + f"init_freq={self.init_freq})") + + +class TwoQ(EvictionPolicy): + """2Q replacement policy. + + 2Q has three queues: Ain, Aout, Am. When a obj hits in Aout, it will be + inserted into Am otherwise it will be inserted into Ain. 
+ + Args: + cache_size: Total size of the cache + ain_size_ratio: Size ratio for Ain queue (default: 0.25) + aout_size_ratio: Size ratio for Aout queue (default: 0.5) + """ + def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5): + super().__init__(cache_size, ain_size_ratio=ain_size_ratio, aout_size_ratio=aout_size_ratio) + + def init_cache(self, cache_size: int, **kwargs): + ain_size_ratio = kwargs.get('ain_size_ratio', 0.25) + aout_size_ratio = kwargs.get('aout_size_ratio', 0.5) + + if ain_size_ratio <= 0 or aout_size_ratio <= 0: + msg = "ain_size_ratio and aout_size_ratio must be greater than 0" + raise ValueError(msg) + + self.ain_size_ratio = ain_size_ratio + self.aout_size_ratio = aout_size_ratio + + return TwoQ_init(cache_size, ain_size_ratio, aout_size_ratio) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"ain_size_ratio={self.ain_size_ratio}, " + f"aout_size_ratio={self.aout_size_ratio})") + + +class LRB(EvictionPolicy): + """LRB (Learning Relaxed Belady) replacement policy. + + LRB is a learning-based replacement policy that uses a neural network to + predict the future access patterns of the cache, randomly select one obj + outside the Belady boundary to evict. 
+ + Args: + cache_size: Size of the cache + objective: Objective function to optimize (default: "byte-miss-ratio") + """ + def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): + super().__init__(cache_size, objective=objective) + + def init_cache(self, cache_size: int, **kwargs) -> Cache: + objective = kwargs.get('objective', "byte-miss-ratio") + + if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: + msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" + raise ValueError(msg) + + self.objective = objective + + return LRB_init(cache_size, objective) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"objective={self.objective})") + + +class LRU(EvictionPolicy): + """Least Recently Used replacement policy. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return LRU_init(cache_size) + + +class ARC(EvictionPolicy): + """Adaptive Replacement Cache policy. + + ARC is a two-tiered cache with two LRU caches (T1 and T2) and two ghost + lists (B1 and B2). T1 records the obj accessed only once, T2 records + the obj accessed more than once. ARC has an internal parameter `p` to + learn and dynamically control the size of T1 and T2. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return ARC_init(cache_size) + + +class S3FIFO(EvictionPolicy): + """S3FIFO replacement policy. + + S3FIFO consists of three FIFO queues: Small, Main, and Ghost. Small + queue gets the obj and records the freq. + When small queue is full, if the obj to evict satisfies the threshold, + it will be moved to main queue. Otherwise, it will be evicted from small + queue and inserted into ghost queue. + When main queue is full, the obj to evict will be evicted and reinserted + like Clock. + If obj hits in the ghost queue, it will be moved to main queue. 
+ + Args: + cache_size: Size of the cache + fifo_size_ratio: Size ratio for FIFO queue (default: 0.1) + ghost_size_ratio: Size ratio for ghost queue (default: 0.9) + move_to_main_threshold: Threshold for moving obj from ghost to main (default: 2) + """ + def __init__(self, cache_size: int, fifo_size_ratio: float = 0.1, + ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2): + super().__init__(cache_size, fifo_size_ratio=fifo_size_ratio, + ghost_size_ratio=ghost_size_ratio, + move_to_main_threshold=move_to_main_threshold) + + def init_cache(self, cache_size: int, **kwargs): + fifo_size_ratio = kwargs.get('fifo_size_ratio', 0.1) + ghost_size_ratio = kwargs.get('ghost_size_ratio', 0.9) + move_to_main_threshold = kwargs.get('move_to_main_threshold', 2) + + if fifo_size_ratio <= 0 or ghost_size_ratio <= 0: + msg = "fifo_size_ratio and ghost_size_ratio must be greater than 0" + raise ValueError(msg) + if move_to_main_threshold < 0: + msg = "move_to_main_threshold must be greater or equal to 0" + raise ValueError(msg) + + self.fifo_size_ratio = fifo_size_ratio + self.ghost_size_ratio = ghost_size_ratio + self.move_to_main_threshold = move_to_main_threshold + + return S3FIFO_init(cache_size, fifo_size_ratio, ghost_size_ratio, move_to_main_threshold) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"fifo_size_ratio={self.fifo_size_ratio}, " + f"ghost_size_ratio={self.ghost_size_ratio}, " + f"move_to_main_threshold={self.move_to_main_threshold})") + + +class Sieve(EvictionPolicy): + """Sieve replacement policy. + + FIFO-Reinsertion with check pointer. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return Sieve_init(cache_size) + + +class ThreeLCache(EvictionPolicy): + """3L-Cache replacement policy. 
+ + Args: + cache_size: Size of the cache + objective: Objective function to optimize (default: "byte-miss-ratio") + """ + def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): + super().__init__(cache_size, objective=objective) + + def init_cache(self, cache_size: int, **kwargs): + objective = kwargs.get('objective', "byte-miss-ratio") + + if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: + msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" + raise ValueError(msg) + + self.objective = objective + + return ThreeLCache_init(cache_size, objective) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"objective={self.objective})") + + +class TinyLFU(EvictionPolicy): + """TinyLFU replacement policy. + + Args: + cache_size: Size of the cache + main_cache: Main cache to use (default: "SLRU") + window_size: Window size for TinyLFU (default: 0.01) + """ + def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01): + super().__init__(cache_size, main_cache=main_cache, window_size=window_size) + + def init_cache(self, cache_size: int, **kwargs): + main_cache = kwargs.get('main_cache', "SLRU") + window_size = kwargs.get('window_size', 0.01) + + if window_size <= 0: + msg = "window_size must be greater than 0" + raise ValueError(msg) + + self.main_cache = main_cache + self.window_size = window_size + + return TinyLFU_init(cache_size, main_cache, window_size) + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"main_cache={self.main_cache}, " + f"window_size={self.window_size})") + + + +class PythonHookCachePolicy(EvictionPolicyBase): + """Python hook-based cache that allows defining custom policies using Python functions. + + This cache implementation allows users to define custom cache replacement algorithms + using pure Python functions instead of compiling C/C++ plugins. 
Users provide hook + functions for cache initialization, hit handling, miss handling, eviction decisions, + and cleanup. + + Args: + cache_size: Size of the cache in bytes + cache_name: Optional name for the cache (default: "PythonHookCache") + + Hook Functions Required: + init_hook(cache_size: int) -> Any: + Initialize plugin data structures. Return any object to be passed to other hooks. + + hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + Handle cache hit events. Update internal state as needed. + + miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + Handle cache miss events. Update internal state for new object. + + eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int: + Determine which object to evict. Return the object ID to be evicted. + + remove_hook(plugin_data: Any, obj_id: int) -> None: + Clean up when objects are removed from cache. + + free_hook(plugin_data: Any) -> None: [Optional] + Clean up plugin resources when cache is destroyed. + + Example: + >>> from collections import OrderedDict + >>> + >>> cache = PythonHookCachePolicy(1024) + >>> + >>> def init_hook(cache_size): + ... return OrderedDict() # LRU tracking + >>> + >>> def hit_hook(lru_dict, obj_id, obj_size): + ... lru_dict.move_to_end(obj_id) # Move to end (most recent) + >>> + >>> def miss_hook(lru_dict, obj_id, obj_size): + ... lru_dict[obj_id] = True # Add to end + >>> + >>> def eviction_hook(lru_dict, obj_id, obj_size): + ... return next(iter(lru_dict)) # Return least recent + >>> + >>> def remove_hook(lru_dict, obj_id): + ... 
lru_dict.pop(obj_id, None) + >>> + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> + >>> req = Request() + >>> req.obj_id = 1 + >>> req.obj_size = 100 + >>> hit = cache.get(req) + """ + def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"): + self._cache_size = cache_size + self.cache_name = cache_name + self.cache = PythonHookCache(cache_size, cache_name) + self._hooks_set = False + + def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): + """Set the hook functions for the cache. + + Args: + init_hook: Function called during cache initialization + hit_hook: Function called on cache hit + miss_hook: Function called on cache miss + eviction_hook: Function called to select eviction candidate + remove_hook: Function called when object is removed + free_hook: Optional function called during cache cleanup + """ + self.cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) + self._hooks_set = True + + def get(self, req: Request) -> bool: + """Process a cache request. + + Args: + req: The cache request to process + + Returns: + True if cache hit, False if cache miss + + Raises: + RuntimeError: If hooks have not been set + """ + if not self._hooks_set: + raise RuntimeError("Hooks must be set before using the cache. Call set_hooks() first.") + return self.cache.get(req) + + def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1): + """Process a trace with this cache and return miss ratio. + + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. 
+ + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + + Raises: + RuntimeError: If hooks have not been set + + Example: + >>> cache = PythonHookCachePolicy(1024*1024) + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) + >>> miss_ratio = cache.process_trace(reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + """ + if not self._hooks_set: + raise RuntimeError("Hooks must be set before processing trace. Call set_hooks() first.") + + from ._libcachesim import process_trace_python_hook + return process_trace_python_hook(self.cache, reader, max_req, max_sec, start_time, end_time) + + @property + def n_req(self): + """Number of requests processed.""" + return self.cache.n_req + + @property + def n_obj(self): + """Number of objects currently in cache.""" + return self.cache.n_obj + + @property + def occupied_byte(self): + """Number of bytes currently occupied in cache.""" + return self.cache.occupied_byte + + @property + def cache_size(self): + """Total cache size in bytes.""" + return self.cache.cache_size + + def __repr__(self): + return (f"{self.__class__.__name__}(cache_size={self._cache_size}, " + f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})") diff --git a/libCacheSim-python/pyproject.toml b/libCacheSim-python/pyproject.toml new file mode 100644 index 000000000..3bf6c66e8 --- /dev/null +++ b/libCacheSim-python/pyproject.toml @@ -0,0 +1,82 @@ +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11"] +build-backend = "scikit_build_core.build" + + +[project] +name = "libcachesim" +version = "0.3.1" +description="Python bindings for libCacheSim" +readme = "README.md" +requires-python = ">=3.9" + 
+[project.optional-dependencies] +test = ["pytest"] + + +[tool.scikit-build] +wheel.expand-macos-universal-tags = true + +[tool.pytest.ini_options] +minversion = "8.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +log_cli_level = "INFO" +filterwarnings = [ + "error", + "ignore::pytest.PytestCacheWarning", +] +testpaths = ["tests"] + + +[tool.cibuildwheel] +build-frontend = "build" +manylinux-x86_64-image = "quay.io/pypa/manylinux_2_34_x86_64" +# Only build for x86_64 architectures +build = "*-manylinux_x86_64" +# Install build dependencies using dnf. This runs once per container. +before-all = "dnf install -y ninja-build cmake libzstd-devel glib2-devel" +# We add 'rm -rf build' to ensure a clean build inside the container. +# The C++ core is built first, then the wheel build will use it. +# Use absolute paths to avoid issues with the working directory. +before-build = "rm -rf /project/build && cmake -S /project -B /project/build -G Ninja && cmake --build /project/build" +# Set the environment variable for the wheel build step. 
+environment = { LCS_BUILD_DIR = "/project/build" } +# test-requires = "pytest" +# test-command = "pytest {project}/tests" + +# [tool.cibuildwheel.pyodide] +# build-frontend = {name = "build", args = ["--exports", "whole_archive"]} + +[tool.ruff.lint] +extend-select = [ + "B", # flake8-bugbear + "I", # isort + "ARG", # flake8-unused-arguments + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "ICN", # flake8-import-conventions + "G", # flake8-logging-format + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 + "EXE", # flake8-executable + "NPY", # NumPy specific rules + "PD", # pandas-vet +] +ignore = [ + "PLR09", # Too many X + "PLR2004", # Magic comparison +] +isort.required-imports = ["from __future__ import annotations"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["T20"] diff --git a/libCacheSim-python/requirements.txt b/libCacheSim-python/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp new file mode 100644 index 000000000..43d875788 --- /dev/null +++ b/libCacheSim-python/src/pylibcachesim.cpp @@ -0,0 +1,842 @@ +#include +#include +#include + +// Suppress visibility warnings for pybind11 types +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattributes" + +#include +#include +#include + +#include "config.h" +#include "libCacheSim/cache.h" +#include "libCacheSim/cacheObj.h" +#include "libCacheSim/const.h" +#include "libCacheSim/enum.h" +#include "libCacheSim/logging.h" +#include "libCacheSim/macro.h" +#include "libCacheSim/reader.h" +#include "libCacheSim/request.h" +#include "libCacheSim/sampling.h" +#include "mystr.h" + +/* admission */ +#include "libCacheSim/admissionAlgo.h" + +/* eviction */ +#include 
"libCacheSim/evictionAlgo.h" + +/* cache simulator */ +#include "libCacheSim/profilerLRU.h" +#include "libCacheSim/simulator.h" + +/* bin */ +#include "cachesim/cache_init.h" +#include "cli_reader_utils.h" + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace py = pybind11; + +// Python Hook Cache Implementation +class PythonHookCache { + private: + uint64_t cache_size_; + std::string cache_name_; + std::unordered_map objects_; // obj_id -> obj_size + py::object plugin_data_; + + // Hook functions + py::function init_hook_; + py::function hit_hook_; + py::function miss_hook_; + py::function eviction_hook_; + py::function remove_hook_; + py::object free_hook_; // Changed to py::object to allow py::none() + + public: + uint64_t n_req = 0; + uint64_t n_obj = 0; + uint64_t occupied_byte = 0; + uint64_t cache_size; + + PythonHookCache(uint64_t cache_size, + const std::string& cache_name = "PythonHookCache") + : cache_size_(cache_size), + cache_name_(cache_name), + cache_size(cache_size), + free_hook_(py::none()) {} + + void set_hooks(py::function init_hook, py::function hit_hook, + py::function miss_hook, py::function eviction_hook, + py::function remove_hook, py::object free_hook = py::none()) { + init_hook_ = init_hook; + hit_hook_ = hit_hook; + miss_hook_ = miss_hook; + eviction_hook_ = eviction_hook; + remove_hook_ = remove_hook; + + // Handle free_hook properly + if (!free_hook.is_none()) { + free_hook_ = free_hook; + } else { + free_hook_ = py::none(); + } + + // Initialize plugin data + plugin_data_ = init_hook_(cache_size_); + } + + bool get(const request_t& req) { + n_req++; + + auto it = objects_.find(req.obj_id); + if (it != objects_.end()) { + // Cache hit + hit_hook_(plugin_data_, req.obj_id, req.obj_size); + return true; + } else { + // Cache miss - call miss hook first + miss_hook_(plugin_data_, req.obj_id, req.obj_size); + + // Check if eviction is needed + while (occupied_byte + req.obj_size > cache_size_ && !objects_.empty()) 
{ + // Need to evict + uint64_t victim_id = + eviction_hook_(plugin_data_, req.obj_id, req.obj_size) + .cast(); + auto victim_it = objects_.find(victim_id); + if (victim_it != objects_.end()) { + occupied_byte -= victim_it->second; + objects_.erase(victim_it); + n_obj--; + remove_hook_(plugin_data_, victim_id); + } else { + // Safety check: if eviction hook returns invalid ID, break to avoid + // infinite loop + break; + } + } + + // Insert new object if there's space + if (occupied_byte + req.obj_size <= cache_size_) { + objects_[req.obj_id] = req.obj_size; + occupied_byte += req.obj_size; + n_obj++; + } + + return false; + } + } + + ~PythonHookCache() { + if (!free_hook_.is_none()) { + py::function free_func = free_hook_.cast(); + free_func(plugin_data_); + } + } +}; + +// Restore visibility warnings +#pragma GCC diagnostic pop + +struct CacheDeleter { + void operator()(cache_t* ptr) const { + if (ptr != nullptr) ptr->cache_free(ptr); + } +}; + +struct RequestDeleter { + void operator()(request_t* ptr) const { + if (ptr != nullptr) free_request(ptr); + } +}; + +struct ReaderDeleter { + void operator()(reader_t* ptr) const { + if (ptr != nullptr) close_trace(ptr); + } +}; + +PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) + m.doc() = R"pbdoc( + libCacheSim Python bindings + -------------------------- + + .. currentmodule:: libcachesim + + .. 
autosummary:: + :toctree: _generate + + TODO(haocheng): add meaningful methods + )pbdoc"; + + py::enum_(m, "TraceType") + .value("CSV_TRACE", trace_type_e::CSV_TRACE) + .value("BIN_TRACE", trace_type_e::BIN_TRACE) + .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) + .value("ORACLE_GENERAL_TRACE", trace_type_e::ORACLE_GENERAL_TRACE) + .value("LCS_TRACE", trace_type_e::LCS_TRACE) + .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE) + .value("TWR_TRACE", trace_type_e::TWR_TRACE) + .value("TWRNS_TRACE", trace_type_e::TWRNS_TRACE) + .value("ORACLE_SIM_TWR_TRACE", trace_type_e::ORACLE_SIM_TWR_TRACE) + .value("ORACLE_SYS_TWR_TRACE", trace_type_e::ORACLE_SYS_TWR_TRACE) + .value("ORACLE_SIM_TWRNS_TRACE", trace_type_e::ORACLE_SIM_TWRNS_TRACE) + .value("ORACLE_SYS_TWRNS_TRACE", trace_type_e::ORACLE_SYS_TWRNS_TRACE) + .value("VALPIN_TRACE", trace_type_e::VALPIN_TRACE) + .value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE) + .export_values(); + + // *************** structs *************** + /** + * @brief Cache structure + */ + py::class_>(m, "Cache") + .def_readwrite("n_req", &cache_t::n_req) + .def_readwrite("n_obj", &cache_t::n_obj) + .def_readwrite("occupied_byte", &cache_t::occupied_byte) + .def_readwrite("cache_size", &cache_t::cache_size) + // methods + .def("get", [](cache_t& self, const request_t& req) { + return self.get(&self, &req); + }); + + /** + * @brief Request structure + */ + py::class_>(m, + "Request") + .def(py::init([]() { return new_request(); })) + .def_readwrite("clock_time", &request_t::clock_time) + .def_readwrite("hv", &request_t::hv) + .def_readwrite("obj_id", &request_t::obj_id) + .def_readwrite("obj_size", &request_t::obj_size) + .def_readwrite("op", &request_t::op); + + /** + * @brief Reader structure + */ + py::class_>(m, "Reader") + .def_readwrite("n_read_req", &reader_t::n_read_req) + .def_readwrite("n_total_req", &reader_t::n_total_req) + .def_readwrite("trace_path", &reader_t::trace_path) + .def_readwrite("file_size", 
&reader_t::file_size) + // methods + .def( + "get_wss", + [](reader_t& self, bool ignore_obj_size) { + int64_t wss_obj = 0, wss_byte = 0; + cal_working_set_size(&self, &wss_obj, &wss_byte); + return ignore_obj_size ? wss_obj : wss_byte; + }, + py::arg("ignore_obj_size") = false, + R"pbdoc( + Get the working set size of the trace. + + Args: + ignore_obj_size (bool): Whether to ignore the object size. + + Returns: + int: The working set size of the trace. + )pbdoc") + .def("__iter__", [](reader_t& self) -> reader_t& { return self; }) + .def("__next__", [](reader_t& self) { + auto req = std::unique_ptr(new_request()); + int ret = read_one_req(&self, req.get()); + if (ret != 0) { + throw py::stop_iteration(); + } + // std::cout << "Read request: " << req->obj_id + // << ", size: " << req->obj_size << std::endl; + return req; + }); + + py::class_(m, "reader_init_param_t") + .def(py::init<>()) + .def_readwrite("time_field", &reader_init_param_t::time_field) + .def_readwrite("obj_id_field", &reader_init_param_t::obj_id_field) + .def_readwrite("obj_size_field", &reader_init_param_t::obj_size_field) + .def_readwrite("delimiter", &reader_init_param_t::delimiter) + .def_readwrite("has_header", &reader_init_param_t::has_header) + .def_property( + "binary_fmt_str", + // Getter: C char* to Python string (returns copy) + [](const reader_init_param_t& self) { + return self.binary_fmt_str ? 
std::string(self.binary_fmt_str) : ""; + }, + // Setter: Python string to C char* (handles deep copy and old memory) + [](reader_init_param_t& self, const std::string& value) { + // Free existing memory if any + if (self.binary_fmt_str != nullptr) { + free(self.binary_fmt_str); // Use free() since it was + // strdup'd/malloc'd + } + // Deep copy the new string + self.binary_fmt_str = strdup(value.c_str()); + if (self.binary_fmt_str == nullptr && !value.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str"); + } + }); + + // *************** functions *************** + /** + * @brief Open a trace file for reading + */ + m.def( + "open_trace", + [](const std::string& trace_path, int type, const py::object& params) { + // Create an init_param instance, it will be populated from Python + reader_init_param_t init_param = {}; + + // === IMPORTANT: Initialize binary_fmt_str to nullptr === + // This is crucial if it's not always set from Python, + // so that free() won't be called on uninitialized memory if not set + // later. 
+ init_param.binary_fmt_str = nullptr; + + // Populate other fields from Python dict or object + if (py::isinstance(params)) { + py::dict dict_params = params.cast(); + init_param.time_field = dict_params["time_field"].cast(); + init_param.obj_id_field = dict_params["obj_id_field"].cast(); + init_param.obj_size_field = dict_params["obj_size_field"].cast(); + init_param.delimiter = + dict_params["delimiter"].cast()[0]; + init_param.has_header = dict_params["has_header"].cast(); + // If binary_fmt_str is in dict_params, set it via property setter + if (dict_params.contains("binary_fmt_str") && + !dict_params["binary_fmt_str"].is_none()) { + std::string bfs_val = + dict_params["binary_fmt_str"].cast(); + if (init_param.binary_fmt_str != nullptr) + free(init_param.binary_fmt_str); + init_param.binary_fmt_str = strdup(bfs_val.c_str()); + if (init_param.binary_fmt_str == nullptr && !bfs_val.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str from dict"); + } + } + } else if (!params.is_none()) { + // If using a reader_init_param_t object from Python, its members are + // already set via def_property (No need to copy here, just ensure + // it's reader_init_param_t object) If `params` is a + // `reader_init_param_t` object, Pybind11 will pass its fields + // directly We need to ensure that the `binary_fmt_str` member of + // `params` is correctly handled. The direct `getattr` below is for + // other fields, for binary_fmt_str, the `def_property` takes care. 
+ init_param.time_field = py::getattr(params, "time_field").cast(); + init_param.obj_id_field = + py::getattr(params, "obj_id_field").cast(); + init_param.obj_size_field = + py::getattr(params, "obj_size_field").cast(); + init_param.delimiter = + py::getattr(params, "delimiter").cast()[0]; + init_param.has_header = + py::getattr(params, "has_header").cast(); + // Handle binary_fmt_str if it's set on the Python object + if (py::hasattr(params, "binary_fmt_str") && + !py::getattr(params, "binary_fmt_str").is_none()) { + std::string bfs_val = + py::getattr(params, "binary_fmt_str").cast(); + if (init_param.binary_fmt_str != nullptr) + free(init_param.binary_fmt_str); + init_param.binary_fmt_str = strdup(bfs_val.c_str()); + if (init_param.binary_fmt_str == nullptr && !bfs_val.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str from object"); + } + } + } + // ... (rest of open_trace function) ... + reader_t* ptr = open_trace( + trace_path.c_str(), static_cast(type), &init_param); + return std::unique_ptr(ptr); + }, + py::arg("trace_path"), py::arg("type"), + py::arg("reader_init_param") = py::none(), + R"pbdoc( + Open a trace file for reading. + + Args: + trace_path (str): Path to the trace file. + type (int): Type of the trace (e.g., CSV_TRACE). + reader_init_param (Union[dict, reader_init_param_t, None]): Initialization parameters for the reader. + + Returns: + Reader: A new reader instance for the trace. + )pbdoc"); + + /** + * @brief Generic function to create a cache instance. + */ + m.def( + "create_cache", + [](const std::string& eviction_algo, const uint64_t cache_size, + const std::string& eviction_params, + bool consider_obj_metadata) { return nullptr; }, + py::arg("eviction_algo"), py::arg("cache_size"), + py::arg("eviction_params"), py::arg("consider_obj_metadata"), + R"pbdoc( + Create a cache instance. + + Args: + eviction_algo (str): Eviction algorithm to use (e.g., "LRU", "FIFO", "Random"). 
+ cache_size (int): Size of the cache in bytes. + eviction_params (str): Additional parameters for the eviction algorithm. + consider_obj_metadata (bool): Whether to consider object metadata in eviction decisions. + + Returns: + Cache: A new cache instance. + )pbdoc"); + + /* TODO(haocheng): should we support all parameters in the + * common_cache_params_t? (hash_power, etc.) */ + + // Currently supported eviction algorithms with direct initialization: + // - "ARC" + // - "Clock" + // - "FIFO" + // - "LRB" + // - "LRU" + // - "S3FIFO" + // - "Sieve" + // - "ThreeLCache" + // - "TinyLFU" + // - "TwoQ" + + /** + * @brief Create a ARC cache instance. + */ + m.def( + "ARC_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = ARC_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a ARC cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + )pbdoc"); + + /** + * @brief Create a Clock cache instance. + */ + m.def( + "Clock_init", + [](uint64_t cache_size, long int n_bit_counter, long int init_freq) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + // assemble the cache specific parameters + std::string cache_specific_params = + "n-bit-counter=" + std::to_string(n_bit_counter) + "," + + "init-freq=" + std::to_string(init_freq); + + cache_t* ptr = Clock_init(cc_params, cache_specific_params.c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("n_bit_counter") = 1, + py::arg("init_freq") = 0, + R"pbdoc( + Create a Clock cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + n_bit_counter (int): Number of bits for counter (default: 1). + init_freq (int): Initial frequency value (default: 0). + + Returns: + Cache: A new Clock cache instance. + )pbdoc"); + + /** + * @brief Create a FIFO cache instance. 
+ */ + m.def( + "FIFO_init", + [](uint64_t cache_size) { + // Construct common cache parameters + common_cache_params_t cc_params = {.cache_size = cache_size}; + // FIFO no specific parameters, so we pass nullptr + cache_t* ptr = FIFO_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a FIFO cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new FIFO cache instance. + )pbdoc"); + +#ifdef ENABLE_LRB + /** + * @brief Create a LRB cache instance. + */ + m.def( + "LRB_init", + [](uint64_t cache_size, std::string objective) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = LRB_init(cc_params, ("objective=" + objective).c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", + R"pbdoc( + Create a LRB cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + objective (str): Objective function to optimize (default: "byte-miss-ratio"). + + Returns: + Cache: A new LRB cache instance. + )pbdoc"); +#else + // TODO(haocheng): add a dummy function to avoid the error when LRB is not + // enabled + m.def( + "LRB_init", + [](uint64_t cache_size, std::string objective) { + throw std::runtime_error("LRB is not enabled"); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); +#endif + + /** + * @brief Create a LRU cache instance. + */ + m.def( + "LRU_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = LRU_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a LRU cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new LRU cache instance. + )pbdoc"); + + /** + * @brief Create a S3FIFO cache instance. 
+ */ + m.def( + "S3FIFO_init", + [](uint64_t cache_size, double fifo_size_ratio, double ghost_size_ratio, + int move_to_main_threshold) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = S3FIFO_init( + cc_params, + ("fifo-size-ratio=" + std::to_string(fifo_size_ratio) + "," + + "ghost-size-ratio=" + std::to_string(ghost_size_ratio) + "," + + "move-to-main-threshold=" + std::to_string(move_to_main_threshold)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("fifo_size_ratio") = 0.10, + py::arg("ghost_size_ratio") = 0.90, py::arg("move_to_main_threshold") = 2, + R"pbdoc( + Create a S3FIFO cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + fifo_size_ratio (float): Ratio of FIFO size to cache size (default: 0.10). + ghost_size_ratio (float): Ratio of ghost size to cache size (default: 0.90). + move_to_main_threshold (int): Threshold for moving to main queue (default: 2). + + Returns: + Cache: A new S3FIFO cache instance. + )pbdoc"); + + /** + * @brief Create a Sieve cache instance. + */ + m.def( + "Sieve_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = Sieve_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a Sieve cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new Sieve cache instance. + )pbdoc"); + +#ifdef ENABLE_3L_CACHE + /** + * @brief Create a ThreeL cache instance. + */ + m.def( + "ThreeLCache_init", + [](uint64_t cache_size, std::string objective) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = + ThreeLCache_init(cc_params, ("objective=" + objective).c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", + R"pbdoc( + Create a ThreeL cache instance. + + Args: + cache_size (int): Size of the cache in bytes. 
+ objective (str): Objective function to optimize (default: "byte-miss-ratio"). + + Returns: + Cache: A new ThreeL cache instance. + )pbdoc"); +#else + // TODO(haocheng): add a dummy function to avoid the error when ThreeLCache is + // not enabled + m.def( + "ThreeLCache_init", + [](uint64_t cache_size, std::string objective) { + throw std::runtime_error("ThreeLCache is not enabled"); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); +#endif + + /** + * @brief Create a TinyLFU cache instance. + */ + // mark evivtion parsing need change + m.def( + "TinyLFU_init", + [](uint64_t cache_size, std::string main_cache, double window_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = WTinyLFU_init( + cc_params, ("main-cache=" + main_cache + "," + + "window-size=" + std::to_string(window_size)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("main_cache") = "SLRU", + py::arg("window_size") = 0.01, + R"pbdoc( + Create a TinyLFU cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + main_cache (str): Main cache to use (default: "SLRU"). + window_size (float): Window size for TinyLFU (default: 0.01). + + Returns: + Cache: A new TinyLFU cache instance. + )pbdoc"); + + /** + * @brief Create a TwoQ cache instance. + */ + m.def( + "TwoQ_init", + [](uint64_t cache_size, double Ain_size_ratio, double Aout_size_ratio) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = TwoQ_init( + cc_params, + ("Ain-size-ratio=" + std::to_string(Ain_size_ratio) + "," + + "Aout-size-ratio=" + std::to_string(Aout_size_ratio)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("Ain_size_ratio") = 0.25, + py::arg("Aout_size_ratio") = 0.5, + R"pbdoc( + Create a TwoQ cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + Ain_size_ratio (float): Ratio of A-in size to cache size (default: 0.25). 
+ Aout_size_ratio (float): Ratio of A-out size to cache size (default: 0.5). + + Returns: + Cache: A new TwoQ cache instance. + )pbdoc"); + + /** + * @brief Create a Python hook-based cache instance. + */ + py::class_(m, "PythonHookCache") + .def(py::init(), py::arg("cache_size"), + py::arg("cache_name") = "PythonHookCache") + .def("set_hooks", &PythonHookCache::set_hooks, py::arg("init_hook"), + py::arg("hit_hook"), py::arg("miss_hook"), py::arg("eviction_hook"), + py::arg("remove_hook"), py::arg("free_hook") = py::none(), + R"pbdoc( + Set the hook functions for the cache. + + Args: + init_hook (callable): Function called during cache initialization. + Signature: init_hook(cache_size: int) -> Any + hit_hook (callable): Function called on cache hit. + Signature: hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None + miss_hook (callable): Function called on cache miss. + Signature: miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None + eviction_hook (callable): Function called to select eviction candidate. + Signature: eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int + remove_hook (callable): Function called when object is removed. + Signature: remove_hook(plugin_data: Any, obj_id: int) -> None + free_hook (callable, optional): Function called during cache cleanup. + Signature: free_hook(plugin_data: Any) -> None + )pbdoc") + .def("get", &PythonHookCache::get, py::arg("req"), + R"pbdoc( + Process a cache request. + + Args: + req (Request): The cache request to process. + + Returns: + bool: True if cache hit, False if cache miss. + )pbdoc") + .def_readwrite("n_req", &PythonHookCache::n_req) + .def_readwrite("n_obj", &PythonHookCache::n_obj) + .def_readwrite("occupied_byte", &PythonHookCache::occupied_byte) + .def_readwrite("cache_size", &PythonHookCache::cache_size); + + /** + * @brief Process a trace with a cache and return miss ratio. 
+ */ + m.def( + "process_trace", + [](cache_t& cache, reader_t& reader, int max_req = -1, int max_sec = -1, + int64_t start_time = -1, int64_t end_time = -1) { + request_t* req = new_request(); + int n_req = 0, n_hit = 0; + bool hit; + + read_one_req(&reader, req); + while (req->valid) { + // Check limits + if (max_req != -1 && n_req >= max_req) break; + if (max_sec != -1 && req->clock_time >= end_time) break; + if (start_time != -1 && req->clock_time < start_time) { + read_one_req(&reader, req); + continue; + } + + n_req += 1; + hit = cache.get(&cache, req); + if (hit) n_hit += 1; + read_one_req(&reader, req); + } + + free_request(req); + // return the miss ratio + return n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; + }, + py::arg("cache"), py::arg("reader"), py::arg("max_req") = -1, + py::arg("max_sec") = -1, py::arg("start_time") = -1, + py::arg("end_time") = -1, + R"pbdoc( + Process a trace with a cache and return miss ratio. + + This function processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. + + Args: + cache (Cache): The cache instance to use for processing. + reader (Reader): The trace reader instance. + max_req (int): Maximum number of requests to process (-1 for no limit). + max_sec (int): Maximum seconds to process (-1 for no limit). + start_time (int): Start time filter (-1 for no filter). + end_time (int): End time filter (-1 for no filter). + + Returns: + float: Miss ratio (0.0 to 1.0). + + Example: + >>> cache = libcachesim.LRU(1024*1024) + >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) + >>> miss_ratio = libcachesim.process_trace(cache, reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + )pbdoc"); + + /** + * @brief Process a trace with a Python hook cache and return miss ratio. 
+ */ + m.def( + "process_trace_python_hook", + [](PythonHookCache& cache, reader_t& reader, int max_req = -1, + int max_sec = -1, int64_t start_time = -1, int64_t end_time = -1) { + request_t* req = new_request(); + int n_req = 0, n_hit = 0; + bool hit; + + read_one_req(&reader, req); + while (req->valid) { + // Check limits + if (max_req != -1 && n_req >= max_req) break; + if (max_sec != -1 && req->clock_time >= end_time) break; + if (start_time != -1 && req->clock_time < start_time) { + read_one_req(&reader, req); + continue; + } + + n_req += 1; + hit = cache.get(*req); + if (hit) n_hit += 1; + read_one_req(&reader, req); + } + + free_request(req); + // return the miss ratio + return n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; + }, + py::arg("cache"), py::arg("reader"), py::arg("max_req") = -1, + py::arg("max_sec") = -1, py::arg("start_time") = -1, + py::arg("end_time") = -1, + R"pbdoc( + Process a trace with a Python hook cache and return miss ratio. + + This function processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. Specifically designed + for PythonHookCache instances. + + Args: + cache (PythonHookCache): The Python hook cache instance to use. + reader (Reader): The trace reader instance. + max_req (int): Maximum number of requests to process (-1 for no limit). + max_sec (int): Maximum seconds to process (-1 for no limit). + start_time (int): Start time filter (-1 for no filter). + end_time (int): End time filter (-1 for no filter). + + Returns: + float: Miss ratio (0.0 to 1.0). 
+ + Example: + >>> cache = libcachesim.PythonHookCachePolicy(1024*1024) + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) + >>> miss_ratio = libcachesim.process_trace_python_hook(cache.cache, reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + )pbdoc"); + +#ifdef VERSION_INFO + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); +#else + m.attr("__version__") = "dev"; +#endif +} diff --git a/libCacheSim-python/tests/conftest.py b/libCacheSim-python/tests/conftest.py new file mode 100644 index 000000000..5335134b1 --- /dev/null +++ b/libCacheSim-python/tests/conftest.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import os +import gc + +import pytest + +from libcachesim import Reader, TraceType, open_trace + + +@pytest.fixture +def mock_reader(): + data_file = os.path.join( # noqa: PTH118 + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + reader: Reader = open_trace( + data_file, + type=TraceType.ORACLE_GENERAL_TRACE.value, + ) + try: + yield reader + finally: + # More careful cleanup + try: + if hasattr(reader, 'close'): + reader.close() + except Exception: # Be specific about exception type + pass + # Don't explicitly del reader here, let Python handle it + gc.collect() diff --git a/libCacheSim-python/tests/pytest.ini b/libCacheSim-python/tests/pytest.ini new file mode 100644 index 000000000..561da0177 --- /dev/null +++ b/libCacheSim-python/tests/pytest.ini @@ -0,0 +1,9 @@ +[pytest] +addopts = -ra --strict-markers -m "not optional" + +markers = + optional: mark test as optional + +python_files = test.py test_*.py *_test.py +python_classes = Test* +python_functions = test_* \ No newline at end of file diff --git a/libCacheSim-python/tests/reference.csv b/libCacheSim-python/tests/reference.csv new file mode 100644 index 000000000..cb569d0c9 --- /dev/null 
+++ b/libCacheSim-python/tests/reference.csv @@ -0,0 +1,20 @@ +FIFO,0.01,0.8368 +ARC,0.01,0.8222 +Clock,0.01,0.8328 +LRB,0.01,0.8339 +LRU,0.01,0.8339 +S3FIFO,0.01,0.8235 +Sieve,0.01,0.8231 +3LCache,0.01,0.8339 +TinyLFU,0.01,0.8262 +TwoQ,0.01,0.8276 +FIFO,0.1,0.8075 +ARC,0.1,0.7688 +Clock,0.1,0.8086 +LRB,0.1,0.8097 +LRU,0.1,0.8097 +S3FIFO,0.1,0.7542 +Sieve,0.1,0.7903 +3LCache,0.1,0.8097 +TinyLFU,0.1,0.7666 +TwoQ,0.1,0.7695 diff --git a/libCacheSim-python/tests/test_eviction.py b/libCacheSim-python/tests/test_eviction.py new file mode 100644 index 000000000..1de462a84 --- /dev/null +++ b/libCacheSim-python/tests/test_eviction.py @@ -0,0 +1,61 @@ +import pytest +import gc +import sys +import os + +from libcachesim import ( + ARC, + FIFO, + LRU, + S3FIFO, + Clock, + Sieve, + TinyLFU, + TwoQ, +) +from tests.utils import get_reference_data + + +@pytest.mark.parametrize("eviction_algo", [ + FIFO, + ARC, + Clock, + LRU, + S3FIFO, + Sieve, + TinyLFU, + TwoQ, +]) +@pytest.mark.parametrize("cache_size_ratio", [0.01]) +def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader): + cache = None + try: + # create a cache with the eviction policy + cache = eviction_algo(cache_size=int(mock_reader.get_wss()*cache_size_ratio)) + req_count = 0 + miss_count = 0 + + # Limit the number of requests to avoid long test times + # max_requests = 1000 + for i, req in enumerate(mock_reader): + # if i >= max_requests: + # break + hit = cache.get(req) + if not hit: + miss_count += 1 + req_count += 1 + + if req_count == 0: + pytest.skip("No requests processed") + + miss_ratio = miss_count / req_count + reference_miss_ratio = get_reference_data(eviction_algo.__name__, cache_size_ratio) + if reference_miss_ratio is None: + pytest.skip(f"No reference data for {eviction_algo.__name__} with cache size ratio {cache_size_ratio}") + assert abs(miss_ratio - reference_miss_ratio) < 0.01, f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}" + + except Exception as e: + 
print(f"Error in test_eviction_algo: {e}") + raise + finally: + pass diff --git a/libCacheSim-python/tests/test_process_trace.py b/libCacheSim-python/tests/test_process_trace.py new file mode 100644 index 000000000..0d08edeab --- /dev/null +++ b/libCacheSim-python/tests/test_process_trace.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +""" +Test file for process_trace functionality. +""" + +import sys +import os +import pytest + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader with binary trace file.""" + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) + + +def test_process_trace_native(): + """Test process_trace with native LRU cache.""" + print("Testing process_trace with native LRU...") + + # Open trace + reader = create_trace_reader() + if reader is None: + pytest.skip("Test trace file not found, skipping test") + + # Create LRU cache + cache = lcs.LRU(1024*1024) # 1MB cache + + # Process trace and get miss ratio + miss_ratio = cache.process_trace(reader, max_req=1000) + + print(f"Native LRU miss ratio (first 1000 requests): {miss_ratio:.4f}") + + # Verify miss ratio is reasonable (should be between 0 and 1) + assert 0.0 <= miss_ratio <= 1.0, f"Invalid miss ratio: {miss_ratio}" + print("PASS: Native LRU process_trace test PASSED") + + +def test_process_trace_python_hook(): + """Test process_trace with Python hook cache.""" + print("\nTesting process_trace with Python hook 
cache...") + + # Open trace + reader = create_trace_reader() + if reader is None: + pytest.skip("Test trace file not found, skipping test") + + # Create Python hook LRU cache + cache = lcs.PythonHookCachePolicy(1024*1024, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test both methods + # Method 1: Direct function call + miss_ratio1 = lcs.process_trace_python_hook(cache.cache, reader, max_req=1000) + + # Need to reopen the trace for second test + reader2 = create_trace_reader() + if reader2 is None: + print("Warning: Cannot reopen trace file, skipping second test") + # Continue with just the first test result + assert miss_ratio1 is not None and 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio: {miss_ratio1}" + return + + # Reset cache for fair comparison + cache2 = lcs.PythonHookCachePolicy(1024*1024, "TestLRU2") + cache2.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Method 2: Convenience method + miss_ratio2 = cache2.process_trace(reader2, max_req=1000) + + print(f"Python hook LRU miss ratio (method 1): {miss_ratio1:.4f}") + print(f"Python hook LRU miss ratio (method 2): {miss_ratio2:.4f}") + + # Verify both methods give the same result and miss ratios are reasonable + assert 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio 1: {miss_ratio1}" + assert 0.0 <= miss_ratio2 <= 1.0, f"Invalid miss ratio 2: {miss_ratio2}" + assert abs(miss_ratio1 - miss_ratio2) < 0.001, f"Different results from the two methods: {miss_ratio1} vs {miss_ratio2}" + print("PASS: Python hook process_trace test PASSED") + + +def 
test_compare_native_vs_python_hook(): + """Compare native LRU vs Python hook LRU using process_trace.""" + print("\nComparing native LRU vs Python hook LRU using process_trace...") + + cache_size = 512*1024 # 512KB cache + max_requests = 500 + + # Test native LRU + native_cache = lcs.LRU(cache_size) + reader1 = create_trace_reader() + if reader1 is None: + pytest.skip("Test trace file not found, skipping test") + + native_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) + + # Test Python hook LRU + hook_cache = lcs.PythonHookCachePolicy(cache_size, "HookLRU") + + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + reader2 = create_trace_reader() + if reader2 is None: + print("Warning: Cannot reopen trace file, skipping comparison") + return # Skip test + + hook_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) + + print(f"Native LRU miss ratio: {native_miss_ratio:.4f}") + print(f"Python hook LRU miss ratio: {hook_miss_ratio:.4f}") + print(f"Difference: {abs(native_miss_ratio - hook_miss_ratio):.4f}") + + # They should be very similar (allowing for some small differences due to implementation details) + assert abs(native_miss_ratio - hook_miss_ratio) < 0.05, f"Too much difference: {abs(native_miss_ratio - hook_miss_ratio):.4f}" + print("PASS: Native vs Python hook comparison test PASSED") + + +def test_error_handling(): + """Test error handling for process_trace.""" + print("\nTesting error handling...") + + cache = lcs.PythonHookCachePolicy(1024) + + reader = create_trace_reader() + if reader is None: + pytest.skip("Test trace file not found, 
skipping error test") + + # Try to process trace without setting hooks + try: + cache.process_trace(reader) + assert False, "Should have raised RuntimeError" + except RuntimeError as e: + print(f"Correctly caught error: {e}") + print("PASS: Error handling test PASSED") + + +def test_lru_implementation_accuracy(): + """Test that Python hook LRU implementation matches native LRU closely.""" + print("Testing LRU implementation accuracy...") + + cache_size = 1024 * 1024 # 1MB + max_requests = 100 + + # Create readers + reader1 = create_trace_reader() + reader2 = create_trace_reader() + + if not reader1 or not reader2: + pytest.skip("Cannot open trace files for LRU accuracy test") + + # Test native LRU + native_cache = lcs.LRU(cache_size) + native_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) + + # Test Python hook LRU + hook_cache = lcs.PythonHookCachePolicy(cache_size, "AccuracyTestLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_optimized_lru_hooks() + hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + hook_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) + + # Calculate difference + difference = abs(native_miss_ratio - hook_miss_ratio) + percentage_diff = (difference / native_miss_ratio) * 100 if native_miss_ratio > 0 else 0 + + print(f"Native LRU miss ratio: {native_miss_ratio:.6f}") + print(f"Hook LRU miss ratio: {hook_miss_ratio:.6f}") + print(f"Percentage difference: {percentage_diff:.4f}%") + + # Assert that the difference is small (< 5%) + assert percentage_diff < 5.0, f"LRU implementation difference too large: {percentage_diff:.4f}%" + print("PASS: LRU implementation accuracy test passed") + + +def create_optimized_lru_hooks(): + """Create optimized LRU hooks that closely match native LRU behavior.""" + + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) 
+ + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +if __name__ == "__main__": + tests = [ + test_process_trace_native, + test_process_trace_python_hook, + test_compare_native_vs_python_hook, + test_error_handling, + test_lru_implementation_accuracy, + ] + + all_passed = True + for test in tests: + try: + test() # Just call the test, don't check return value + print(f"PASS: {test.__name__} passed") + except Exception as e: + print(f"FAIL: {test.__name__} failed with exception: {e}") + all_passed = False + + if all_passed: + print("\nAll process_trace tests PASSED!") + else: + print("\nSome process_trace tests FAILED!") diff --git a/libCacheSim-python/tests/test_python_hook_cache.py b/libCacheSim-python/tests/test_python_hook_cache.py new file mode 100644 index 000000000..c84c03cbb --- /dev/null +++ b/libCacheSim-python/tests/test_python_hook_cache.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 +""" +Test file for PythonHookCachePolicy functionality. 
+""" + +import sys +import os +import pytest + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def test_python_hook_cache(): + """Test the Python hook cache implementation.""" + print("Testing PythonHookCachePolicy...") + + # Create cache + cache_size = 300 # 3 objects of size 100 each + cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + print(f"Initializing LRU cache with size {cache_size}") + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + print(f"Hit: object {obj_id}") + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + print(f"Miss: object {obj_id}, size {obj_size}") + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + victim = next(iter(lru_dict)) + print(f"Evicting object {victim} to make room for {obj_id}") + return victim + + def remove_hook(lru_dict, obj_id): + print(f"Removing object {obj_id}") + lru_dict.pop(obj_id, None) + + # Set hooks + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test sequence + test_requests = [ + (1, 100), # Miss - insert 1 + (2, 100), # Miss - insert 2 + (3, 100), # Miss - insert 3 (cache full) + (1, 100), # Hit - move 1 to end + (4, 100), # Miss - should evict 2 (LRU), insert 4 + (2, 100), # Miss - should evict 3, insert 2 + (1, 100), # Hit - move 1 to end + ] + + print("\n--- Starting cache simulation ---") + for obj_id, obj_size in test_requests: + req = lcs.Request() + req.obj_id = obj_id + req.obj_size = obj_size + + result = cache.get(req) + print(f"Request {obj_id}: {'HIT' if result else 'MISS'}") + print(f" Cache stats: {cache.n_obj} objects, 
{cache.occupied_byte} bytes\n") + + print("Test completed successfully!") + + +def test_error_handling(): + """Test error handling.""" + print("\nTesting error handling...") + + cache = lcs.PythonHookCachePolicy(1000) + + # Try to use cache without setting hooks + req = lcs.Request() + req.obj_id = 1 + req.obj_size = 100 + + with pytest.raises(RuntimeError): + cache.get(req) + + print("Error handling test passed!") + + +def test_lru_comparison(): + """Test Python hook LRU against native LRU to verify identical behavior.""" + print("\nTesting Python hook LRU vs Native LRU comparison...") + + cache_size = 300 # 3 objects of size 100 each + + # Create native LRU cache + native_lru = lcs.LRU(cache_size) + + # Create Python hook LRU cache + hook_lru = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test sequence with various access patterns + test_requests = [ + (1, 100), # Miss - insert 1 + (2, 100), # Miss - insert 2 + (3, 100), # Miss - insert 3 (cache full) + (1, 100), # Hit - move 1 to end + (4, 100), # Miss - should evict 2 (LRU), insert 4 + (2, 100), # Miss - should evict 3, insert 2 + (1, 100), # Hit - move 1 to end + (3, 100), # Miss - should evict 4, insert 3 + (5, 100), # Miss - should evict 2, insert 5 + (1, 100), # Hit - move 1 to end + (3, 100), # Hit - move 3 to end + (6, 100), # Miss - should evict 5, insert 6 + ] + + print("\n--- Comparing LRU implementations ---") + hit_rate_matches = 0 + total_requests = len(test_requests) + + for i, (obj_id, obj_size) in enumerate(test_requests): + 
# Test native LRU + req_native = lcs.Request() + req_native.obj_id = obj_id + req_native.obj_size = obj_size + native_result = native_lru.get(req_native) + + # Test hook LRU + req_hook = lcs.Request() + req_hook.obj_id = obj_id + req_hook.obj_size = obj_size + hook_result = hook_lru.get(req_hook) + + # Compare results + match = native_result == hook_result + if match: + hit_rate_matches += 1 + + print(f"Request {i+1}: obj_id={obj_id}") + print(f" Native LRU: {'HIT' if native_result else 'MISS'}") + print(f" Hook LRU: {'HIT' if hook_result else 'MISS'}") + print(f" Match: {'PASS' if match else 'FAIL'}") + + # Compare cache statistics + stats_match = (native_lru.cache.n_obj == hook_lru.n_obj and + native_lru.cache.occupied_byte == hook_lru.occupied_byte) + print(f" Native stats: {native_lru.cache.n_obj} objects, {native_lru.cache.occupied_byte} bytes") + print(f" Hook stats: {hook_lru.n_obj} objects, {hook_lru.occupied_byte} bytes") + print(f" Stats match: {'PASS' if stats_match else 'FAIL'}") + print() + + if not match: + print(f"ERROR: Hit/miss mismatch at request {i+1}") + return False + + if not stats_match: + print(f"ERROR: Cache statistics mismatch at request {i+1}") + return False + + accuracy = (hit_rate_matches / total_requests) * 100 + print(f"LRU comparison test results:") + print(f" Total requests: {total_requests}") + print(f" Matching results: {hit_rate_matches}") + print(f" Accuracy: {accuracy:.1f}%") + + assert accuracy == 100.0, f"LRU implementations differ! 
Accuracy: {accuracy:.1f}%" + print("PASS: LRU comparison test PASSED - Both implementations behave identically!") + + +def test_lru_comparison_variable_sizes(): + """Test Python hook LRU vs Native LRU with variable object sizes.""" + print("\nTesting Python hook LRU vs Native LRU with variable object sizes...") + + cache_size = 1000 # Total cache capacity + + # Create native LRU cache + native_lru = lcs.LRU(cache_size) + + # Create Python hook LRU cache + hook_lru = lcs.PythonHookCachePolicy(cache_size, "VariableSizeLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test sequence with variable object sizes + test_requests = [ + (1, 200), # Miss - insert 1 (200 bytes) + (2, 300), # Miss - insert 2 (300 bytes) + (3, 400), # Miss - insert 3 (400 bytes) - total 900 bytes + (4, 200), # Miss - should evict 1, insert 4 (total would be 1100, over limit) + (1, 200), # Miss - should evict 2, insert 1 + (5, 100), # Miss - should evict 3, insert 5 + (4, 200), # Hit - access 4 + (6, 500), # Miss - should evict multiple objects to fit + (4, 200), # Miss - 4 was evicted + ] + + print("\n--- Comparing LRU implementations with variable sizes ---") + all_match = True + + for i, (obj_id, obj_size) in enumerate(test_requests): + # Test native LRU + req_native = lcs.Request() + req_native.obj_id = obj_id + req_native.obj_size = obj_size + native_result = native_lru.get(req_native) + + # Test hook LRU + req_hook = lcs.Request() + req_hook.obj_id = obj_id + req_hook.obj_size = obj_size + hook_result = hook_lru.get(req_hook) + + # Compare results + result_match = 
native_result == hook_result + stats_match = (native_lru.cache.n_obj == hook_lru.n_obj and + native_lru.cache.occupied_byte == hook_lru.occupied_byte) + + print(f"Request {i+1}: obj_id={obj_id}, size={obj_size}") + print(f" Native LRU: {'HIT' if native_result else 'MISS'}") + print(f" Hook LRU: {'HIT' if hook_result else 'MISS'}") + print(f" Result match: {'PASS' if result_match else 'FAIL'}") + print(f" Native stats: {native_lru.cache.n_obj} objects, {native_lru.cache.occupied_byte} bytes") + print(f" Hook stats: {hook_lru.n_obj} objects, {hook_lru.occupied_byte} bytes") + print(f" Stats match: {'PASS' if stats_match else 'FAIL'}") + print() + + if not result_match or not stats_match: + all_match = False + print(f"ERROR: Mismatch at request {i+1}") + + assert all_match, "Variable size LRU comparison failed - implementations differ!" + print("PASS: Variable size LRU comparison test PASSED!") + + +if __name__ == "__main__": + test_python_hook_cache() + test_error_handling() + test_lru_comparison() + test_lru_comparison_variable_sizes() diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py new file mode 100644 index 000000000..48d3751de --- /dev/null +++ b/libCacheSim-python/tests/test_unified_interface.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +""" +Test the unified interface for all cache policies. +""" + +import sys +import os +import pytest + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader. + + Returns: + Reader or None: A trace reader instance, or None if trace file not found. 
+ """ + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) + + +def create_test_lru_hooks(): + """Create LRU hooks for testing. + + Returns: + tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + """ + + def init_hook(cache_size): + """Initialize LRU data structure.""" + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + """Handle cache hit by moving to end (most recently used).""" + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + """Handle cache miss by adding new object.""" + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + """Return the least recently used object ID for eviction.""" + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + """Remove object from LRU structure.""" + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +def test_unified_process_trace_interface(): + """Test that all cache policies have the same process_trace interface.""" + print("Testing unified process_trace interface...") + + cache_size = 1024 * 1024 # 1MB + max_requests = 100 + + # Create trace reader + reader = create_trace_reader() + if not reader: + pytest.skip("Skipping test: Trace file not available") + + # Test different cache policies + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": lcs.ARC(cache_size), + } + + # Add Python hook cache + python_cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() + python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Python Hook LRU"] = python_cache 
+ + print("\n--- Testing unified process_trace interface ---") + + results = {} + for name, cache in caches.items(): + # Create fresh reader for each test + test_reader = create_trace_reader() + if not test_reader: + pytest.skip(f"Cannot create reader for {name} test") + + # Test process_trace method exists + assert hasattr(cache, 'process_trace'), f"{name} missing process_trace method" + + # Test process_trace functionality + miss_ratio = cache.process_trace(test_reader, max_req=max_requests) + results[name] = miss_ratio + + print(f"{name:15s}: miss_ratio = {miss_ratio:.4f}") + print(f" cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes") + + # Verify miss_ratio is valid + assert 0.0 <= miss_ratio <= 1.0, f"{name} returned invalid miss_ratio: {miss_ratio}" + + print(f"\nPASS: All {len(caches)} cache policies support unified process_trace interface!") + + # Verify we got results for all caches + assert len(results) == len(caches), "Not all caches were tested" + + +def test_unified_properties_interface(): + """Test that all cache policies have the same properties interface.""" + print("\nTesting unified properties interface...") + + cache_size = 1024 * 1024 + + # Create different cache types + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "Python Hook": lcs.PythonHookCachePolicy(cache_size, "TestCache"), + } + + print("\n--- Testing unified properties interface ---") + + required_properties = ['cache_size', 'n_req', 'n_obj', 'occupied_byte'] + + for name, cache in caches.items(): + print(f"{name:15s}:") + + # Test all required properties exist + for prop in required_properties: + assert hasattr(cache, prop), f"{name} missing {prop} property" + value = getattr(cache, prop) + print(f" {prop} = {value}") + + # Test cache_size is correct + assert cache.cache_size == cache_size, f"{name} cache_size mismatch" + + print("PASS: All cache policies support unified properties interface!") + + +def test_get_interface_consistency(): + 
"""Test that get() method works consistently across all cache policies.""" + print("\nTesting get() interface consistency...") + + cache_size = 1024 * 1024 + + # Create caches + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + } + + # Add Python hook cache + python_cache = lcs.PythonHookCachePolicy(cache_size, "ConsistencyTest") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() + python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Python Hook"] = python_cache + + # Create a test request using the proper request class + test_req = lcs.Request() + test_req.obj_id = 1 + test_req.obj_size = 1024 + + print("Testing get() method with test request...") + + for name, cache in caches.items(): + # Reset cache state for consistent testing + initial_n_req = cache.n_req + initial_n_obj = cache.n_obj + initial_occupied = cache.occupied_byte + + # Test get method exists + assert hasattr(cache, 'get'), f"{name} missing get method" + + # Test first access (should be miss for new object) + result = cache.get(test_req) + print(f"{name:15s}: first access = {'HIT' if result else 'MISS'}") + + # Test properties updated correctly + assert cache.n_req > initial_n_req, f"{name} n_req not updated" + if not result: # If it was a miss, object should be added + assert cache.n_obj > initial_n_obj, f"{name} n_obj not updated after miss" + assert cache.occupied_byte > initial_occupied, f"{name} occupied_byte not updated after miss" + + # Test second access to same object (should be hit) + second_result = cache.get(test_req) + print(f"{name:15s}: second access = {'HIT' if second_result else 'MISS'}") + + # Second access should be a hit (unless cache is too small) + if cache.cache_size >= test_req.obj_size: + assert second_result, f"{name} second access should be a hit" + + print("PASS: Get interface consistency test passed!") + + +if __name__ == "__main__": + tests = [ + 
test_unified_process_trace_interface, + test_unified_properties_interface, + test_get_interface_consistency, + ] + + all_passed = True + for test in tests: + try: + test() # Just call the test, don't check return value + print(f"PASS: {test.__name__} passed") + except Exception as e: + print(f"FAIL: {test.__name__} failed with exception: {e}") + all_passed = False + + if all_passed: + print("\nAll unified interface tests PASSED!") + else: + print("\nSome unified interface tests FAILED!") diff --git a/libCacheSim-python/tests/utils.py b/libCacheSim-python/tests/utils.py new file mode 100644 index 000000000..6eabbdd2a --- /dev/null +++ b/libCacheSim-python/tests/utils.py @@ -0,0 +1,16 @@ +import os + + +def get_reference_data(eviction_algo, cache_size_ratio): + data_file = os.path.join( # noqa: PTH118 + (os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 + "tests", + "reference.csv" + ) + with open(data_file, "r") as f: # noqa: PTH123 + lines = f.readlines() + key = "3LCache" if eviction_algo == "ThreeLCache" else eviction_algo + for line in lines: + if line.startswith(f"{key},{cache_size_ratio}"): + return float(line.split(",")[-1]) + return None \ No newline at end of file diff --git a/libCacheSim/dataStructure/minimalIncrementCBF.c b/libCacheSim/dataStructure/minimalIncrementCBF.c index 82967eedb..b8667eb88 100644 --- a/libCacheSim/dataStructure/minimalIncrementCBF.c +++ b/libCacheSim/dataStructure/minimalIncrementCBF.c @@ -53,7 +53,7 @@ int minimalIncrementCBF_init(struct minimalIncrementCBF *CBF, int entries, CBF->counter_num = CBF->hashes * 2; } - CBF->bf = (unsigned int *)calloc(sizeof(unsigned int), CBF->counter_num); + CBF->bf = (unsigned int *)calloc(CBF->counter_num, sizeof(unsigned int)); // TODO: check whether unsigned int is enough for the size of each counter if (CBF->bf == NULL) { diff --git a/libCacheSim/include/libCacheSim/mem.h b/libCacheSim/include/libCacheSim/mem.h index 2f587d8b6..8068f9179 100644 --- 
a/libCacheSim/include/libCacheSim/mem.h
+++ b/libCacheSim/include/libCacheSim/mem.h
@@ -22,7 +22,7 @@
 #elif HEAP_ALLOCATOR == HEAP_ALLOCATOR_MALLOC
 #include <stdlib.h>
 #define my_malloc(type) (type *)malloc(sizeof(type))
-#define my_malloc_n(type, n) (type *)calloc(sizeof(type), n)
+#define my_malloc_n(type, n) (type *)calloc(n, sizeof(type))
 #define my_free(size, addr) free(addr)
 
 #elif HEAP_ALLOCATOR == HEAP_ALLOCATOR_ALIGNED_MALLOC
diff --git a/scripts/build_pypi.sh b/scripts/build_pypi.sh
new file mode 100644
index 000000000..5f4cda97c
--- /dev/null
+++ b/scripts/build_pypi.sh
@@ -0,0 +1 @@
+python3 -m cibuildwheel --platform linux libCacheSim-python
diff --git a/scripts/install_python.sh b/scripts/install_python.sh
new file mode 100644
index 000000000..d0ff2eba6
--- /dev/null
+++ b/scripts/install_python.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -euo pipefail
+
+# Build the main libCacheSim C++ library first
+echo "Building main libCacheSim library..."
+rm -rf ./build
+cmake -G Ninja -B build # -DENABLE_3L_CACHE=ON
+ninja -C build
+
+# Now build and install the Python binding
+echo "Building Python binding..."
+echo "Sync python version..."
+python scripts/sync_python_version.py
+pushd libCacheSim-python
+pip install -e . -vvv
+popd
+
+# Test that the import works
+echo "Testing import..."
+python -c "import libcachesim"
+
+# Run tests
+echo "Running tests..."
+pushd libCacheSim-python
+pytest .
+popd diff --git a/scripts/sync_node_version.py b/scripts/sync_node_version.py index d45a391a2..7497b1c00 100755 --- a/scripts/sync_node_version.py +++ b/scripts/sync_node_version.py @@ -22,18 +22,18 @@ def read_main_version(): """Read version from version.txt.""" project_root = get_project_root() version_file = project_root / "version.txt" - + if not version_file.exists(): print(f"Error: {version_file} not found", file=sys.stderr) sys.exit(1) - + with open(version_file, 'r') as f: version = f.read().strip() - + if not version: print("Error: version.txt is empty", file=sys.stderr) sys.exit(1) - + return version @@ -41,29 +41,29 @@ def update_package_json(version): """Update package.json with the new version.""" project_root = get_project_root() package_json_path = project_root / "libCacheSim-node" / "package.json" - + if not package_json_path.exists(): print(f"Error: {package_json_path} not found", file=sys.stderr) sys.exit(1) - + # Read current package.json with open(package_json_path, 'r') as f: package_data = json.load(f) - + current_version = package_data.get('version', 'unknown') - + if current_version == version: print(f"Version already up to date: {version}") return False - + # Update version package_data['version'] = version - + # Write back to file with proper formatting with open(package_json_path, 'w') as f: json.dump(package_data, f, indent=2) f.write('\n') # Add trailing newline - + print(f"Updated Node.js binding version: {current_version} → {version}") return True @@ -74,19 +74,19 @@ def main(): # Read main project version main_version = read_main_version() print(f"Main project version: {main_version}") - + # Update Node.js binding version updated = update_package_json(main_version) - + if updated: print("✓ Node.js binding version synchronized successfully") else: print("✓ No changes needed") - + except Exception as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git 
a/scripts/sync_python_version.py b/scripts/sync_python_version.py
new file mode 100644
index 000000000..65e51a92f
--- /dev/null
+++ b/scripts/sync_python_version.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+"""
+Script to synchronize version between libCacheSim main project and Python bindings.
+
+This script reads the version from version.txt and updates the pyproject.toml
+in libCacheSim-python to match.
+"""
+
+import json
+import os
+import sys
+import re
+from pathlib import Path
+
+
+def get_project_root():
+    """Get the project root directory."""
+    script_dir = Path(__file__).parent
+    return script_dir.parent
+
+
+def read_main_version():
+    """Read version from version.txt."""
+    project_root = get_project_root()
+    version_file = project_root / "version.txt"
+
+    if not version_file.exists():
+        print(f"Error: {version_file} not found", file=sys.stderr)
+        sys.exit(1)
+
+    with open(version_file, 'r') as f:
+        version = f.read().strip()
+
+    if not version:
+        print("Error: version.txt is empty", file=sys.stderr)
+        sys.exit(1)
+
+    return version
+
+def update_pyproject_toml(version):
+    """Update pyproject.toml with the new version."""
+    project_root = get_project_root()
+    pyproject_toml_path = project_root / "libCacheSim-python" / "pyproject.toml"
+
+    if not pyproject_toml_path.exists():
+        print(f"Error: {pyproject_toml_path} not found", file=sys.stderr)
+        return False
+
+    # Read current pyproject.toml
+    with open(pyproject_toml_path, 'r') as f:
+        pyproject_data = f.read()
+
+    # Update the version line in pyproject.toml so that it can match any version in version.txt, like "0.3.1" or "dev"
+    match = re.search(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", pyproject_data)
+    if not match:
+        print("Error: Could not find a valid version line in pyproject.toml", file=sys.stderr)
+        return False
+    current_version = match.group(1)
+    if current_version == version:
+        print(f"Python binding version already up to date: {version}")
+        return False
+    # replace the version line with the new version
+    pyproject_data = re.sub(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", f"version = \"{version}\"", pyproject_data)
+
+    # Write back to file with proper formatting
+    with open(pyproject_toml_path, 'w') as f:
+        f.write(pyproject_data)
+
+    print(f"Updated Python version: {current_version} → {version}")
+    return True
+
+
+def main():
+    """Main function."""
+    try:
+        # Read main project version
+        main_version = read_main_version()
+        print(f"Main project version: {main_version}")
+
+        # Update Python binding version
+        updated = update_pyproject_toml(main_version)
+
+        if updated:
+            print("Python binding version synchronized successfully")
+        else:
+            print("No changes needed")
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()