From 3185063c9c4fe62239aeeac91ee61e44a9b0049e Mon Sep 17 00:00:00 2001 From: Percy Date: Sat, 31 May 2025 19:29:33 +0800 Subject: [PATCH 01/10] Use scikit build --- .github/workflows/python.yml | 38 ++ CMakeLists.txt | 2 + libCacheSim-python/.gitignore | 59 ++ libCacheSim-python/CMakeLists.txt | 98 ++++ libCacheSim-python/README.md | 44 ++ libCacheSim-python/export/CMakeLists.txt | 28 + libCacheSim-python/export/README.md | 85 +++ libCacheSim-python/libcachesim/__init__.py | 46 ++ libCacheSim-python/libcachesim/__init__.pyi | 148 +++++ libCacheSim-python/libcachesim/const.py | 11 + libCacheSim-python/libcachesim/eviction.py | 295 ++++++++++ libCacheSim-python/pyproject.toml | 73 +++ libCacheSim-python/requirements.txt | 0 libCacheSim-python/src/pylibcachesim.cpp | 566 ++++++++++++++++++++ libCacheSim-python/tests/conftest.py | 32 ++ libCacheSim-python/tests/pytest.ini | 9 + libCacheSim-python/tests/reference.csv | 20 + libCacheSim-python/tests/test_eviction.py | 94 ++++ libCacheSim-python/tests/utils.py | 17 + scripts/install_python.sh | 9 + scripts/sync_node_version.py | 30 +- scripts/sync_python_version.py | 89 +++ 22 files changed, 1778 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/python.yml create mode 100644 libCacheSim-python/.gitignore create mode 100644 libCacheSim-python/CMakeLists.txt create mode 100644 libCacheSim-python/README.md create mode 100644 libCacheSim-python/export/CMakeLists.txt create mode 100644 libCacheSim-python/export/README.md create mode 100644 libCacheSim-python/libcachesim/__init__.py create mode 100644 libCacheSim-python/libcachesim/__init__.pyi create mode 100644 libCacheSim-python/libcachesim/const.py create mode 100644 libCacheSim-python/libcachesim/eviction.py create mode 100644 libCacheSim-python/pyproject.toml create mode 100644 libCacheSim-python/requirements.txt create mode 100644 libCacheSim-python/src/pylibcachesim.cpp create mode 100644 libCacheSim-python/tests/conftest.py create mode 100644 
libCacheSim-python/tests/pytest.ini create mode 100644 libCacheSim-python/tests/reference.csv create mode 100644 libCacheSim-python/tests/test_eviction.py create mode 100644 libCacheSim-python/tests/utils.py create mode 100644 scripts/install_python.sh create mode 100644 scripts/sync_python_version.py diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 000000000..ebd08a41a --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,38 @@ +name: Python + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Prepare + run: bash scripts/install_dependency.sh + + - name: Build main libCacheSim project + run: | + cmake -G Ninja -B build + ninja -C build + + - name: Install Python dependencies + run: | + pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + + - name: Build libCacheSim-python + run: | + cd libCacheSim-python + pip install -e . 
+ + - name: Run tests + run: | + cd libCacheSim-python + pytest tests/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 89513c28f..a2623b470 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,6 +247,8 @@ else() message(STATUS "Building without test") endif() +# Export variables for scikit-build -> build/export_vars.cmake +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) # libCacheSim unified library compilation and installation # Create a single library that combines all modular libraries diff --git a/libCacheSim-python/.gitignore b/libCacheSim-python/.gitignore new file mode 100644 index 000000000..34712f29d --- /dev/null +++ b/libCacheSim-python/.gitignore @@ -0,0 +1,59 @@ +# Automatically generated by `hgimportsvn` +.svn +.hgsvn + +# Ignore local virtualenvs +lib/ +bin/ +include/ +.Python/ + +# These lines are suggested according to the svn:ignore property +# Feel free to enable them by uncommenting them +*.pyc +*.pyo +*.swp +*.class +*.orig +*~ +.hypothesis/ + +# autogenerated +src/_pytest/_version.py +# setuptools +.eggs/ + +doc/*/_build +doc/*/.doctrees +build/ +dist/ +*.egg-info +htmlcov/ +issue/ +env/ +.env/ +.venv/ +/pythonenv*/ +3rdparty/ +.tox +.cache +.pytest_cache +.mypy_cache +.coverage +.coverage.* +coverage.xml +.ropeproject +.idea +.hypothesis +.pydevproject +.project +.settings +.vscode +__pycache__/ +.python-version + +# generated by pip +pip-wheel-metadata/ + +# pytest debug logs generated via --debug +pytestdebug.log \ No newline at end of file diff --git a/libCacheSim-python/CMakeLists.txt b/libCacheSim-python/CMakeLists.txt new file mode 100644 index 000000000..a40a08cae --- /dev/null +++ b/libCacheSim-python/CMakeLists.txt @@ -0,0 +1,98 @@ +cmake_minimum_required(VERSION 3.15...3.27) + +# Include exported variables from cache +set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") + +if(EXISTS "${EXPORT_FILE}") + include("${EXPORT_FILE}") + 
message(STATUS "Loaded variables from export_vars.cmake") +else() + message(FATAL_ERROR "export_vars.cmake not found at ${EXPORT_FILE}. Please build the main project first (e.g. cd .. && cmake -G Ninja -B build)") +endif() + +# Force enable -fPIC +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + +project(libCacheSim-python VERSION "${LIBCACHESIM_VERSION}") + +if(LOG_LEVEL_LOWER STREQUAL "default") + if(CMAKE_BUILD_TYPE_LOWER MATCHES "debug") + add_compile_definitions(LOGLEVEL=6) + else() + add_compile_definitions(LOGLEVEL=7) + endif() +elseif(LOG_LEVEL_LOWER STREQUAL "verbose") + add_compile_definitions(LOGLEVEL=5) +elseif(LOG_LEVEL_LOWER STREQUAL "debug") + add_compile_definitions(LOGLEVEL=6) +elseif(LOG_LEVEL_LOWER STREQUAL "info") + add_compile_definitions(LOGLEVEL=7) +elseif(LOG_LEVEL_LOWER STREQUAL "warn") + add_compile_definitions(LOGLEVEL=8) +elseif(LOG_LEVEL_LOWER STREQUAL "error") + add_compile_definitions(LOGLEVEL=9) +else() + add_compile_definitions(LOGLEVEL=7) +endif() + +# Find python and pybind11 +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +find_package(pybind11 CONFIG REQUIRED) + +# Include directories for dependencies +include_directories(${GLib_INCLUDE_DIRS}) +include_directories(${GLib_CONFIG_INCLUDE_DIR}) +include_directories(${XGBOOST_INCLUDE_DIR}) +include_directories(${LIGHTGBM_PATH}) +include_directories(${ZSTD_INCLUDE_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim/bin) + +# Find the main libCacheSim library +set(MAIN_PROJECT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +set(MAIN_PROJECT_LIB_PATH "${MAIN_PROJECT_BUILD_DIR}/liblibCacheSim.a") + +if(EXISTS "${MAIN_PROJECT_LIB_PATH}") + message(STATUS "Found pre-built libCacheSim library at ${MAIN_PROJECT_LIB_PATH}") + + # Import the main library as an imported target + add_library(libCacheSim_main STATIC IMPORTED) + 
set_target_properties(libCacheSim_main PROPERTIES + IMPORTED_LOCATION "${MAIN_PROJECT_LIB_PATH}" + INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim/include;${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim/utils/include;${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim" + ) + + # Link dependencies that the main library needs + target_link_libraries(libCacheSim_main INTERFACE ${dependency_libs}) + set(LIBCACHESIM_TARGET libCacheSim_main) + +else() + message(FATAL_ERROR "Pre-built libCacheSim library not found. Please build the main project first: cd .. && cmake -G Ninja -B build && ninja -C build") +endif() + +python_add_library(_libcachesim MODULE + src/pylibcachesim.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim/bin/cli_reader_utils.c + WITH_SOABI +) + +set_target_properties(_libcachesim PROPERTIES + POSITION_INDEPENDENT_CODE ON + INSTALL_RPATH_USE_LINK_PATH TRUE + BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH "$ORIGIN" +) + +target_compile_definitions(_libcachesim PRIVATE VERSION_INFO=${PROJECT_VERSION}) + +target_link_libraries(_libcachesim PRIVATE + ${LIBCACHESIM_TARGET} + pybind11::headers + pybind11::module + -Wl,--no-as-needed -ldl +) + +# install to wheel directory +install(TARGETS _libcachesim LIBRARY DESTINATION libcachesim) diff --git a/libCacheSim-python/README.md b/libCacheSim-python/README.md new file mode 100644 index 000000000..67039e04c --- /dev/null +++ b/libCacheSim-python/README.md @@ -0,0 +1,44 @@ +# libCacheSim Python Binding + +Python bindings for libCacheSim, a high-performance cache simulator. + +## Installation + +```bash +pip install . +``` + +## Development + +```bash +pip install -e . +``` + +Test + +``` +python -m pytest . 
+``` + +## Usage + +```python +import libcachesim as cachesim + +# Create a cache with FIFO eviction policy +cache = cachesim.FIFO(cache_size=1024*1024) + +# Create a request +req = cachesim.Request() +req.obj_id = 1 +req.obj_size = 100 + +# Check if object is in cache +hit = cache.get(req) +print(f"Cache hit: {hit}") +``` + +## Features + +- [x] Support for multiple eviction policies (FIFO, LRU, ARC, Clock, etc.) +- [ ] trace analysis tools diff --git a/libCacheSim-python/export/CMakeLists.txt b/libCacheSim-python/export/CMakeLists.txt new file mode 100644 index 000000000..3a9928f80 --- /dev/null +++ b/libCacheSim-python/export/CMakeLists.txt @@ -0,0 +1,28 @@ +# Helper functions are removed since we don't export source files anymore + +set(EXPORT_FILE "${CMAKE_BINARY_DIR}/export_vars.cmake") +file(WRITE "${EXPORT_FILE}" "") + +# ============================================================================== +# Export project metadata +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(LIBCACHESIM_VERSION \"${${PROJECT_NAME}_VERSION}\")\n") + +# ============================================================================== +# Export essential include directory variables +# ============================================================================== +foreach(var IN ITEMS GLib_INCLUDE_DIRS GLib_CONFIG_INCLUDE_DIR XGBOOST_INCLUDE_DIR LIGHTGBM_PATH ZSTD_INCLUDE_DIR) + file(APPEND "${EXPORT_FILE}" "set(${var} \"${${var}}\")\n") +endforeach() + +# ============================================================================== +# Export dependency library variables +# ============================================================================== +file(APPEND "${EXPORT_FILE}" "set(dependency_libs \"${dependency_libs}\")\n") + +# ============================================================================== +# Export essential build option variables +# 
============================================================================== +file(APPEND "${EXPORT_FILE}" "set(LOG_LEVEL_LOWER \"${LOG_LEVEL_LOWER}\")\n") + +message(STATUS "Exported essential variables to ${EXPORT_FILE}") diff --git a/libCacheSim-python/export/README.md b/libCacheSim-python/export/README.md new file mode 100644 index 000000000..b3406c3d0 --- /dev/null +++ b/libCacheSim-python/export/README.md @@ -0,0 +1,85 @@ +# libCacheSim Python Binding Export + +This directory contains the export mechanism for sharing variables between the main libCacheSim project and the Python binding. + +## Overview + +The `export/CMakeLists.txt` file serves as a bridge between the main libCacheSim project and the Python binding, ensuring that all necessary variables (source files, include directories, compiler flags, etc.) are properly exported and can be imported by the Python binding's CMakeLists.txt. + +## How It Works + +### 1. Variable Export Process + +The export mechanism works in the following steps: + +1. **Path Conversion**: Converts relative source file paths to absolute paths using the `convert_to_absolute_paths` function +2. **Variable Collection**: Gathers all necessary variables from the main project +3. **File Generation**: Writes all variables to `export_vars.cmake` in the build directory +4. **Import**: The Python binding's CMakeLists.txt includes this file to access all variables + +### 2. 
Exported Variables + +The following categories of variables are exported: + +#### Source Files +- `ABS_cache_sources` - Cache-related source files +- `ABS_dataStructure_sources` - Data structure source files +- `ABS_traceReader_sources` - Trace reader source files +- `ABS_profiler_sources` - Profiler source files +- `ABS_utils_sources` - Utility source files +- `ABS_traceAnalyzer_sources` - Trace analyzer source files +- `ABS_mrcProfiler_sources` - MRC profiler source files + +#### Project Metadata +- `LIBCACHESIM_VERSION` - Version information + +#### Include Directories +- `libCacheSim_include_dir` - Main include directory +- `libCacheSim_binary_include_dir` - Binary include directory +- `GLib_INCLUDE_DIRS` - GLib include directories +- `XGBOOST_INCLUDE_DIR` - XGBoost include directory +- `LIGHTGBM_PATH` - LightGBM include directory +- `ZSTD_INCLUDE_DIR` - ZSTD include directory + +#### Dependencies +- `dependency_libs` - Dependency libraries + +#### Compiler Flags +- `LIBCACHESIM_C_FLAGS` - C compiler flags +- `LIBCACHESIM_CXX_FLAGS` - C++ compiler flags + +#### Build Options +- `USE_HUGEPAGE` - Hugepage usage +- `ENABLE_TESTS` - Test enablement +- `ENABLE_GLCACHE` - GLCache enablement +- `SUPPORT_TTL` - TTL support +- `OPT_SUPPORT_ZSTD_TRACE` - ZSTD trace support +- `ENABLE_LRB` - LRB enablement +- `ENABLE_3L_CACHE` - 3L Cache enablement +- `LOG_LEVEL_LOWER` - Log level + +## Usage + +### In Main Project + +The main project's CMakeLists.txt includes this export directory: + +```cmake +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) +``` + +### In Python Binding + +The Python binding's CMakeLists.txt imports the exported variables: + +```cmake +set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") + +if(EXISTS "${EXPORT_FILE}") + include("${EXPORT_FILE}") + message(STATUS "Loaded variables from export_vars.cmake") +else() + message(FATAL_ERROR "export_vars.cmake not found") 
+endif() +``` \ No newline at end of file diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py new file mode 100644 index 000000000..92e667fb7 --- /dev/null +++ b/libCacheSim-python/libcachesim/__init__.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from ._libcachesim import ( + Cache, + Reader, + Request, + __doc__, + __version__, + create_cache, + open_trace, +) +from .const import TraceType +from .eviction import ( + ARC, + FIFO, + LRB, + LRU, + S3FIFO, + Clock, + Sieve, + ThreeLCache, + TinyLFU, + TwoQ, +) + +__all__ = [ + "ARC", + "FIFO", + "LRB", + "LRU", + "S3FIFO", + "Cache", + "Clock", + "Reader", + "Request", + "Sieve", + "ThreeLCache", + "TinyLFU", + "TraceType", + "TwoQ", + "__doc__", + "__version__", + "create_cache", + "open_trace", + # TODO(haocheng): add more eviction policies +] diff --git a/libCacheSim-python/libcachesim/__init__.pyi b/libCacheSim-python/libcachesim/__init__.pyi new file mode 100644 index 000000000..2d4937f7f --- /dev/null +++ b/libCacheSim-python/libcachesim/__init__.pyi @@ -0,0 +1,148 @@ +""" +libCacheSim Python bindings +-------------------------- + +.. currentmodule:: libcachesim + +.. autosummary:: + :toctree: _generate + + create_cache + open_trace + ARC_init + Clock_init + FIFO_init + LRB_init + LRU_init + S3FIFO_init + Sieve_init + ThreeLCache_init + TinyLFU_init + TwoQ_init + Cache + Request + Reader + reader_init_param_t + TraceType +""" + +from .const import TraceType + +def create_cache( + eviction_algo: str, + cache_size: int, + eviction_params: str, + consider_obj_metadata: bool +) -> Cache: ... + + +def open_trace( + trace_path: str, + type: TraceType, + reader_init_param: dict | reader_init_param_t | None = None +) -> Reader: ... + + +def FIFO_init(cache_size: int) -> Cache: + """ + Create a FIFO cache instance. + """ + + +def ARC_init(cache_size: int) -> Cache: + """ + Create a ARC cache instance. 
+ """ + + +def Clock_init(cache_size: int, n_bit_counter: int = 1, init_freq: int = 0) -> Cache: + """ + Create a Clock cache instance. + """ + + +def LRB_init(cache_size: int, objective: str = "byte-miss-ratio") -> Cache: + """ + Create a LRB cache instance. + """ + + +def LRU_init(cache_size: int) -> Cache: + """ + Create a LRU cache instance. + """ + + +def S3FIFO_init( + cache_size: int, + fifo_size_ratio: float = 0.10, + ghost_size_ratio: float = 0.90, + move_to_main_threshold: int = 2 +) -> Cache: + """ + Create a S3FIFO cache instance. + """ + + +def Sieve_init(cache_size: int) -> Cache: + """ + Create a Sieve cache instance. + """ + + +def ThreeLCache_init(cache_size: int, objective: str = "byte-miss-ratio") -> Cache: + """ + Create a ThreeLCache cache instance. + """ + + +def TinyLFU_init( + cache_size: int, + main_cache: str = "SLRU", + window_size: float = 0.01 +) -> Cache: + """ + Create a TinyLFU cache instance. + """ + + +def TwoQ_init( + cache_size: int, + Ain_size_ratio: float = 0.25, + Aout_size_ratio: float = 0.5 +) -> Cache: + """ + Create a TwoQ cache instance. + """ + +class reader_init_param_t: + time_field: int + obj_id_field: int + obj_size_field: int + delimiter: str + has_header: bool + + +class Cache: + n_req: int + n_obj: int + occupied_byte: int + cache_size: int + def get(self, req: Request) -> bool: ... + + +class Request: + clock_time: int + hv: int + obj_id: int + obj_size: int + + +class Reader: + n_read_req: int + n_total_req: int + trace_path: str + file_size: int + def get_wss(self, ignore_obj_size: bool = False) -> int: ... + def __iter__(self) -> Reader: ... + def __next__(self) -> Request: ... 
diff --git a/libCacheSim-python/libcachesim/const.py b/libCacheSim-python/libcachesim/const.py new file mode 100644 index 000000000..9276d2447 --- /dev/null +++ b/libCacheSim-python/libcachesim/const.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +import enum + + +class TraceType(enum.Enum): + CSV_TRACE = 0 + BIN_TRACE = 1 + PLAIN_TXT_TRACE = 2 + ORACLE_GENERAL_TRACE = 3 + LCS_TRACE = 4 # libCacheSim format diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py new file mode 100644 index 000000000..a5beae3e9 --- /dev/null +++ b/libCacheSim-python/libcachesim/eviction.py @@ -0,0 +1,295 @@ +"""Registry of eviction policies.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod + +from ._libcachesim import ( + ARC_init, + Cache, + Clock_init, + FIFO_init, + LRB_init, + LRU_init, + Request, + S3FIFO_init, + Sieve_init, + ThreeLCache_init, + TinyLFU_init, + TwoQ_init, +) + + +class EvictionPolicyBase(ABC): + """Abstract base class for all eviction policies.""" + @abstractmethod + def get(self, req: Request) -> bool: + pass + + @abstractmethod + def __repr__(self) -> str: + pass + + +class EvictionPolicy(EvictionPolicyBase): + """Base class for all eviction policies.""" + def __init__(self, cache_size: int, **kwargs) -> None: + self.cache: Cache = self.init_cache(cache_size, **kwargs) + + @abstractmethod + def init_cache(self, cache_size: int, **kwargs) -> Cache: + pass + + def get(self, req: Request) -> bool: + return self.cache.get(req) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})" + + +class FIFO(EvictionPolicy): + """First In First Out replacement policy. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs) -> Cache: # noqa: ARG002 + return FIFO_init(cache_size) + + +class Clock(EvictionPolicy): + """Clock (Second Chance or FIFO-Reinsertion) replacement policy. 
+ + Args: + cache_size: Size of the cache + n_bit_counter: Number of bits for counter (default: 1) + init_freq: Initial frequency value (default: 0) + """ + def __init__(self, cache_size: int, n_bit_counter: int = 1, init_freq: int = 0): + super().__init__(cache_size, n_bit_counter=n_bit_counter, init_freq=init_freq) + + def init_cache(self, cache_size: int, **kwargs): + init_freq = kwargs.get('init_freq', 0) + n_bit_counter = kwargs.get('n_bit_counter', 1) + + if n_bit_counter < 1 or n_bit_counter > 32: + msg = "n_bit_counter must be between 1 and 32" + raise ValueError(msg) + if init_freq < 0 or init_freq > 2**n_bit_counter - 1: + msg = "init_freq must be between 0 and 2^n_bit_counter - 1" + raise ValueError(msg) + + self.init_freq = init_freq + self.n_bit_counter = n_bit_counter + + return Clock_init(cache_size, n_bit_counter, init_freq) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ + f"n_bit_counter={self.n_bit_counter}, " \ + f"init_freq={self.init_freq})" + + +class TwoQ(EvictionPolicy): + """2Q replacement policy. + + 2Q has three queues: Ain, Aout, Am. When a obj hits in Aout, it will be + inserted into Am otherwise it will be inserted into Ain. 
+ + Args: + cache_size: Total size of the cache + ain_size_ratio: Size ratio for Ain queue (default: 0.25) + aout_size_ratio: Size ratio for Aout queue (default: 0.5) + """ + def __init__(self, cache_size: int, ain_size_ratio: float = 0.25, aout_size_ratio: float = 0.5): + super().__init__(cache_size, ain_size_ratio=ain_size_ratio, aout_size_ratio=aout_size_ratio) + + def init_cache(self, cache_size: int, **kwargs): + ain_size_ratio = kwargs.get('ain_size_ratio', 0.25) + aout_size_ratio = kwargs.get('aout_size_ratio', 0.5) + + if ain_size_ratio <= 0 or aout_size_ratio <= 0: + msg = "ain_size_ratio and aout_size_ratio must be greater than 0" + raise ValueError(msg) + + self.ain_size_ratio = ain_size_ratio + self.aout_size_ratio = aout_size_ratio + + return TwoQ_init(cache_size, ain_size_ratio, aout_size_ratio) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ + f"ain_size_ratio={self.ain_size_ratio}, " \ + f"aout_size_ratio={self.aout_size_ratio})" + + +class LRB(EvictionPolicy): + """LRB (Learning Relaxed Belady) replacement policy. + + LRB is a learning-based replacement policy that uses a neural network to + predict the future access patterns of the cache, randomly select one obj + outside the Belady boundary to evict. 
+ + Args: + cache_size: Size of the cache + objective: Objective function to optimize (default: "byte-miss-ratio") + """ + def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): + super().__init__(cache_size, objective=objective) + + def init_cache(self, cache_size: int, **kwargs) -> Cache: + objective = kwargs.get('objective', "byte-miss-ratio") + + if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: + msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" + raise ValueError(msg) + + self.objective = objective + + return LRB_init(cache_size, objective) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ + f"objective={self.objective})" + + +class LRU(EvictionPolicy): + """Least Recently Used replacement policy. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return LRU_init(cache_size) + + +class ARC(EvictionPolicy): + """Adaptive Replacement Cache policy. + + ARC is a two-tiered cache with two LRU caches (T1 and T2) and two ghost + lists (B1 and B2). T1 records the obj accessed only once, T2 records + the obj accessed more than once. ARC has an internal parameter `p` to + learn and dynamically control the size of T1 and T2. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return ARC_init(cache_size) + + +class S3FIFO(EvictionPolicy): + """S3FIFO replacement policy. + + S3FIFO consists of three FIFO queues: Small, Main, and Ghost. Small + queue gets the obj and records the freq. + When small queue is full, if the obj to evict satisfies the threshold, + it will be moved to main queue. Otherwise, it will be evicted from small + queue and inserted into ghost queue. + When main queue is full, the obj to evict will be evicted and reinserted + like Clock. + If obj hits in the ghost queue, it will be moved to main queue. 
+ + Args: + cache_size: Size of the cache + fifo_size_ratio: Size ratio for FIFO queue (default: 0.1) + ghost_size_ratio: Size ratio for ghost queue (default: 0.9) + move_to_main_threshold: Threshold for moving obj from ghost to main (default: 2) + """ + def __init__(self, cache_size: int, fifo_size_ratio: float = 0.1, + ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2): + super().__init__(cache_size, fifo_size_ratio=fifo_size_ratio, + ghost_size_ratio=ghost_size_ratio, + move_to_main_threshold=move_to_main_threshold) + + def init_cache(self, cache_size: int, **kwargs): + fifo_size_ratio = kwargs.get('fifo_size_ratio', 0.1) + ghost_size_ratio = kwargs.get('ghost_size_ratio', 0.9) + move_to_main_threshold = kwargs.get('move_to_main_threshold', 2) + + if fifo_size_ratio <= 0 or ghost_size_ratio <= 0: + msg = "fifo_size_ratio and ghost_size_ratio must be greater than 0" + raise ValueError(msg) + if move_to_main_threshold < 0: + msg = "move_to_main_threshold must be greater or equal to 0" + raise ValueError(msg) + + self.fifo_size_ratio = fifo_size_ratio + self.ghost_size_ratio = ghost_size_ratio + self.move_to_main_threshold = move_to_main_threshold + + return S3FIFO_init(cache_size, fifo_size_ratio, ghost_size_ratio, move_to_main_threshold) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ + f"fifo_size_ratio={self.fifo_size_ratio}, " \ + f"ghost_size_ratio={self.ghost_size_ratio}, " \ + f"move_to_main_threshold={self.move_to_main_threshold})" + + +class Sieve(EvictionPolicy): + """Sieve replacement policy. + + FIFO-Reinsertion with check pointer. + + Args: + cache_size: Size of the cache + """ + def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 + return Sieve_init(cache_size) + + +class ThreeLCache(EvictionPolicy): + """3L-Cache replacement policy. 
+ + Args: + cache_size: Size of the cache + objective: Objective function to optimize (default: "byte-miss-ratio") + """ + def __init__(self, cache_size: int, objective: str = "byte-miss-ratio"): + super().__init__(cache_size, objective=objective) + + def init_cache(self, cache_size: int, **kwargs): + objective = kwargs.get('objective', "byte-miss-ratio") + + if objective not in ["byte-miss-ratio", "byte-hit-ratio"]: + msg = "objective must be either 'byte-miss-ratio' or 'byte-hit-ratio'" + raise ValueError(msg) + + self.objective = objective + + return ThreeLCache_init(cache_size, objective) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ + f"objective={self.objective})" + + +class TinyLFU(EvictionPolicy): + """TinyLFU replacement policy. + + Args: + cache_size: Size of the cache + main_cache: Main cache to use (default: "SLRU") + window_size: Window size for TinyLFU (default: 0.01) + """ + def __init__(self, cache_size: int, main_cache: str = "SLRU", window_size: float = 0.01): + super().__init__(cache_size, main_cache=main_cache, window_size=window_size) + + def init_cache(self, cache_size: int, **kwargs): + main_cache = kwargs.get('main_cache', "SLRU") + window_size = kwargs.get('window_size', 0.01) + + if window_size <= 0: + msg = "window_size must be greater than 0" + raise ValueError(msg) + + self.main_cache = main_cache + self.window_size = window_size + + return TinyLFU_init(cache_size, main_cache, window_size) + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ + f"main_cache={self.main_cache}, " \ + f"window_size={self.window_size})" \ No newline at end of file diff --git a/libCacheSim-python/pyproject.toml b/libCacheSim-python/pyproject.toml new file mode 100644 index 000000000..753aca946 --- /dev/null +++ b/libCacheSim-python/pyproject.toml @@ -0,0 +1,73 @@ +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11"] +build-backend = 
"scikit_build_core.build" + + +[project] +name = "libcachesim" +version = "0.0.1" +description="Python bindings for libCacheSim" +readme = "README.md" +requires-python = ">=3.9" + +[project.optional-dependencies] +test = ["pytest"] + + +[tool.scikit-build] +wheel.expand-macos-universal-tags = true +minimum-version = "build-system.requires" +cmake.args = ["-G", "Ninja"] + +[tool.pytest.ini_options] +minversion = "8.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +log_cli_level = "INFO" +filterwarnings = [ + "error", + "ignore::pytest.PytestCacheWarning", +] +testpaths = ["tests"] + + +[tool.cibuildwheel] +build-frontend = "build[uv]" +test-command = "pytest {project}/tests" +test-extras = ["test"] + +[tool.cibuildwheel.pyodide] +build-frontend = {name = "build", args = ["--exports", "whole_archive"]} + +[tool.ruff.lint] +extend-select = [ + "B", # flake8-bugbear + "I", # isort + "ARG", # flake8-unused-arguments + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "ICN", # flake8-import-conventions + "G", # flake8-logging-format + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 + "EXE", # flake8-executable + "NPY", # NumPy specific rules + "PD", # pandas-vet +] +ignore = [ + "PLR09", # Too many X + "PLR2004", # Magic comparison +] +isort.required-imports = ["from __future__ import annotations"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["T20"] \ No newline at end of file diff --git a/libCacheSim-python/requirements.txt b/libCacheSim-python/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp new file mode 100644 index 000000000..b40bf3b20 --- /dev/null +++ 
b/libCacheSim-python/src/pylibcachesim.cpp @@ -0,0 +1,566 @@ +#include + +#include + +#include "config.h" +#include "libCacheSim/cache.h" +#include "libCacheSim/cacheObj.h" +#include "libCacheSim/const.h" +#include "libCacheSim/enum.h" +#include "libCacheSim/logging.h" +#include "libCacheSim/macro.h" +#include "libCacheSim/reader.h" +#include "libCacheSim/request.h" +#include "libCacheSim/sampling.h" +#include "mystr.h" + +/* admission */ +#include "libCacheSim/admissionAlgo.h" + +/* eviction */ +#include "libCacheSim/evictionAlgo.h" + +/* sampling */ +#include "libCacheSim/sampling.h" + +/* cache simulator */ +#include "libCacheSim/profilerLRU.h" +#include "libCacheSim/simulator.h" + +/* bin */ +#include "cachesim/cache_init.h" +#include "cli_reader_utils.h" + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +struct CacheDeleter { + void operator()(cache_t* ptr) const { + if (ptr != nullptr) ptr->cache_free(ptr); + } +}; + +struct RequestDeleter { + void operator()(request_t* ptr) const { + if (ptr != nullptr) free_request(ptr); + } +}; + +struct ReaderDeleter { + void operator()(reader_t* ptr) const { + if (ptr != nullptr) close_trace(ptr); + } +}; + +namespace py = pybind11; + +PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) + m.doc() = R"pbdoc( + libCacheSim Python bindings + -------------------------- + + .. currentmodule:: libcachesim + + .. 
autosummary:: + :toctree: _generate + + TODO(haocheng): add meaningful methods + )pbdoc"; + + py::enum_(m, "TraceType") + .value("CSV_TRACE", trace_type_e::CSV_TRACE) + .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) + .value("BIN_TRACE", trace_type_e::BIN_TRACE) + .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE) + .export_values(); + + // *************** structs *************** + /** + * @brief Cache structure + */ + py::class_>(m, "Cache") + .def_readwrite("n_req", &cache_t::n_req) + .def_readwrite("n_obj", &cache_t::n_obj) + .def_readwrite("occupied_byte", &cache_t::occupied_byte) + .def_readwrite("cache_size", &cache_t::cache_size) + // methods + .def("get", [](cache_t& self, const request_t& req) { + return self.get(&self, &req); + }); + + /** + * @brief Request structure + */ + py::class_>(m, + "Request") + .def(py::init([]() { return new_request(); })) + .def_readwrite("clock_time", &request_t::clock_time) + .def_readwrite("hv", &request_t::hv) + .def_readwrite("obj_id", &request_t::obj_id) + .def_readwrite("obj_size", &request_t::obj_size) + .def_readwrite("op", &request_t::op); + + /** + * @brief Reader structure + */ + py::class_>(m, "Reader") + .def_readwrite("n_read_req", &reader_t::n_read_req) + .def_readwrite("n_total_req", &reader_t::n_total_req) + .def_readwrite("trace_path", &reader_t::trace_path) + .def_readwrite("file_size", &reader_t::file_size) + // methods + .def( + "get_wss", + [](reader_t& self, bool ignore_obj_size) { + int64_t wss_obj = 0, wss_byte = 0; + cal_working_set_size(&self, &wss_obj, &wss_byte); + return ignore_obj_size ? wss_obj : wss_byte; + }, + py::arg("ignore_obj_size") = false, + R"pbdoc( + Get the working set size of the trace. + + Args: + ignore_obj_size (bool): Whether to ignore the object size. + + Returns: + int: The working set size of the trace. 
+ )pbdoc") + .def("__iter__", [](reader_t& self) -> reader_t& { return self; }) + .def("__next__", [](reader_t& self) { + auto req = std::unique_ptr(new_request()); + int ret = read_one_req(&self, req.get()); + if (ret != 0) { + throw py::stop_iteration(); + } + // std::cout << "Read request: " << req->obj_id + // << ", size: " << req->obj_size << std::endl; + return req; + }); + + py::class_(m, "reader_init_param_t") + .def(py::init<>()) + .def_readwrite("time_field", &reader_init_param_t::time_field) + .def_readwrite("obj_id_field", &reader_init_param_t::obj_id_field) + .def_readwrite("obj_size_field", &reader_init_param_t::obj_size_field) + .def_readwrite("delimiter", &reader_init_param_t::delimiter) + .def_readwrite("has_header", &reader_init_param_t::has_header) + .def_property( + "binary_fmt_str", + // Getter: C char* to Python string (returns copy) + [](const reader_init_param_t& self) { + return self.binary_fmt_str ? std::string(self.binary_fmt_str) : ""; + }, + // Setter: Python string to C char* (handles deep copy and old memory) + [](reader_init_param_t& self, const std::string& value) { + // Free existing memory if any + if (self.binary_fmt_str != nullptr) { + free(self.binary_fmt_str); // Use free() since it was + // strdup'd/malloc'd + } + // Deep copy the new string + self.binary_fmt_str = strdup(value.c_str()); + if (self.binary_fmt_str == nullptr && !value.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str"); + } + }); + + // *************** functions *************** + /** + * @brief Open a trace file for reading + */ + m.def( + "open_trace", + [](const std::string& trace_path, int type, const py::object& params) { + // Create an init_param instance, it will be populated from Python + reader_init_param_t init_param = {}; + + // === IMPORTANT: Initialize binary_fmt_str to nullptr === + // This is crucial if it's not always set from Python, + // so that free() won't be called on uninitialized memory if not set + // 
later. + init_param.binary_fmt_str = nullptr; + + // Populate other fields from Python dict or object + if (py::isinstance(params)) { + py::dict dict_params = params.cast(); + init_param.time_field = dict_params["time_field"].cast(); + init_param.obj_id_field = dict_params["obj_id_field"].cast(); + init_param.obj_size_field = dict_params["obj_size_field"].cast(); + init_param.delimiter = + dict_params["delimiter"].cast()[0]; + init_param.has_header = dict_params["has_header"].cast(); + // If binary_fmt_str is in dict_params, set it via property setter + if (dict_params.contains("binary_fmt_str") && + !dict_params["binary_fmt_str"].is_none()) { + std::string bfs_val = + dict_params["binary_fmt_str"].cast(); + if (init_param.binary_fmt_str != nullptr) + free(init_param.binary_fmt_str); + init_param.binary_fmt_str = strdup(bfs_val.c_str()); + if (init_param.binary_fmt_str == nullptr && !bfs_val.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str from dict"); + } + } + } else if (!params.is_none()) { + // If using a reader_init_param_t object from Python, its members are + // already set via def_property (No need to copy here, just ensure + // it's reader_init_param_t object) If `params` is a + // `reader_init_param_t` object, Pybind11 will pass its fields + // directly We need to ensure that the `binary_fmt_str` member of + // `params` is correctly handled. The direct `getattr` below is for + // other fields, for binary_fmt_str, the `def_property` takes care. 
+ init_param.time_field = py::getattr(params, "time_field").cast(); + init_param.obj_id_field = + py::getattr(params, "obj_id_field").cast(); + init_param.obj_size_field = + py::getattr(params, "obj_size_field").cast(); + init_param.delimiter = + py::getattr(params, "delimiter").cast()[0]; + init_param.has_header = + py::getattr(params, "has_header").cast(); + // Handle binary_fmt_str if it's set on the Python object + if (py::hasattr(params, "binary_fmt_str") && + !py::getattr(params, "binary_fmt_str").is_none()) { + std::string bfs_val = + py::getattr(params, "binary_fmt_str").cast(); + if (init_param.binary_fmt_str != nullptr) + free(init_param.binary_fmt_str); + init_param.binary_fmt_str = strdup(bfs_val.c_str()); + if (init_param.binary_fmt_str == nullptr && !bfs_val.empty()) { + throw std::runtime_error( + "Failed to allocate memory for binary_fmt_str from object"); + } + } + } + // ... (rest of open_trace function) ... + reader_t* ptr = open_trace( + trace_path.c_str(), static_cast(type), &init_param); + return std::unique_ptr(ptr); + }, + py::arg("trace_path"), py::arg("type"), + py::arg("reader_init_param") = py::none(), + R"pbdoc( + Open a trace file for reading. + + Args: + trace_path (str): Path to the trace file. + type (int): Type of the trace (e.g., CSV_TRACE). + reader_init_param (Union[dict, reader_init_param_t, None]): Initialization parameters for the reader. + + Returns: + Reader: A new reader instance for the trace. + )pbdoc"); + + /** + * @brief Generic function to create a cache instance. + */ + m.def( + "create_cache", + [](const std::string& eviction_algo, const uint64_t cache_size, + const std::string& eviction_params, + bool consider_obj_metadata) { return nullptr; }, + py::arg("eviction_algo"), py::arg("cache_size"), + py::arg("eviction_params"), py::arg("consider_obj_metadata"), + R"pbdoc( + Create a cache instance. + + Args: + eviction_algo (str): Eviction algorithm to use (e.g., "LRU", "FIFO", "Random"). 
+ cache_size (int): Size of the cache in bytes. + eviction_params (str): Additional parameters for the eviction algorithm. + consider_obj_metadata (bool): Whether to consider object metadata in eviction decisions. + + Returns: + Cache: A new cache instance. + )pbdoc"); + + /* TODO(haocheng): should we support all parameters in the + * common_cache_params_t? (hash_power, etc.) */ + + // Currently supported eviction algorithms with direct initialization: + // - "ARC" + // - "Clock" + // - "FIFO" + // - "LRB" + // - "LRU" + // - "S3FIFO" + // - "Sieve" + // - "ThreeLCache" + // - "TinyLFU" + // - "TwoQ" + + /** + * @brief Create a ARC cache instance. + */ + m.def( + "ARC_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = ARC_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a ARC cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + )pbdoc"); + + /** + * @brief Create a Clock cache instance. + */ + m.def( + "Clock_init", + [](uint64_t cache_size, long int n_bit_counter, long int init_freq) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + // assemble the cache specific parameters + std::string cache_specific_params = + "n-bit-counter=" + std::to_string(n_bit_counter) + "," + + "init-freq=" + std::to_string(init_freq); + + cache_t* ptr = Clock_init(cc_params, cache_specific_params.c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("n_bit_counter") = 1, + py::arg("init_freq") = 0, + R"pbdoc( + Create a Clock cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + n_bit_counter (int): Number of bits for counter (default: 1). + init_freq (int): Initial frequency value (default: 0). + + Returns: + Cache: A new Clock cache instance. + )pbdoc"); + + /** + * @brief Create a FIFO cache instance. 
+ */ + m.def( + "FIFO_init", + [](uint64_t cache_size) { + // Construct common cache parameters + common_cache_params_t cc_params = {.cache_size = cache_size}; + // FIFO no specific parameters, so we pass nullptr + cache_t* ptr = FIFO_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a FIFO cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new FIFO cache instance. + )pbdoc"); + +#ifdef ENABLE_LRB + /** + * @brief Create a LRB cache instance. + */ + m.def( + "LRB_init", + [](uint64_t cache_size, std::string objective) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = LRB_init(cc_params, ("objective=" + objective).c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", + R"pbdoc( + Create a LRB cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + objective (str): Objective function to optimize (default: "byte-miss-ratio"). + + Returns: + Cache: A new LRB cache instance. + )pbdoc"); +#else + // TODO(haocheng): add a dummy function to avoid the error when LRB is not + // enabled + m.def( + "LRB_init", + [](uint64_t cache_size, std::string objective) { + throw std::runtime_error("LRB is not enabled"); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); +#endif + + /** + * @brief Create a LRU cache instance. + */ + m.def( + "LRU_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = LRU_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a LRU cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new LRU cache instance. + )pbdoc"); + + /** + * @brief Create a S3FIFO cache instance. 
+ */ + m.def( + "S3FIFO_init", + [](uint64_t cache_size, double fifo_size_ratio, double ghost_size_ratio, + int move_to_main_threshold) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = S3FIFO_init( + cc_params, + ("fifo-size-ratio=" + std::to_string(fifo_size_ratio) + "," + + "ghost-size-ratio=" + std::to_string(ghost_size_ratio) + "," + + "move-to-main-threshold=" + std::to_string(move_to_main_threshold)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("fifo_size_ratio") = 0.10, + py::arg("ghost_size_ratio") = 0.90, py::arg("move_to_main_threshold") = 2, + R"pbdoc( + Create a S3FIFO cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + fifo_size_ratio (float): Ratio of FIFO size to cache size (default: 0.10). + ghost_size_ratio (float): Ratio of ghost size to cache size (default: 0.90). + move_to_main_threshold (int): Threshold for moving to main queue (default: 2). + + Returns: + Cache: A new S3FIFO cache instance. + )pbdoc"); + + /** + * @brief Create a Sieve cache instance. + */ + m.def( + "Sieve_init", + [](uint64_t cache_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = Sieve_init(cc_params, nullptr); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), + R"pbdoc( + Create a Sieve cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + + Returns: + Cache: A new Sieve cache instance. + )pbdoc"); + +#ifdef ENABLE_3L_CACHE + /** + * @brief Create a ThreeL cache instance. + */ + m.def( + "ThreeLCache_init", + [](uint64_t cache_size, std::string objective) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = + ThreeLCache_init(cc_params, ("objective=" + objective).c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio", + R"pbdoc( + Create a ThreeL cache instance. + + Args: + cache_size (int): Size of the cache in bytes. 
+ objective (str): Objective function to optimize (default: "byte-miss-ratio"). + + Returns: + Cache: A new ThreeL cache instance. + )pbdoc"); +#else + // TODO(haocheng): add a dummy function to avoid the error when ThreeLCache is + // not enabled + m.def( + "ThreeLCache_init", + [](uint64_t cache_size, std::string objective) { + throw std::runtime_error("ThreeLCache is not enabled"); + }, + py::arg("cache_size"), py::arg("objective") = "byte-miss-ratio"); +#endif + + /** + * @brief Create a TinyLFU cache instance. + */ + // TODO: eviction-parameter parsing needs to change + m.def( + "TinyLFU_init", + [](uint64_t cache_size, std::string main_cache, double window_size) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = WTinyLFU_init( + cc_params, ("main-cache=" + main_cache + "," + + "window-size=" + std::to_string(window_size)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("main_cache") = "SLRU", + py::arg("window_size") = 0.01, + R"pbdoc( + Create a TinyLFU cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + main_cache (str): Main cache to use (default: "SLRU"). + window_size (float): Window size for TinyLFU (default: 0.01). + + Returns: + Cache: A new TinyLFU cache instance. + )pbdoc"); + + /** + * @brief Create a TwoQ cache instance. + */ + m.def( + "TwoQ_init", + [](uint64_t cache_size, double Ain_size_ratio, double Aout_size_ratio) { + common_cache_params_t cc_params = {.cache_size = cache_size}; + cache_t* ptr = TwoQ_init( + cc_params, + ("Ain-size-ratio=" + std::to_string(Ain_size_ratio) + "," + + "Aout-size-ratio=" + std::to_string(Aout_size_ratio)) + .c_str()); + return std::unique_ptr(ptr); + }, + py::arg("cache_size"), py::arg("Ain_size_ratio") = 0.25, + py::arg("Aout_size_ratio") = 0.5, + R"pbdoc( + Create a TwoQ cache instance. + + Args: + cache_size (int): Size of the cache in bytes. + Ain_size_ratio (float): Ratio of A-in size to cache size (default: 0.25). 
+ Aout_size_ratio (float): Ratio of A-out size to cache size (default: 0.5). + + Returns: + Cache: A new TwoQ cache instance. + )pbdoc"); + +#ifdef VERSION_INFO + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); +#else + m.attr("__version__") = "dev"; +#endif +} diff --git a/libCacheSim-python/tests/conftest.py b/libCacheSim-python/tests/conftest.py new file mode 100644 index 000000000..2ea1ade15 --- /dev/null +++ b/libCacheSim-python/tests/conftest.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import os +import gc + +import pytest + +from libcachesim import Reader, TraceType, open_trace + + +@pytest.fixture +def mock_reader(): + data_file = os.path.join( # noqa: PTH118 + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + reader: Reader = open_trace( + data_file, + type=TraceType.ORACLE_GENERAL_TRACE.value, + ) + try: + yield reader + finally: + # More careful cleanup + try: + if hasattr(reader, 'close'): + reader.close() + except: + pass + # Don't explicitly del reader here, let Python handle it + gc.collect() diff --git a/libCacheSim-python/tests/pytest.ini b/libCacheSim-python/tests/pytest.ini new file mode 100644 index 000000000..561da0177 --- /dev/null +++ b/libCacheSim-python/tests/pytest.ini @@ -0,0 +1,9 @@ +[pytest] +addopts = -ra --strict-markers -m "not optional" + +markers = + optional: mark test as optional + +python_files = test.py test_*.py *_test.py +python_classes = Test* +python_functions = test_* \ No newline at end of file diff --git a/libCacheSim-python/tests/reference.csv b/libCacheSim-python/tests/reference.csv new file mode 100644 index 000000000..cb569d0c9 --- /dev/null +++ b/libCacheSim-python/tests/reference.csv @@ -0,0 +1,20 @@ +FIFO,0.01,0.8368 +ARC,0.01,0.8222 +Clock,0.01,0.8328 +LRB,0.01,0.8339 +LRU,0.01,0.8339 +S3FIFO,0.01,0.8235 +Sieve,0.01,0.8231 +3LCache,0.01,0.8339 +TinyLFU,0.01,0.8262 +TwoQ,0.01,0.8276 +FIFO,0.1,0.8075 +ARC,0.1,0.7688 
+Clock,0.1,0.8086 +LRB,0.1,0.8097 +LRU,0.1,0.8097 +S3FIFO,0.1,0.7542 +Sieve,0.1,0.7903 +3LCache,0.1,0.8097 +TinyLFU,0.1,0.7666 +TwoQ,0.1,0.7695 diff --git a/libCacheSim-python/tests/test_eviction.py b/libCacheSim-python/tests/test_eviction.py new file mode 100644 index 000000000..ef896b2ca --- /dev/null +++ b/libCacheSim-python/tests/test_eviction.py @@ -0,0 +1,94 @@ +import pytest +import gc +import sys +import os + +from libcachesim import ( + ARC, + FIFO, + LRU, + S3FIFO, + Clock, + Sieve, + TinyLFU, + TwoQ, + create_cache, +) +from tests.utils import get_reference_data + + +@pytest.mark.parametrize("eviction_algo", [ + FIFO, + ARC, + # Clock, + # LRU, + # S3FIFO, + # Sieve, + # TinyLFU, + # TwoQ, +]) +@pytest.mark.parametrize("cache_size_ratio", [0.01]) +def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader): + cache = None + try: + # create a cache with the eviction policy + cache = eviction_algo(cache_size=int(mock_reader.get_wss()*cache_size_ratio)) + req_count = 0 + miss_count = 0 + + # Limit the number of requests to avoid long test times + # max_requests = 1000 + for i, req in enumerate(mock_reader): + # if i >= max_requests: + # break + hit = cache.get(req) + if not hit: + miss_count += 1 + req_count += 1 + + if req_count == 0: + pytest.skip("No requests processed") + + miss_ratio = miss_count / req_count + reference_miss_ratio = get_reference_data(eviction_algo.__name__, cache_size_ratio) + if reference_miss_ratio is None: + pytest.skip(f"No reference data for {eviction_algo.__name__} with cache size ratio {cache_size_ratio}") + assert abs(miss_ratio - reference_miss_ratio) < 0.01, f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}" + + except Exception as e: + print(f"Error in test_eviction_algo: {e}") + raise + finally: + pass + + +# @pytest.mark.parametrize("eviction_algo", [ +# "FIFO", +# "ARC", +# "Clock", +# "LRU", +# "S3FIFO", +# "Sieve", +# "TinyLFU", +# "TwoQ", +# ]) +# 
@pytest.mark.parametrize("cache_size_ratio", [0.01, 0.1]) +# def test_eviction_algo_generic(eviction_algo, cache_size_ratio, mock_reader): +# cache = create_cache(eviction_algo=eviction_algo, +# cache_size=int(mock_reader.get_wss()*cache_size_ratio), +# eviction_params="", +# consider_obj_metadata=False) +# req_count = 0 +# miss_count = 0 +# for req in mock_reader: +# hit = cache.get(req) +# if not hit: +# miss_count += 1 +# req_count += 1 + +# miss_ratio = miss_count / req_count +# print("Check eviction algo: ", eviction_algo, "with cache size ratio: ", cache_size_ratio) +# reference_miss_ratio = get_reference_data(eviction_algo, cache_size_ratio) +# if reference_miss_ratio is None: +# pytest.skip(f"No reference data for {eviction_algo} with cache size ratio {cache_size_ratio}") +# assert abs(miss_ratio - reference_miss_ratio) < 0.01, f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}" diff --git a/libCacheSim-python/tests/utils.py b/libCacheSim-python/tests/utils.py new file mode 100644 index 000000000..632fdc3f8 --- /dev/null +++ b/libCacheSim-python/tests/utils.py @@ -0,0 +1,17 @@ +import os + + +def get_reference_data(eviction_algo, cache_size_ratio): + data_file = os.path.join( # noqa: PTH118 + (os.path.dirname(os.path.dirname(__file__))), # noqa: PTH120 + "tests", + "reference.csv" + ) + with open(data_file, "r") as f: # noqa: PTH123 + lines = f.readlines() + for line in lines: + if eviction_algo == "ThreeLCache": + eviction_algo = "3LCache" + if line.startswith(f"{eviction_algo},{cache_size_ratio}"): + return float(line.split(",")[-1]) + return None \ No newline at end of file diff --git a/scripts/install_python.sh b/scripts/install_python.sh new file mode 100644 index 000000000..f581c48cb --- /dev/null +++ b/scripts/install_python.sh @@ -0,0 +1,9 @@ +rm -rf ./build +cmake -G Ninja -B build +pushd libCacheSim-python +pip install -e . -vvv +popd +python -c "import libCacheSim" +pushd libCacheSim-python +pytest . 
+popd \ No newline at end of file diff --git a/scripts/sync_node_version.py b/scripts/sync_node_version.py index d45a391a2..7497b1c00 100755 --- a/scripts/sync_node_version.py +++ b/scripts/sync_node_version.py @@ -22,18 +22,18 @@ def read_main_version(): """Read version from version.txt.""" project_root = get_project_root() version_file = project_root / "version.txt" - + if not version_file.exists(): print(f"Error: {version_file} not found", file=sys.stderr) sys.exit(1) - + with open(version_file, 'r') as f: version = f.read().strip() - + if not version: print("Error: version.txt is empty", file=sys.stderr) sys.exit(1) - + return version @@ -41,29 +41,29 @@ def update_package_json(version): """Update package.json with the new version.""" project_root = get_project_root() package_json_path = project_root / "libCacheSim-node" / "package.json" - + if not package_json_path.exists(): print(f"Error: {package_json_path} not found", file=sys.stderr) sys.exit(1) - + # Read current package.json with open(package_json_path, 'r') as f: package_data = json.load(f) - + current_version = package_data.get('version', 'unknown') - + if current_version == version: print(f"Version already up to date: {version}") return False - + # Update version package_data['version'] = version - + # Write back to file with proper formatting with open(package_json_path, 'w') as f: json.dump(package_data, f, indent=2) f.write('\n') # Add trailing newline - + print(f"Updated Node.js binding version: {current_version} → {version}") return True @@ -74,19 +74,19 @@ def main(): # Read main project version main_version = read_main_version() print(f"Main project version: {main_version}") - + # Update Node.js binding version updated = update_package_json(main_version) - + if updated: print("✓ Node.js binding version synchronized successfully") else: print("✓ No changes needed") - + except Exception as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) if __name__ == "__main__": - main() \ No newline at end 
of file + main() diff --git a/scripts/sync_python_version.py b/scripts/sync_python_version.py new file mode 100644 index 000000000..01b1631b8 --- /dev/null +++ b/scripts/sync_python_version.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +""" +Script to synchronize version between libCacheSim main project and Python bindings. + +This script reads the version from version.txt and updates the pyproject.toml +in libCacheSim-python to match. +""" + +import json +import os +import sys +import re +from pathlib import Path + + +def get_project_root(): + """Get the project root directory.""" + script_dir = Path(__file__).parent + return script_dir.parent + + +def read_main_version(): + """Read version from version.txt.""" + project_root = get_project_root() + version_file = project_root / "version.txt" + + if not version_file.exists(): + print(f"Error: {version_file} not found", file=sys.stderr) + sys.exit(1) + + with open(version_file, 'r') as f: + version = f.read().strip() + + if not version: + print("Error: version.txt is empty", file=sys.stderr) + sys.exit(1) + + return version + +def update_pyproject_toml(version): + """Update pyproject.toml with the new version.""" + project_root = get_project_root() + pyproject_toml_path = project_root / "libCacheSim-python" / "pyproject.toml" + + if not pyproject_toml_path.exists(): + print(f"Error: {pyproject_toml_path} not found", file=sys.stderr) + return False + + # Read current pyproject.toml + with open(pyproject_toml_path, 'r') as f: + pyproject_data = f.read() + + # Update the version line in pyproject.toml, make it can match any version in version.txt, like "0.3.1" or "dev" + current_version = re.search(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", pyproject_data).group(1) + if current_version == version: + print(f"Python binding version already up to date: {version}") + return False + # replace the version line with the new version + pyproject_data = re.sub(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", f"version = 
\"{version}\"", pyproject_data) + + # Write back to file with proper formatting + with open(pyproject_toml_path, 'w') as f: + f.write(pyproject_data) + + print(f"Updated Python version: {current_version} → {version}") + return True + + +def main(): + """Main function.""" + try: + # Read main project version + main_version = read_main_version() + print(f"Main project version: {main_version}") + + # Update Python binding version + updated = update_pyproject_toml(main_version) + + if updated: + print("✓ Python binding version synchronized successfully") + else: + print("✓ No changes needed") + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() From bd1b0aad86cce2092f189e635ccb3af4e4c4f609 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 10 Jul 2025 21:23:04 -0400 Subject: [PATCH 02/10] Feat: support plugin in python --- libCacheSim-python/README.md | 156 +++++++++- .../examples/python_hook_cache_example.py | 175 +++++++++++ libCacheSim-python/libcachesim/__init__.py | 2 + libCacheSim-python/libcachesim/eviction.py | 164 ++++++++-- libCacheSim-python/src/pylibcachesim.cpp | 135 ++++++++ .../tests/test_python_hook_cache.py | 293 ++++++++++++++++++ scripts/install_python.sh | 26 +- 7 files changed, 918 insertions(+), 33 deletions(-) create mode 100644 libCacheSim-python/examples/python_hook_cache_example.py create mode 100644 libCacheSim-python/tests/test_python_hook_cache.py diff --git a/libCacheSim-python/README.md b/libCacheSim-python/README.md index 67039e04c..f251ded71 100644 --- a/libCacheSim-python/README.md +++ b/libCacheSim-python/README.md @@ -22,6 +22,8 @@ python -m pytest . 
## Usage +### Basic Cache Usage + ```python import libcachesim as cachesim @@ -38,7 +40,155 @@ hit = cache.get(req) print(f"Cache hit: {hit}") ``` -## Features +### Custom Cache Policies + +The Python binding supports custom cache replacement algorithms using Python function hooks - no C/C++ compilation required: + +#### Python Hook Cache + +Define custom cache policies using pure Python functions: + +```python +import libcachesim as cachesim +from collections import OrderedDict + +# Create a Python hook-based cache +cache = cachesim.PythonHookCachePolicy(cache_size=1024*1024, cache_name="MyLRU") + +# Define LRU policy hooks +def init_hook(cache_size): + return OrderedDict() # Track access order + +def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) # Move to end (most recent) + +def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True # Add to end + +def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) # Return least recent + +def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + +# Set the hooks +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + +# Use it like any other cache +req = cachesim.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = cache.get(req) +``` + +### Available Cache Algorithms + +The following built-in cache algorithms are available: + +- **FIFO**: First-In-First-Out +- **LRU**: Least Recently Used +- **ARC**: Adaptive Replacement Cache +- **Clock**: Clock algorithm +- **S3FIFO**: Simple, Fast, Fair FIFO +- **Sieve**: Sieve cache algorithm +- **TinyLFU**: TinyLFU with window +- **TwoQ**: Two-Queue algorithm +- **LRB**: Learning-based cache (if enabled) +- **ThreeLCache**: Three-level cache (if enabled) + +Each algorithm can be used similarly: + +```python +# Examples of different cache algorithms +lru_cache = cachesim.LRU(cache_size=1024*1024) +arc_cache = cachesim.ARC(cache_size=1024*1024) +s3fifo_cache = cachesim.S3FIFO(cache_size=1024*1024) +``` 
+ +### Custom Cache Implementation Example + +Here's a complete example implementing a custom FIFO cache using Python hooks: + +```python +import libcachesim as cachesim +from collections import deque + +# Create a custom FIFO cache +cache = cachesim.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO") + +def init_hook(cache_size): + return deque() # Use deque for FIFO order + +def hit_hook(fifo_queue, obj_id, obj_size): + pass # FIFO doesn't reorder on hit + +def miss_hook(fifo_queue, obj_id, obj_size): + fifo_queue.append(obj_id) # Add to end of queue + +def eviction_hook(fifo_queue, obj_id, obj_size): + return fifo_queue[0] # Return first item (oldest) + +def remove_hook(fifo_queue, obj_id): + if fifo_queue and fifo_queue[0] == obj_id: + fifo_queue.popleft() + +# Set the hooks +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + +# Test the cache +req = cachesim.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = cache.get(req) +print(f"Cache hit: {hit}") # Should be False (miss) +``` + +### Testing and Validation + +To ensure your custom cache implementation is correct, you can compare it against the built-in implementations: + +```python +import libcachesim as cachesim + +# Test your custom cache against the built-in LRU +def test_custom_vs_builtin(): + cache_size = 1024 + + # Your custom LRU implementation + custom_cache = cachesim.PythonHookCachePolicy(cache_size, "CustomLRU") + # ... set up your LRU hooks here ... 
+ + # Built-in LRU for comparison + builtin_cache = cachesim.LRU(cache_size) + + # Test with same request sequence + test_requests = [(1, 100), (2, 100), (3, 100), (1, 100)] + + for obj_id, obj_size in test_requests: + req1 = cachesim.Request() + req1.obj_id = obj_id + req1.obj_size = obj_size + + req2 = cachesim.Request() + req2.obj_id = obj_id + req2.obj_size = obj_size + + custom_result = custom_cache.get(req1) + builtin_result = builtin_cache.get(req2) + + assert custom_result == builtin_result, f"Mismatch at obj_id {obj_id}" + print(f"obj_id {obj_id}: {'HIT' if custom_result else 'MISS'} ✓") +``` + +### Hook Function Reference + +When implementing `PythonHookCachePolicy`, you need to provide these hook functions: + +- **`init_hook(cache_size: int) -> Any`**: Initialize and return plugin data structure +- **`hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits +- **`miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses +- **`eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict +- **`remove_hook(plugin_data: Any, obj_id: int) -> None`**: Clean up when object is removed +- **`free_hook(plugin_data: Any) -> None`**: [Optional] Clean up plugin resources -- [x] Support for multiple eviction policies (FIFO, LRU, ARC, Clock, etc.) -- [ ] trace analysis tools +The `plugin_data` is whatever object you return from `init_hook()` - it can be any Python object like a list, dict, class instance, etc. diff --git a/libCacheSim-python/examples/python_hook_cache_example.py b/libCacheSim-python/examples/python_hook_cache_example.py new file mode 100644 index 000000000..daef56a73 --- /dev/null +++ b/libCacheSim-python/examples/python_hook_cache_example.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +""" +Example demonstrating how to create custom cache policies using Python hooks. 
+ +This example shows how to implement LRU and FIFO cache policies using the +PythonHookCachePolicy class, which allows users to define cache behavior using +pure Python functions instead of C/C++ plugins. +""" + +import libcachesim as lcs +from collections import OrderedDict, deque + + +class LRUPolicy: + """LRU (Least Recently Used) cache policy implementation.""" + + def __init__(self, cache_size): + self.cache_size = cache_size + self.access_order = OrderedDict() # obj_id -> True (for ordering) + + def on_hit(self, obj_id, obj_size): + """Move accessed object to end (most recent).""" + if obj_id in self.access_order: + # Move to end (most recent) + self.access_order.move_to_end(obj_id) + + def on_miss(self, obj_id, obj_size): + """Add new object to end (most recent).""" + self.access_order[obj_id] = True + + def evict(self, obj_id, obj_size): + """Return the least recently used object ID.""" + if self.access_order: + # Return first item (least recent) + victim_id = next(iter(self.access_order)) + return victim_id + raise RuntimeError("No objects to evict") + + def on_remove(self, obj_id): + """Remove object from tracking.""" + self.access_order.pop(obj_id, None) + + +class FIFOPolicy: + """FIFO (First In First Out) cache policy implementation.""" + + def __init__(self, cache_size): + self.cache_size = cache_size + self.insertion_order = deque() # obj_id queue + + def on_hit(self, obj_id, obj_size): + """FIFO doesn't change order on hits.""" + pass + + def on_miss(self, obj_id, obj_size): + """Add new object to end of queue.""" + self.insertion_order.append(obj_id) + + def evict(self, obj_id, obj_size): + """Return the first inserted object ID.""" + if self.insertion_order: + victim_id = self.insertion_order.popleft() + return victim_id + raise RuntimeError("No objects to evict") + + def on_remove(self, obj_id): + """Remove object from tracking.""" + try: + self.insertion_order.remove(obj_id) + except ValueError: + pass # Object not in queue + + +def 
create_lru_cache(cache_size): + """Create an LRU cache using Python hooks.""" + cache = lcs.PythonHookCachePolicy(cache_size, "PythonLRU") + + def init_hook(cache_size): + return LRUPolicy(cache_size) + + def hit_hook(policy, obj_id, obj_size): + policy.on_hit(obj_id, obj_size) + + def miss_hook(policy, obj_id, obj_size): + policy.on_miss(obj_id, obj_size) + + def eviction_hook(policy, obj_id, obj_size): + return policy.evict(obj_id, obj_size) + + def remove_hook(policy, obj_id): + policy.on_remove(obj_id) + + def free_hook(policy): + # Python garbage collection handles cleanup + pass + + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) + return cache + + +def create_fifo_cache(cache_size): + """Create a FIFO cache using Python hooks.""" + cache = lcs.PythonHookCachePolicy(cache_size, "PythonFIFO") + + def init_hook(cache_size): + return FIFOPolicy(cache_size) + + def hit_hook(policy, obj_id, obj_size): + policy.on_hit(obj_id, obj_size) + + def miss_hook(policy, obj_id, obj_size): + policy.on_miss(obj_id, obj_size) + + def eviction_hook(policy, obj_id, obj_size): + return policy.evict(obj_id, obj_size) + + def remove_hook(policy, obj_id): + policy.on_remove(obj_id) + + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + return cache + + +def test_cache_policy(cache, name): + """Test a cache policy with sample requests.""" + print(f"\n=== Testing {name} Cache ===") + + # Test requests: obj_id, obj_size + test_requests = [ + (1, 100), (2, 100), (3, 100), (4, 100), (5, 100), # Fill cache + (1, 100), # Hit + (6, 100), # Miss, should evict something + (2, 100), # Hit or miss depending on policy + (7, 100), # Miss, should evict something + ] + + hits = 0 + misses = 0 + + for obj_id, obj_size in test_requests: + req = lcs.Request() + req.obj_id = obj_id + req.obj_size = obj_size + + hit = cache.get(req) + if hit: + hits += 1 + print(f"Request {obj_id}: HIT") + else: + misses += 1 + print(f"Request {obj_id}: 
MISS") + + print(f"Total: {hits} hits, {misses} misses") + print(f"Cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes occupied") + + +def main(): + """Main example function.""" + cache_size = 400 # Bytes (can hold 4 objects of size 100 each) + + # Test LRU cache + lru_cache = create_lru_cache(cache_size) + test_cache_policy(lru_cache, "LRU") + + # Test FIFO cache + fifo_cache = create_fifo_cache(cache_size) + test_cache_policy(fifo_cache, "FIFO") + + print("\n=== Comparison ===") + print("LRU keeps recently accessed items, evicting least recently used") + print("FIFO keeps items in insertion order, evicting oldest inserted") + + +if __name__ == "__main__": + main() diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py index 92e667fb7..67f0c2706 100644 --- a/libCacheSim-python/libcachesim/__init__.py +++ b/libCacheSim-python/libcachesim/__init__.py @@ -21,6 +21,7 @@ ThreeLCache, TinyLFU, TwoQ, + PythonHookCachePolicy, ) __all__ = [ @@ -38,6 +39,7 @@ "TinyLFU", "TraceType", "TwoQ", + "PythonHookCachePolicy", "__doc__", "__version__", "create_cache", diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py index a5beae3e9..673a0963f 100644 --- a/libCacheSim-python/libcachesim/eviction.py +++ b/libCacheSim-python/libcachesim/eviction.py @@ -17,6 +17,7 @@ ThreeLCache_init, TinyLFU_init, TwoQ_init, + PythonHookCache, ) @@ -49,7 +50,7 @@ def __repr__(self): class FIFO(EvictionPolicy): """First In First Out replacement policy. - + Args: cache_size: Size of the cache """ @@ -59,7 +60,7 @@ def init_cache(self, cache_size: int, **kwargs) -> Cache: # noqa: ARG002 class Clock(EvictionPolicy): """Clock (Second Chance or FIFO-Reinsertion) replacement policy. 
- + Args: cache_size: Size of the cache n_bit_counter: Number of bits for counter (default: 1) @@ -78,7 +79,7 @@ def init_cache(self, cache_size: int, **kwargs): if init_freq < 0 or init_freq > 2**n_bit_counter - 1: msg = "init_freq must be between 0 and 2^n_bit_counter - 1" raise ValueError(msg) - + self.init_freq = init_freq self.n_bit_counter = n_bit_counter @@ -93,9 +94,9 @@ def __repr__(self): class TwoQ(EvictionPolicy): """2Q replacement policy. - 2Q has three queues: Ain, Aout, Am. When a obj hits in Aout, it will be + 2Q has three queues: Ain, Aout, Am. When a obj hits in Aout, it will be inserted into Am otherwise it will be inserted into Ain. - + Args: cache_size: Total size of the cache ain_size_ratio: Size ratio for Ain queue (default: 0.25) @@ -125,11 +126,11 @@ def __repr__(self): class LRB(EvictionPolicy): """LRB (Learning Relaxed Belady) replacement policy. - - LRB is a learning-based replacement policy that uses a neural network to + + LRB is a learning-based replacement policy that uses a neural network to predict the future access patterns of the cache, randomly select one obj outside the Belady boundary to evict. - + Args: cache_size: Size of the cache objective: Objective function to optimize (default: "byte-miss-ratio") @@ -155,7 +156,7 @@ def __repr__(self): class LRU(EvictionPolicy): """Least Recently Used replacement policy. - + Args: cache_size: Size of the cache """ @@ -167,10 +168,10 @@ class ARC(EvictionPolicy): """Adaptive Replacement Cache policy. ARC is a two-tiered cache with two LRU caches (T1 and T2) and two ghost - lists (B1 and B2). T1 records the obj accessed only once, T2 records - the obj accessed more than once. ARC has an internal parameter `p` to + lists (B1 and B2). T1 records the obj accessed only once, T2 records + the obj accessed more than once. ARC has an internal parameter `p` to learn and dynamically control the size of T1 and T2. 
- + Args: cache_size: Size of the cache """ @@ -181,25 +182,25 @@ def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 class S3FIFO(EvictionPolicy): """S3FIFO replacement policy. - S3FIFO consists of three FIFO queues: Small, Main, and Ghost. Small + S3FIFO consists of three FIFO queues: Small, Main, and Ghost. Small queue gets the obj and records the freq. - When small queue is full, if the obj to evict satisfies the threshold, - it will be moved to main queue. Otherwise, it will be evicted from small + When small queue is full, if the obj to evict satisfies the threshold, + it will be moved to main queue. Otherwise, it will be evicted from small queue and inserted into ghost queue. - When main queue is full, the obj to evict will be evicted and reinserted + When main queue is full, the obj to evict will be evicted and reinserted like Clock. If obj hits in the ghost queue, it will be moved to main queue. - + Args: cache_size: Size of the cache fifo_size_ratio: Size ratio for FIFO queue (default: 0.1) ghost_size_ratio: Size ratio for ghost queue (default: 0.9) move_to_main_threshold: Threshold for moving obj from ghost to main (default: 2) """ - def __init__(self, cache_size: int, fifo_size_ratio: float = 0.1, + def __init__(self, cache_size: int, fifo_size_ratio: float = 0.1, ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2): - super().__init__(cache_size, fifo_size_ratio=fifo_size_ratio, - ghost_size_ratio=ghost_size_ratio, + super().__init__(cache_size, fifo_size_ratio=fifo_size_ratio, + ghost_size_ratio=ghost_size_ratio, move_to_main_threshold=move_to_main_threshold) def init_cache(self, cache_size: int, **kwargs): @@ -231,7 +232,7 @@ class Sieve(EvictionPolicy): """Sieve replacement policy. FIFO-Reinsertion with check pointer. - + Args: cache_size: Size of the cache """ @@ -241,7 +242,7 @@ def init_cache(self, cache_size: int, **kwargs): # noqa: ARG002 class ThreeLCache(EvictionPolicy): """3L-Cache replacement policy. 
- + Args: cache_size: Size of the cache objective: Objective function to optimize (default: "byte-miss-ratio") @@ -267,7 +268,7 @@ def __repr__(self): class TinyLFU(EvictionPolicy): """TinyLFU replacement policy. - + Args: cache_size: Size of the cache main_cache: Main cache to use (default: "SLRU") @@ -292,4 +293,119 @@ def init_cache(self, cache_size: int, **kwargs): def __repr__(self): return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ f"main_cache={self.main_cache}, " \ - f"window_size={self.window_size})" \ No newline at end of file + f"window_size={self.window_size})" + + + +class PythonHookCachePolicy(EvictionPolicyBase): + """Python hook-based cache that allows defining custom policies using Python functions. + + This cache implementation allows users to define custom cache replacement algorithms + using pure Python functions instead of compiling C/C++ plugins. Users provide hook + functions for cache initialization, hit handling, miss handling, eviction decisions, + and cleanup. + + Args: + cache_size: Size of the cache in bytes + cache_name: Optional name for the cache (default: "PythonHookCache") + + Hook Functions Required: + init_hook(cache_size: int) -> Any: + Initialize plugin data structures. Return any object to be passed to other hooks. + + hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + Handle cache hit events. Update internal state as needed. + + miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + Handle cache miss events. Update internal state for new object. + + eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int: + Determine which object to evict. Return the object ID to be evicted. + + remove_hook(plugin_data: Any, obj_id: int) -> None: + Clean up when objects are removed from cache. + + free_hook(plugin_data: Any) -> None: [Optional] + Clean up plugin resources when cache is destroyed. 
+ + Example: + >>> from collections import OrderedDict + >>> + >>> cache = PythonHookCachePolicy(1024) + >>> + >>> def init_hook(cache_size): + ... return OrderedDict() # LRU tracking + >>> + >>> def hit_hook(lru_dict, obj_id, obj_size): + ... lru_dict.move_to_end(obj_id) # Move to end (most recent) + >>> + >>> def miss_hook(lru_dict, obj_id, obj_size): + ... lru_dict[obj_id] = True # Add to end + >>> + >>> def eviction_hook(lru_dict, obj_id, obj_size): + ... return next(iter(lru_dict)) # Return least recent + >>> + >>> def remove_hook(lru_dict, obj_id): + ... lru_dict.pop(obj_id, None) + >>> + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> + >>> req = Request() + >>> req.obj_id = 1 + >>> req.obj_size = 100 + >>> hit = cache.get(req) + """ + def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"): + self.cache_size = cache_size + self.cache_name = cache_name + self.cache = PythonHookCache(cache_size, cache_name) + self._hooks_set = False + + def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): + """Set the hook functions for the cache. + + Args: + init_hook: Function called during cache initialization + hit_hook: Function called on cache hit + miss_hook: Function called on cache miss + eviction_hook: Function called to select eviction candidate + remove_hook: Function called when object is removed + free_hook: Optional function called during cache cleanup + """ + self.cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) + self._hooks_set = True + + def get(self, req: Request) -> bool: + """Process a cache request. + + Args: + req: The cache request to process + + Returns: + True if cache hit, False if cache miss + + Raises: + RuntimeError: If hooks have not been set + """ + if not self._hooks_set: + raise RuntimeError("Hooks must be set before using the cache. 
Call set_hooks() first.") + return self.cache.get(req) + + @property + def n_req(self): + """Number of requests processed.""" + return self.cache.n_req + + @property + def n_obj(self): + """Number of objects currently in cache.""" + return self.cache.n_obj + + @property + def occupied_byte(self): + """Number of bytes currently occupied in cache.""" + return self.cache.occupied_byte + + def __repr__(self): + return f"{self.__class__.__name__}(cache_size={self.cache_size}, " \ + f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})" diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp index b40bf3b20..88dfb1411 100644 --- a/libCacheSim-python/src/pylibcachesim.cpp +++ b/libCacheSim-python/src/pylibcachesim.cpp @@ -1,6 +1,10 @@ #include +#include +#include #include +#include +#include #include "config.h" #include "libCacheSim/cache.h" @@ -34,6 +38,97 @@ #define STRINGIFY(x) #x #define MACRO_STRINGIFY(x) STRINGIFY(x) +namespace py = pybind11; + +// Python Hook Cache Implementation +class PythonHookCache { +private: + uint64_t cache_size_; + std::string cache_name_; + std::unordered_map objects_; // obj_id -> obj_size + py::object plugin_data_; + + // Hook functions + py::function init_hook_; + py::function hit_hook_; + py::function miss_hook_; + py::function eviction_hook_; + py::function remove_hook_; + py::object free_hook_; // Changed to py::object to allow py::none() + +public: + uint64_t n_req = 0; + uint64_t n_obj = 0; + uint64_t occupied_byte = 0; + uint64_t cache_size; + + PythonHookCache(uint64_t cache_size, const std::string& cache_name = "PythonHookCache") + : cache_size_(cache_size), cache_name_(cache_name), cache_size(cache_size), + free_hook_(py::none()) {} + + void set_hooks(py::function init_hook, py::function hit_hook, py::function miss_hook, + py::function eviction_hook, py::function remove_hook, + py::object free_hook = py::none()) { + init_hook_ = init_hook; + hit_hook_ = hit_hook; + miss_hook_ = 
miss_hook; + eviction_hook_ = eviction_hook; + remove_hook_ = remove_hook; + + // Handle free_hook properly + if (!free_hook.is_none()) { + free_hook_ = free_hook; + } else { + free_hook_ = py::none(); + } + + // Initialize plugin data + plugin_data_ = init_hook_(cache_size_); + } + + bool get(const request_t& req) { + n_req++; + + auto it = objects_.find(req.obj_id); + if (it != objects_.end()) { + // Cache hit + hit_hook_(plugin_data_, req.obj_id, req.obj_size); + return true; + } else { + // Cache miss - need to insert + // Check if eviction is needed + if (occupied_byte + req.obj_size > cache_size_ && !objects_.empty()) { + // Need to evict + uint64_t victim_id = eviction_hook_(plugin_data_, req.obj_id, req.obj_size).cast(); + auto victim_it = objects_.find(victim_id); + if (victim_it != objects_.end()) { + occupied_byte -= victim_it->second; + objects_.erase(victim_it); + n_obj--; + remove_hook_(plugin_data_, victim_id); + } + } + + // Insert new object if there's space + if (occupied_byte + req.obj_size <= cache_size_) { + objects_[req.obj_id] = req.obj_size; + occupied_byte += req.obj_size; + n_obj++; + } + + miss_hook_(plugin_data_, req.obj_id, req.obj_size); + return false; + } + } + + ~PythonHookCache() { + if (!free_hook_.is_none()) { + py::function free_func = free_hook_.cast(); + free_func(plugin_data_); + } + } +}; + struct CacheDeleter { void operator()(cache_t* ptr) const { if (ptr != nullptr) ptr->cache_free(ptr); @@ -558,6 +653,46 @@ PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) Cache: A new TwoQ cache instance. )pbdoc"); + /** + * @brief Create a Python hook-based cache instance. 
+ */ + py::class_(m, "PythonHookCache") + .def(py::init(), py::arg("cache_size"), py::arg("cache_name") = "PythonHookCache") + .def("set_hooks", &PythonHookCache::set_hooks, + py::arg("init_hook"), py::arg("hit_hook"), py::arg("miss_hook"), + py::arg("eviction_hook"), py::arg("remove_hook"), py::arg("free_hook") = py::none(), + R"pbdoc( + Set the hook functions for the cache. + + Args: + init_hook (callable): Function called during cache initialization. + Signature: init_hook(cache_size: int) -> Any + hit_hook (callable): Function called on cache hit. + Signature: hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None + miss_hook (callable): Function called on cache miss. + Signature: miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None + eviction_hook (callable): Function called to select eviction candidate. + Signature: eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int + remove_hook (callable): Function called when object is removed. + Signature: remove_hook(plugin_data: Any, obj_id: int) -> None + free_hook (callable, optional): Function called during cache cleanup. + Signature: free_hook(plugin_data: Any) -> None + )pbdoc") + .def("get", &PythonHookCache::get, py::arg("req"), + R"pbdoc( + Process a cache request. + + Args: + req (Request): The cache request to process. + + Returns: + bool: True if cache hit, False if cache miss. 
+ )pbdoc") + .def_readwrite("n_req", &PythonHookCache::n_req) + .def_readwrite("n_obj", &PythonHookCache::n_obj) + .def_readwrite("occupied_byte", &PythonHookCache::occupied_byte) + .def_readwrite("cache_size", &PythonHookCache::cache_size); + #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); #else diff --git a/libCacheSim-python/tests/test_python_hook_cache.py b/libCacheSim-python/tests/test_python_hook_cache.py new file mode 100644 index 000000000..2e0326a82 --- /dev/null +++ b/libCacheSim-python/tests/test_python_hook_cache.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python3 +""" +Test file for PythonHookCachePolicy functionality. +""" + +import sys +import os + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def test_python_hook_cache(): + """Test the Python hook cache implementation.""" + print("Testing PythonHookCachePolicy...") + + # Create cache + cache_size = 300 # 3 objects of size 100 each + cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + print(f"Initializing LRU cache with size {cache_size}") + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + print(f"Hit: object {obj_id}") + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + print(f"Miss: object {obj_id}, size {obj_size}") + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + victim = next(iter(lru_dict)) + print(f"Evicting object {victim} to make room for {obj_id}") + return victim + + def remove_hook(lru_dict, obj_id): + print(f"Removing object {obj_id}") + lru_dict.pop(obj_id, None) + + # Set hooks + cache.set_hooks(init_hook, hit_hook, 
miss_hook, eviction_hook, remove_hook) + + # Test sequence + test_requests = [ + (1, 100), # Miss - insert 1 + (2, 100), # Miss - insert 2 + (3, 100), # Miss - insert 3 (cache full) + (1, 100), # Hit - move 1 to end + (4, 100), # Miss - should evict 2 (LRU), insert 4 + (2, 100), # Miss - should evict 3, insert 2 + (1, 100), # Hit - move 1 to end + ] + + print("\n--- Starting cache simulation ---") + for obj_id, obj_size in test_requests: + req = lcs.Request() + req.obj_id = obj_id + req.obj_size = obj_size + + result = cache.get(req) + print(f"Request {obj_id}: {'HIT' if result else 'MISS'}") + print(f" Cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes\n") + + print("Test completed successfully!") + + +def test_error_handling(): + """Test error handling.""" + print("\nTesting error handling...") + + cache = lcs.PythonHookCachePolicy(1000) + + # Try to use cache without setting hooks + req = lcs.Request() + req.obj_id = 1 + req.obj_size = 100 + + try: + cache.get(req) + print("ERROR: Should have raised RuntimeError") + except RuntimeError as e: + print(f"Correctly caught error: {e}") + + print("Error handling test passed!") + + +def test_lru_comparison(): + """Test Python hook LRU against native LRU to verify identical behavior.""" + print("\nTesting Python hook LRU vs Native LRU comparison...") + + cache_size = 300 # 3 objects of size 100 each + + # Create native LRU cache + native_lru = lcs.LRU(cache_size) + + # Create Python hook LRU cache + hook_lru = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, 
remove_hook) + + # Test sequence with various access patterns + test_requests = [ + (1, 100), # Miss - insert 1 + (2, 100), # Miss - insert 2 + (3, 100), # Miss - insert 3 (cache full) + (1, 100), # Hit - move 1 to end + (4, 100), # Miss - should evict 2 (LRU), insert 4 + (2, 100), # Miss - should evict 3, insert 2 + (1, 100), # Hit - move 1 to end + (3, 100), # Miss - should evict 4, insert 3 + (5, 100), # Miss - should evict 2, insert 5 + (1, 100), # Hit - move 1 to end + (3, 100), # Hit - move 3 to end + (6, 100), # Miss - should evict 5, insert 6 + ] + + print("\n--- Comparing LRU implementations ---") + hit_rate_matches = 0 + total_requests = len(test_requests) + + for i, (obj_id, obj_size) in enumerate(test_requests): + # Test native LRU + req_native = lcs.Request() + req_native.obj_id = obj_id + req_native.obj_size = obj_size + native_result = native_lru.get(req_native) + + # Test hook LRU + req_hook = lcs.Request() + req_hook.obj_id = obj_id + req_hook.obj_size = obj_size + hook_result = hook_lru.get(req_hook) + + # Compare results + match = native_result == hook_result + if match: + hit_rate_matches += 1 + + print(f"Request {i+1}: obj_id={obj_id}") + print(f" Native LRU: {'HIT' if native_result else 'MISS'}") + print(f" Hook LRU: {'HIT' if hook_result else 'MISS'}") + print(f" Match: {'✓' if match else '✗'}") + + # Compare cache statistics + stats_match = (native_lru.cache.n_obj == hook_lru.n_obj and + native_lru.cache.occupied_byte == hook_lru.occupied_byte) + print(f" Native stats: {native_lru.cache.n_obj} objects, {native_lru.cache.occupied_byte} bytes") + print(f" Hook stats: {hook_lru.n_obj} objects, {hook_lru.occupied_byte} bytes") + print(f" Stats match: {'✓' if stats_match else '✗'}") + print() + + if not match: + print(f"ERROR: Hit/miss mismatch at request {i+1}") + return False + + if not stats_match: + print(f"ERROR: Cache statistics mismatch at request {i+1}") + return False + + accuracy = (hit_rate_matches / total_requests) * 100 + print(f"LRU 
comparison test results:") + print(f" Total requests: {total_requests}") + print(f" Matching results: {hit_rate_matches}") + print(f" Accuracy: {accuracy:.1f}%") + + if accuracy == 100.0: + print("✓ LRU comparison test PASSED - Both implementations behave identically!") + return True + else: + print("✗ LRU comparison test FAILED - Implementations differ!") + return False + + +def test_lru_comparison_variable_sizes(): + """Test Python hook LRU vs Native LRU with variable object sizes.""" + print("\nTesting Python hook LRU vs Native LRU with variable object sizes...") + + cache_size = 1000 # Total cache capacity + + # Create native LRU cache + native_lru = lcs.LRU(cache_size) + + # Create Python hook LRU cache + hook_lru = lcs.PythonHookCachePolicy(cache_size, "VariableSizeLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + hook_lru.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test sequence with variable object sizes + test_requests = [ + (1, 200), # Miss - insert 1 (200 bytes) + (2, 300), # Miss - insert 2 (300 bytes) + (3, 400), # Miss - insert 3 (400 bytes) - total 900 bytes + (4, 200), # Miss - should evict 1, insert 4 (total would be 1100, over limit) + (1, 200), # Miss - should evict 2, insert 1 + (5, 100), # Miss - should evict 3, insert 5 + (4, 200), # Hit - access 4 + (6, 500), # Miss - should evict multiple objects to fit + (4, 200), # Miss - 4 was evicted + ] + + print("\n--- Comparing LRU implementations with variable sizes ---") + all_match = True + + for i, (obj_id, obj_size) in enumerate(test_requests): + # Test native LRU + req_native = lcs.Request() + req_native.obj_id = obj_id + 
req_native.obj_size = obj_size + native_result = native_lru.get(req_native) + + # Test hook LRU + req_hook = lcs.Request() + req_hook.obj_id = obj_id + req_hook.obj_size = obj_size + hook_result = hook_lru.get(req_hook) + + # Compare results + result_match = native_result == hook_result + stats_match = (native_lru.cache.n_obj == hook_lru.n_obj and + native_lru.cache.occupied_byte == hook_lru.occupied_byte) + + print(f"Request {i+1}: obj_id={obj_id}, size={obj_size}") + print(f" Native LRU: {'HIT' if native_result else 'MISS'}") + print(f" Hook LRU: {'HIT' if hook_result else 'MISS'}") + print(f" Result match: {'✓' if result_match else '✗'}") + print(f" Native stats: {native_lru.cache.n_obj} objects, {native_lru.cache.occupied_byte} bytes") + print(f" Hook stats: {hook_lru.n_obj} objects, {hook_lru.occupied_byte} bytes") + print(f" Stats match: {'✓' if stats_match else '✗'}") + print() + + if not result_match or not stats_match: + all_match = False + print(f"ERROR: Mismatch at request {i+1}") + + if all_match: + print("✓ Variable size LRU comparison test PASSED!") + return True + else: + print("✗ Variable size LRU comparison test FAILED!") + return False + + +if __name__ == "__main__": + test_python_hook_cache() + test_error_handling() + test_lru_comparison() + test_lru_comparison_variable_sizes() diff --git a/scripts/install_python.sh b/scripts/install_python.sh index f581c48cb..a224feff7 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -1,9 +1,23 @@ +#!/bin/bash + +# Build the main libCacheSim C++ library first +echo "Building main libCacheSim library..." rm -rf ./build -cmake -G Ninja -B build -pushd libCacheSim-python +cmake -G Ninja -B build -DENABLE_3L_CACHE=ON +ninja -C build + +# Now build and install the Python binding +echo "Building Python binding..." +cd libCacheSim-python pip install -e . -vvv -popd -python -c "import libCacheSim" -pushd libCacheSim-python +cd .. + +# Test that the import works +echo "Testing import..." 
+python -c "import libcachesim" + +# Run tests +echo "Running tests..." +cd libCacheSim-python pytest . -popd \ No newline at end of file +cd .. From c3b66d49fd2e0e112bb428ff7439cee7baf2e228 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 10 Jul 2025 22:08:41 -0400 Subject: [PATCH 03/10] Clean up --- .github/workflows/python.yml | 2 +- libCacheSim-python/README.md | 54 +++- .../examples/demo_unified_interface.py | 131 ++++++++ libCacheSim-python/libcachesim/__init__.py | 4 + libCacheSim-python/libcachesim/eviction.py | 105 +++++- libCacheSim-python/pyproject.toml | 2 +- libCacheSim-python/src/pylibcachesim.cpp | 300 +++++++++++++----- .../tests/test_process_trace.py | 274 ++++++++++++++++ .../tests/test_python_hook_cache.py | 24 +- .../tests/test_unified_interface.py | 201 ++++++++++++ scripts/install_python.sh | 12 +- 11 files changed, 995 insertions(+), 114 deletions(-) create mode 100644 libCacheSim-python/examples/demo_unified_interface.py create mode 100644 libCacheSim-python/tests/test_process_trace.py create mode 100644 libCacheSim-python/tests/test_unified_interface.py diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index ebd08a41a..74ff0bb58 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -6,7 +6,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 diff --git a/libCacheSim-python/README.md b/libCacheSim-python/README.md index f251ded71..5dfb549a2 100644 --- a/libCacheSim-python/README.md +++ b/libCacheSim-python/README.md @@ -5,13 +5,8 @@ Python bindings for libCacheSim, a high-performance cache simulator. ## Installation ```bash -pip install . -``` - -## Development - -```bash -pip install -e . +cd .. 
+bash scripts/install_python.sh ``` Test @@ -192,3 +187,48 @@ When implementing `PythonHookCachePolicy`, you need to provide these hook functi - **`free_hook(plugin_data: Any) -> None`**: [Optional] Clean up plugin resources The `plugin_data` is whatever object you return from `init_hook()` - it can be any Python object like a list, dict, class instance, etc. + +### Unified Interface + +All cache policies (both built-in and Python hook-based) share the same unified interface: + +```python +import libcachesim as cachesim + +# All cache policies work the same way +cache = cachesim.LRU(cache_size=1024*1024) +# or +cache = cachesim.PythonHookCachePolicy(cache_size=1024*1024) +# cache.set_hooks(...) for Python hook cache + +# Unified interface for all caches: +req = cachesim.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = cache.get(req) # Process single request + +reader = cachesim.open_trace("trace.bin", cachesim.TraceType.ORACLE_GENERAL_TRACE.value) +miss_ratio = cache.process_trace(reader) # Process entire trace efficiently + +# Unified properties for all caches: +print(f"Cache size: {cache.cache_size}") +print(f"Objects: {cache.n_obj}") +print(f"Occupied bytes: {cache.occupied_byte}") +print(f"Requests processed: {cache.n_req}") +``` + +### Efficient Trace Processing + +The `process_trace` method processes trace data entirely on the C++ side to minimize overhead: + +```python +# Process entire trace with optional limits +miss_ratio = cache.process_trace( + reader, + max_req=10000, # Process max 10K requests + max_sec=3600, # Process max 1 hour of trace + start_time=1000, # Start from timestamp 1000 + end_time=5000 # End at timestamp 5000 +) +print(f"Miss ratio: {miss_ratio:.4f}") +``` diff --git a/libCacheSim-python/examples/demo_unified_interface.py b/libCacheSim-python/examples/demo_unified_interface.py new file mode 100644 index 000000000..c51c3e344 --- /dev/null +++ b/libCacheSim-python/examples/demo_unified_interface.py @@ -0,0 +1,131 @@ +#!/usr/bin/env 
python3 +""" +Demo script showing the unified interface for all cache policies. +This demonstrates how to use both native and Python hook-based caches +with the same API for seamless algorithm comparison and switching. +""" + +import sys +import os + +# Add parent directory for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader.""" + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + print(f"Warning: Trace file not found at {data_file}") + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value) + + +def create_demo_lru_hooks(): + """Create demo LRU hooks for Python-based cache policy.""" + + def init_hook(cache_size): + print(f" Initializing custom LRU with {cache_size} bytes") + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +def demo_unified_interface(): + """Demonstrate the unified interface across different cache policies.""" + print("libCacheSim Python Binding - Unified Interface Demo") + print("=" * 60) + + cache_size = 1024 * 1024 # 1MB + + # Create different cache policies + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": 
lcs.ARC(cache_size), + } + + # Create Python hook-based LRU + python_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_demo_lru_hooks() + python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Custom Python LRU"] = python_cache + + print(f"Testing {len(caches)} different cache policies with unified interface:") + + # Demo 1: Single request interface + print("1. Single Request Interface:") + print(" All caches use: cache.get(request)") + + test_req = lcs.Request() + test_req.obj_id = 1 + test_req.obj_size = 1024 + + for name, cache in caches.items(): + result = cache.get(test_req) + print(f" {name:20s}: {'HIT' if result else 'MISS'}") + + # Demo 2: Unified properties interface + print("\n2. Unified Properties Interface:") + print(" All caches provide: cache_size, n_obj, occupied_byte, n_req") + + for name, cache in caches.items(): + print(f" {name:20s}: size={cache.cache_size}, objs={cache.n_obj}, " + f"bytes={cache.occupied_byte}, reqs={cache.n_req}") + + # Demo 3: Efficient trace processing + print("\n3. Efficient Trace Processing Interface:") + print(" All caches use: cache.process_trace(reader, max_req=N)") + + max_requests = 1000 + + for name, cache in caches.items(): + # Create fresh reader for each cache + reader = create_trace_reader() + if not reader: + print(f" {name:20s}: trace file not available") + continue + + miss_ratio = cache.process_trace(reader, max_req=max_requests) + print(f" {name:20s}: miss_ratio={miss_ratio:.4f}") + + print("\nKey Benefits of Unified Interface:") + print(" • Same API for all cache policies (built-in + custom)") + print(" • Easy to switch between different algorithms") + print(" • Efficient trace processing in C++ (no Python overhead)") + print(" • Consistent properties and statistics") + print(" • Type-safe and well-documented") + + print("\nDemo completed! 
All cache policies work with the same interface.") + + +if __name__ == "__main__": + demo_unified_interface() diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py index 67f0c2706..e01826375 100644 --- a/libCacheSim-python/libcachesim/__init__.py +++ b/libCacheSim-python/libcachesim/__init__.py @@ -8,6 +8,8 @@ __version__, create_cache, open_trace, + process_trace, + process_trace_python_hook, ) from .const import TraceType from .eviction import ( @@ -44,5 +46,7 @@ "__version__", "create_cache", "open_trace", + "process_trace", + "process_trace_python_hook", # TODO(haocheng): add more eviction policies ] diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py index 673a0963f..1a145be2e 100644 --- a/libCacheSim-python/libcachesim/eviction.py +++ b/libCacheSim-python/libcachesim/eviction.py @@ -31,6 +31,25 @@ def get(self, req: Request) -> bool: def __repr__(self) -> str: pass + @abstractmethod + def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1): + """Process a trace with this cache and return miss ratio. + + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. + + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + """ + pass + class EvictionPolicy(EvictionPolicyBase): """Base class for all eviction policies.""" @@ -44,9 +63,54 @@ def init_cache(self, cache_size: int, **kwargs) -> Cache: def get(self, req: Request) -> bool: return self.cache.get(req) + def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1): + """Process a trace with this cache and return miss ratio. 
+ + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. + + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + + Example: + >>> cache = LRU(1024*1024) + >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) + >>> miss_ratio = cache.process_trace(reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + """ + from ._libcachesim import process_trace + return process_trace(self.cache, reader, max_req, max_sec, start_time, end_time) + def __repr__(self): return f"{self.__class__.__name__}(cache_size={self.cache.cache_size})" + @property + def n_req(self): + """Number of requests processed.""" + return self.cache.n_req + + @property + def n_obj(self): + """Number of objects currently in cache.""" + return self.cache.n_obj + + @property + def occupied_byte(self): + """Number of bytes currently occupied in cache.""" + return self.cache.occupied_byte + + @property + def cache_size(self): + """Total cache size in bytes.""" + return self.cache.cache_size + class FIFO(EvictionPolicy): """First In First Out replacement policy. @@ -356,7 +420,7 @@ class PythonHookCachePolicy(EvictionPolicyBase): >>> hit = cache.get(req) """ def __init__(self, cache_size: int, cache_name: str = "PythonHookCache"): - self.cache_size = cache_size + self._cache_size = cache_size self.cache_name = cache_name self.cache = PythonHookCache(cache_size, cache_name) self._hooks_set = False @@ -391,6 +455,38 @@ def get(self, req: Request) -> bool: raise RuntimeError("Hooks must be set before using the cache. 
Call set_hooks() first.") return self.cache.get(req) + def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1): + """Process a trace with this cache and return miss ratio. + + This method processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. + + Args: + reader: The trace reader instance + max_req: Maximum number of requests to process (-1 for no limit) + max_sec: Maximum seconds to process (-1 for no limit) + start_time: Start time filter (-1 for no filter) + end_time: End time filter (-1 for no filter) + + Returns: + float: Miss ratio (0.0 to 1.0) + + Raises: + RuntimeError: If hooks have not been set + + Example: + >>> cache = PythonHookCachePolicy(1024*1024) + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> reader = open_trace("trace.csv", TraceType.CSV_TRACE) + >>> miss_ratio = cache.process_trace(reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + """ + if not self._hooks_set: + raise RuntimeError("Hooks must be set before processing trace. 
Call set_hooks() first.") + + from ._libcachesim import process_trace_python_hook + return process_trace_python_hook(self.cache, reader, max_req, max_sec, start_time, end_time) + @property def n_req(self): """Number of requests processed.""" @@ -406,6 +502,11 @@ def occupied_byte(self): """Number of bytes currently occupied in cache.""" return self.cache.occupied_byte + @property + def cache_size(self): + """Total cache size in bytes.""" + return self.cache.cache_size + def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache_size}, " \ + return f"{self.__class__.__name__}(cache_size={self._cache_size}, " \ f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})" diff --git a/libCacheSim-python/pyproject.toml b/libCacheSim-python/pyproject.toml index 753aca946..10343bc52 100644 --- a/libCacheSim-python/pyproject.toml +++ b/libCacheSim-python/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "scikit_build_core.build" [project] name = "libcachesim" -version = "0.0.1" +version = "0.3.1" description="Python bindings for libCacheSim" readme = "README.md" requires-python = ">=3.9" diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp index 88dfb1411..6ab9fdce9 100644 --- a/libCacheSim-python/src/pylibcachesim.cpp +++ b/libCacheSim-python/src/pylibcachesim.cpp @@ -1,10 +1,14 @@ -#include #include +#include #include +// Suppress visibility warnings for pybind11 types +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattributes" + #include -#include #include +#include #include "config.h" #include "libCacheSim/cache.h" @@ -42,93 +46,106 @@ namespace py = pybind11; // Python Hook Cache Implementation class PythonHookCache { -private: - uint64_t cache_size_; - std::string cache_name_; - std::unordered_map objects_; // obj_id -> obj_size - py::object plugin_data_; - - // Hook functions - py::function init_hook_; - py::function hit_hook_; - py::function miss_hook_; - py::function eviction_hook_; 
- py::function remove_hook_; - py::object free_hook_; // Changed to py::object to allow py::none() - -public: - uint64_t n_req = 0; - uint64_t n_obj = 0; - uint64_t occupied_byte = 0; - uint64_t cache_size; - - PythonHookCache(uint64_t cache_size, const std::string& cache_name = "PythonHookCache") - : cache_size_(cache_size), cache_name_(cache_name), cache_size(cache_size), - free_hook_(py::none()) {} - - void set_hooks(py::function init_hook, py::function hit_hook, py::function miss_hook, - py::function eviction_hook, py::function remove_hook, - py::object free_hook = py::none()) { - init_hook_ = init_hook; - hit_hook_ = hit_hook; - miss_hook_ = miss_hook; - eviction_hook_ = eviction_hook; - remove_hook_ = remove_hook; - - // Handle free_hook properly - if (!free_hook.is_none()) { - free_hook_ = free_hook; - } else { - free_hook_ = py::none(); - } - - // Initialize plugin data - plugin_data_ = init_hook_(cache_size_); + private: + uint64_t cache_size_; + std::string cache_name_; + std::unordered_map objects_; // obj_id -> obj_size + py::object plugin_data_; + + // Hook functions + py::function init_hook_; + py::function hit_hook_; + py::function miss_hook_; + py::function eviction_hook_; + py::function remove_hook_; + py::object free_hook_; // Changed to py::object to allow py::none() + + public: + uint64_t n_req = 0; + uint64_t n_obj = 0; + uint64_t occupied_byte = 0; + uint64_t cache_size; + + PythonHookCache(uint64_t cache_size, + const std::string& cache_name = "PythonHookCache") + : cache_size_(cache_size), + cache_name_(cache_name), + cache_size(cache_size), + free_hook_(py::none()) {} + + void set_hooks(py::function init_hook, py::function hit_hook, + py::function miss_hook, py::function eviction_hook, + py::function remove_hook, py::object free_hook = py::none()) { + init_hook_ = init_hook; + hit_hook_ = hit_hook; + miss_hook_ = miss_hook; + eviction_hook_ = eviction_hook; + remove_hook_ = remove_hook; + + // Handle free_hook properly + if 
(!free_hook.is_none()) { + free_hook_ = free_hook; + } else { + free_hook_ = py::none(); } - bool get(const request_t& req) { - n_req++; + // Initialize plugin data + plugin_data_ = init_hook_(cache_size_); + } - auto it = objects_.find(req.obj_id); - if (it != objects_.end()) { - // Cache hit - hit_hook_(plugin_data_, req.obj_id, req.obj_size); - return true; + bool get(const request_t& req) { + n_req++; + + auto it = objects_.find(req.obj_id); + if (it != objects_.end()) { + // Cache hit + hit_hook_(plugin_data_, req.obj_id, req.obj_size); + return true; + } else { + // Cache miss - call miss hook first + miss_hook_(plugin_data_, req.obj_id, req.obj_size); + + // Check if eviction is needed + while (occupied_byte + req.obj_size > cache_size_ && !objects_.empty()) { + // Need to evict + uint64_t victim_id = + eviction_hook_(plugin_data_, req.obj_id, req.obj_size) + .cast(); + auto victim_it = objects_.find(victim_id); + if (victim_it != objects_.end()) { + occupied_byte -= victim_it->second; + objects_.erase(victim_it); + n_obj--; + remove_hook_(plugin_data_, victim_id); } else { - // Cache miss - need to insert - // Check if eviction is needed - if (occupied_byte + req.obj_size > cache_size_ && !objects_.empty()) { - // Need to evict - uint64_t victim_id = eviction_hook_(plugin_data_, req.obj_id, req.obj_size).cast(); - auto victim_it = objects_.find(victim_id); - if (victim_it != objects_.end()) { - occupied_byte -= victim_it->second; - objects_.erase(victim_it); - n_obj--; - remove_hook_(plugin_data_, victim_id); - } - } + // Safety check: if eviction hook returns invalid ID, break to avoid + // infinite loop + break; + } + } - // Insert new object if there's space - if (occupied_byte + req.obj_size <= cache_size_) { - objects_[req.obj_id] = req.obj_size; - occupied_byte += req.obj_size; - n_obj++; - } + // Insert new object if there's space + if (occupied_byte + req.obj_size <= cache_size_) { + objects_[req.obj_id] = req.obj_size; + occupied_byte += 
req.obj_size; + n_obj++; + } - miss_hook_(plugin_data_, req.obj_id, req.obj_size); - return false; - } + return false; } + } - ~PythonHookCache() { - if (!free_hook_.is_none()) { - py::function free_func = free_hook_.cast(); - free_func(plugin_data_); - } + ~PythonHookCache() { + if (!free_hook_.is_none()) { + py::function free_func = free_hook_.cast(); + free_func(plugin_data_); } + } }; +// Restore visibility warnings +#pragma GCC diagnostic pop + struct CacheDeleter { void operator()(cache_t* ptr) const { if (ptr != nullptr) ptr->cache_free(ptr); @@ -657,10 +674,11 @@ PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) * @brief Create a Python hook-based cache instance. */ py::class_(m, "PythonHookCache") - .def(py::init(), py::arg("cache_size"), py::arg("cache_name") = "PythonHookCache") - .def("set_hooks", &PythonHookCache::set_hooks, - py::arg("init_hook"), py::arg("hit_hook"), py::arg("miss_hook"), - py::arg("eviction_hook"), py::arg("remove_hook"), py::arg("free_hook") = py::none(), + .def(py::init(), py::arg("cache_size"), + py::arg("cache_name") = "PythonHookCache") + .def("set_hooks", &PythonHookCache::set_hooks, py::arg("init_hook"), + py::arg("hit_hook"), py::arg("miss_hook"), py::arg("eviction_hook"), + py::arg("remove_hook"), py::arg("free_hook") = py::none(), R"pbdoc( Set the hook functions for the cache. @@ -693,6 +711,124 @@ PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) .def_readwrite("occupied_byte", &PythonHookCache::occupied_byte) .def_readwrite("cache_size", &PythonHookCache::cache_size); + /** + * @brief Process a trace with a cache and return miss ratio. 
+ */ + m.def( + "process_trace", + [](cache_t& cache, reader_t& reader, int max_req = -1, int max_sec = -1, + int64_t start_time = -1, int64_t end_time = -1) { + request_t* req = new_request(); + int n_req = 0, n_hit = 0; + bool hit; + + read_one_req(&reader, req); + while (req->valid) { + // Check limits + if (max_req != -1 && n_req >= max_req) break; + if (max_sec != -1 && req->clock_time >= end_time) break; + if (start_time != -1 && req->clock_time < start_time) { + read_one_req(&reader, req); + continue; + } + + n_req += 1; + hit = cache.get(&cache, req); + if (hit) n_hit += 1; + read_one_req(&reader, req); + } + + free_request(req); + // return the miss ratio + return n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; + }, + py::arg("cache"), py::arg("reader"), py::arg("max_req") = -1, + py::arg("max_sec") = -1, py::arg("start_time") = -1, + py::arg("end_time") = -1, + R"pbdoc( + Process a trace with a cache and return miss ratio. + + This function processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. + + Args: + cache (Cache): The cache instance to use for processing. + reader (Reader): The trace reader instance. + max_req (int): Maximum number of requests to process (-1 for no limit). + max_sec (int): Maximum seconds to process (-1 for no limit). + start_time (int): Start time filter (-1 for no filter). + end_time (int): End time filter (-1 for no filter). + + Returns: + float: Miss ratio (0.0 to 1.0). + + Example: + >>> cache = libcachesim.LRU(1024*1024) + >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) + >>> miss_ratio = libcachesim.process_trace(cache, reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + )pbdoc"); + + /** + * @brief Process a trace with a Python hook cache and return miss ratio. 
+ */ + m.def( + "process_trace_python_hook", + [](PythonHookCache& cache, reader_t& reader, int max_req = -1, + int max_sec = -1, int64_t start_time = -1, int64_t end_time = -1) { + request_t* req = new_request(); + int n_req = 0, n_hit = 0; + bool hit; + + read_one_req(&reader, req); + while (req->valid) { + // Check limits + if (max_req != -1 && n_req >= max_req) break; + if (max_sec != -1 && req->clock_time >= end_time) break; + if (start_time != -1 && req->clock_time < start_time) { + read_one_req(&reader, req); + continue; + } + + n_req += 1; + hit = cache.get(*req); + if (hit) n_hit += 1; + read_one_req(&reader, req); + } + + free_request(req); + // return the miss ratio + return n_req > 0 ? 1.0 - (double)n_hit / n_req : 0.0; + }, + py::arg("cache"), py::arg("reader"), py::arg("max_req") = -1, + py::arg("max_sec") = -1, py::arg("start_time") = -1, + py::arg("end_time") = -1, + R"pbdoc( + Process a trace with a Python hook cache and return miss ratio. + + This function processes trace data entirely on the C++ side to avoid + data movement overhead between Python and C++. Specifically designed + for PythonHookCache instances. + + Args: + cache (PythonHookCache): The Python hook cache instance to use. + reader (Reader): The trace reader instance. + max_req (int): Maximum number of requests to process (-1 for no limit). + max_sec (int): Maximum seconds to process (-1 for no limit). + start_time (int): Start time filter (-1 for no filter). + end_time (int): End time filter (-1 for no filter). + + Returns: + float: Miss ratio (0.0 to 1.0). 
+ + Example: + >>> cache = libcachesim.PythonHookCachePolicy(1024*1024) + >>> cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + >>> reader = libcachesim.open_trace("trace.csv", libcachesim.TraceType.CSV_TRACE) + >>> miss_ratio = libcachesim.process_trace_python_hook(cache.cache, reader) + >>> print(f"Miss ratio: {miss_ratio:.4f}") + )pbdoc"); + #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); #else diff --git a/libCacheSim-python/tests/test_process_trace.py b/libCacheSim-python/tests/test_process_trace.py new file mode 100644 index 000000000..bc841b1eb --- /dev/null +++ b/libCacheSim-python/tests/test_process_trace.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +""" +Test file for process_trace functionality. +""" + +import sys +import os + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader with binary trace file.""" + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value) + + +def test_process_trace_native(): + """Test process_trace with native LRU cache.""" + print("Testing process_trace with native LRU...") + + # Open trace + reader = create_trace_reader() + if reader is None: + print("Warning: Test trace file not found, skipping test") + return # Skip test + + # Create LRU cache + cache = lcs.LRU(1024*1024) # 1MB cache + + # Process trace and get miss ratio + miss_ratio = cache.process_trace(reader, max_req=1000) + + 
print(f"Native LRU miss ratio (first 1000 requests): {miss_ratio:.4f}") + + # Verify miss ratio is reasonable (should be between 0 and 1) + assert 0.0 <= miss_ratio <= 1.0, f"Invalid miss ratio: {miss_ratio}" + print("PASS: Native LRU process_trace test PASSED") + + +def test_process_trace_python_hook(): + """Test process_trace with Python hook cache.""" + print("\nTesting process_trace with Python hook cache...") + + # Open trace + reader = create_trace_reader() + if reader is None: + print("Warning: Test trace file not found, skipping test") + return # Skip test + + # Create Python hook LRU cache + cache = lcs.PythonHookCachePolicy(1024*1024, "TestLRU") + + # Define LRU hooks + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + # Set hooks + cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Test both methods + # Method 1: Direct function call + miss_ratio1 = lcs.process_trace_python_hook(cache.cache, reader, max_req=1000) + + # Need to reopen the trace for second test + reader2 = create_trace_reader() + if reader2 is None: + print("Warning: Cannot reopen trace file, skipping second test") + # Continue with just the first test result + assert miss_ratio1 is not None and 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio: {miss_ratio1}" + return + + # Reset cache for fair comparison + cache2 = lcs.PythonHookCachePolicy(1024*1024, "TestLRU2") + cache2.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + # Method 2: Convenience method + miss_ratio2 = cache2.process_trace(reader2, max_req=1000) + + print(f"Python hook LRU miss ratio (method 1): {miss_ratio1:.4f}") + print(f"Python hook LRU miss ratio (method 2): 
{miss_ratio2:.4f}") + + # Verify both methods give the same result and miss ratios are reasonable + assert 0.0 <= miss_ratio1 <= 1.0, f"Invalid miss ratio 1: {miss_ratio1}" + assert 0.0 <= miss_ratio2 <= 1.0, f"Invalid miss ratio 2: {miss_ratio2}" + assert abs(miss_ratio1 - miss_ratio2) < 0.001, f"Different results from the two methods: {miss_ratio1} vs {miss_ratio2}" + print("PASS: Python hook process_trace test PASSED") + + +def test_compare_native_vs_python_hook(): + """Compare native LRU vs Python hook LRU using process_trace.""" + print("\nComparing native LRU vs Python hook LRU using process_trace...") + + cache_size = 512*1024 # 512KB cache + max_requests = 500 + + # Test native LRU + native_cache = lcs.LRU(cache_size) + reader1 = create_trace_reader() + if reader1 is None: + print("Warning: Test trace file not found, skipping test") + return # Skip test + + native_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) + + # Test Python hook LRU + hook_cache = lcs.PythonHookCachePolicy(cache_size, "HookLRU") + + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = True + + def eviction_hook(lru_dict, obj_id, obj_size): + return next(iter(lru_dict)) + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + reader2 = create_trace_reader() + if reader2 is None: + print("Warning: Cannot reopen trace file, skipping comparison") + return # Skip test + + hook_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) + + print(f"Native LRU miss ratio: {native_miss_ratio:.4f}") + print(f"Python hook LRU miss ratio: {hook_miss_ratio:.4f}") + print(f"Difference: {abs(native_miss_ratio - hook_miss_ratio):.4f}") + + # They should be very similar (allowing for some small differences due to implementation 
details) + assert abs(native_miss_ratio - hook_miss_ratio) < 0.05, f"Too much difference: {abs(native_miss_ratio - hook_miss_ratio):.4f}" + print("PASS: Native vs Python hook comparison test PASSED") + + +def test_error_handling(): + """Test error handling for process_trace.""" + print("\nTesting error handling...") + + cache = lcs.PythonHookCachePolicy(1024) + + reader = create_trace_reader() + if reader is None: + print("Warning: Test trace file not found, skipping error test") + return # Skip test + + # Try to process trace without setting hooks + try: + cache.process_trace(reader) + assert False, "Should have raised RuntimeError" + except RuntimeError as e: + print(f"Correctly caught error: {e}") + print("PASS: Error handling test PASSED") + + +def test_lru_implementation_accuracy(): + """Test that Python hook LRU implementation matches native LRU closely.""" + print("Testing LRU implementation accuracy...") + + cache_size = 1024 * 1024 # 1MB + max_requests = 100 + + # Create readers + reader1 = create_trace_reader() + reader2 = create_trace_reader() + + if not reader1 or not reader2: + print("Warning: Cannot open trace files for LRU accuracy test") + return + + # Test native LRU + native_cache = lcs.LRU(cache_size) + native_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) + + # Test Python hook LRU + hook_cache = lcs.PythonHookCachePolicy(cache_size, "AccuracyTestLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_optimized_lru_hooks() + hook_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + + hook_miss_ratio = hook_cache.process_trace(reader2, max_req=max_requests) + + # Calculate difference + difference = abs(native_miss_ratio - hook_miss_ratio) + percentage_diff = (difference / native_miss_ratio) * 100 if native_miss_ratio > 0 else 0 + + print(f"Native LRU miss ratio: {native_miss_ratio:.6f}") + print(f"Hook LRU miss ratio: {hook_miss_ratio:.6f}") + print(f"Percentage difference: 
{percentage_diff:.4f}%") + + # Assert that the difference is small (< 5%) + assert percentage_diff < 5.0, f"LRU implementation difference too large: {percentage_diff:.4f}%" + print("PASS: LRU implementation accuracy test passed") + + +def create_optimized_lru_hooks(): + """Create optimized LRU hooks that closely match native LRU behavior.""" + + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +if __name__ == "__main__": + tests = [ + test_process_trace_native, + test_process_trace_python_hook, + test_compare_native_vs_python_hook, + test_error_handling, + test_lru_implementation_accuracy, + ] + + all_passed = True + for test in tests: + try: + test() # Just call the test, don't check return value + print(f"PASS: {test.__name__} passed") + except Exception as e: + print(f"FAIL: {test.__name__} failed with exception: {e}") + all_passed = False + + if all_passed: + print("\nAll process_trace tests PASSED!") + else: + print("\nSome process_trace tests FAILED!") diff --git a/libCacheSim-python/tests/test_python_hook_cache.py b/libCacheSim-python/tests/test_python_hook_cache.py index 2e0326a82..34a8d7b8e 100644 --- a/libCacheSim-python/tests/test_python_hook_cache.py +++ b/libCacheSim-python/tests/test_python_hook_cache.py @@ -168,14 +168,14 @@ def remove_hook(lru_dict, obj_id): print(f"Request {i+1}: obj_id={obj_id}") print(f" Native LRU: {'HIT' if native_result else 'MISS'}") print(f" Hook LRU: {'HIT' if hook_result else 'MISS'}") - print(f" Match: {'✓' if match else '✗'}") + print(f" Match: {'PASS' if match else 'FAIL'}") # Compare cache 
statistics stats_match = (native_lru.cache.n_obj == hook_lru.n_obj and native_lru.cache.occupied_byte == hook_lru.occupied_byte) print(f" Native stats: {native_lru.cache.n_obj} objects, {native_lru.cache.occupied_byte} bytes") print(f" Hook stats: {hook_lru.n_obj} objects, {hook_lru.occupied_byte} bytes") - print(f" Stats match: {'✓' if stats_match else '✗'}") + print(f" Stats match: {'PASS' if stats_match else 'FAIL'}") print() if not match: @@ -192,12 +192,8 @@ def remove_hook(lru_dict, obj_id): print(f" Matching results: {hit_rate_matches}") print(f" Accuracy: {accuracy:.1f}%") - if accuracy == 100.0: - print("✓ LRU comparison test PASSED - Both implementations behave identically!") - return True - else: - print("✗ LRU comparison test FAILED - Implementations differ!") - return False + assert accuracy == 100.0, f"LRU implementations differ! Accuracy: {accuracy:.1f}%" + print("PASS: LRU comparison test PASSED - Both implementations behave identically!") def test_lru_comparison_variable_sizes(): @@ -268,22 +264,18 @@ def remove_hook(lru_dict, obj_id): print(f"Request {i+1}: obj_id={obj_id}, size={obj_size}") print(f" Native LRU: {'HIT' if native_result else 'MISS'}") print(f" Hook LRU: {'HIT' if hook_result else 'MISS'}") - print(f" Result match: {'✓' if result_match else '✗'}") + print(f" Result match: {'PASS' if result_match else 'FAIL'}") print(f" Native stats: {native_lru.cache.n_obj} objects, {native_lru.cache.occupied_byte} bytes") print(f" Hook stats: {hook_lru.n_obj} objects, {hook_lru.occupied_byte} bytes") - print(f" Stats match: {'✓' if stats_match else '✗'}") + print(f" Stats match: {'PASS' if stats_match else 'FAIL'}") print() if not result_match or not stats_match: all_match = False print(f"ERROR: Mismatch at request {i+1}") - if all_match: - print("✓ Variable size LRU comparison test PASSED!") - return True - else: - print("✗ Variable size LRU comparison test FAILED!") - return False + assert all_match, "Variable size LRU comparison failed - 
implementations differ!" + print("PASS: Variable size LRU comparison test PASSED!") if __name__ == "__main__": diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py new file mode 100644 index 000000000..f91463726 --- /dev/null +++ b/libCacheSim-python/tests/test_unified_interface.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +""" +Test the unified interface for all cache policies. +""" + +import sys +import os + +# Add the parent directory to the Python path for development testing +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) + +try: + import libcachesim as lcs +except ImportError as e: + print(f"Error importing libcachesim: {e}") + print("Make sure the Python binding is built and installed") + sys.exit(1) + +from collections import OrderedDict + + +def create_trace_reader(): + """Helper function to create a trace reader.""" + data_file = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "data", + "cloudPhysicsIO.oracleGeneral.bin" + ) + if not os.path.exists(data_file): + return None + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value) + + +def create_test_lru_hooks(): + """Create LRU hooks for testing.""" + + def init_hook(cache_size): + return OrderedDict() + + def hit_hook(lru_dict, obj_id, obj_size): + if obj_id in lru_dict: + lru_dict.move_to_end(obj_id) + + def miss_hook(lru_dict, obj_id, obj_size): + lru_dict[obj_id] = obj_size + + def eviction_hook(lru_dict, obj_id, obj_size): + if lru_dict: + return next(iter(lru_dict)) + return obj_id + + def remove_hook(lru_dict, obj_id): + lru_dict.pop(obj_id, None) + + return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook + + +def test_unified_process_trace_interface(): + """Test that all cache policies have the same process_trace interface.""" + print("Testing unified process_trace interface...") + + cache_size = 1024 * 1024 # 1MB + max_requests = 100 + + # Create trace 
reader + reader = create_trace_reader() + if not reader: + print("Warning: Cannot open trace file for unified interface test") + return True + + # Test different cache policies + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "ARC": lcs.ARC(cache_size), + } + + # Add Python hook cache + python_cache = lcs.PythonHookCachePolicy(cache_size, "TestLRU") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() + python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Python Hook LRU"] = python_cache + + print("\n--- Testing unified process_trace interface ---") + + results = {} + for name, cache in caches.items(): + # Create fresh reader for each test + reader = create_trace_reader() + if not reader: + continue + + # Test process_trace method exists + assert hasattr(cache, 'process_trace'), f"{name} missing process_trace method" + + # Test process_trace functionality + miss_ratio = cache.process_trace(reader, max_req=max_requests) + results[name] = miss_ratio + + print(f"{name:15s}: miss_ratio = {miss_ratio:.4f}") + print(f" cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes") + + print(f"\nPASS: All {len(caches)} cache policies support unified process_trace interface!") + # Test passes - no explicit return needed for pytest + + +def test_unified_properties_interface(): + """Test that all cache policies have the same properties interface.""" + print("\nTesting unified properties interface...") + + cache_size = 1024 * 1024 + + # Create different cache types + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + "Python Hook": lcs.PythonHookCachePolicy(cache_size, "TestCache"), + } + + print("\n--- Testing unified properties interface ---") + + required_properties = ['cache_size', 'n_req', 'n_obj', 'occupied_byte'] + + for name, cache in caches.items(): + print(f"{name:15s}:") + + # Test all required properties exist + for prop in 
required_properties: + assert hasattr(cache, prop), f"{name} missing {prop} property" + value = getattr(cache, prop) + print(f" {prop} = {value}") + + # Test cache_size is correct + assert cache.cache_size == cache_size, f"{name} cache_size mismatch" + + print("PASS: All cache policies support unified properties interface!") + # Test passes - no explicit return needed for pytest + + +def test_get_interface_consistency(): + """Test that get() method works consistently across all cache policies.""" + print("\nTesting get() interface consistency...") + + cache_size = 1024 * 1024 + + # Create caches + caches = { + "LRU": lcs.LRU(cache_size), + "FIFO": lcs.FIFO(cache_size), + } + + # Add Python hook cache + python_cache = lcs.PythonHookCachePolicy(cache_size, "ConsistencyTest") + init_hook, hit_hook, miss_hook, eviction_hook, remove_hook = create_test_lru_hooks() + python_cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + caches["Python Hook"] = python_cache + + # Create a test request using the proper request class + test_req = lcs.Request() + test_req.obj_id = 1 + test_req.obj_size = 1024 + + print("Testing get() method with test request...") + + for name, cache in caches.items(): + # Test get method exists + assert hasattr(cache, 'get'), f"{name} missing get method" + + # Test first access (should be miss) + result = cache.get(test_req) + print(f"{name:15s}: first access = {'HIT' if result else 'MISS'}") + + # Test properties updated + assert cache.n_req > 0, f"{name} n_req not updated" + assert cache.n_obj > 0, f"{name} n_obj not updated" + assert cache.occupied_byte > 0, f"{name} occupied_byte not updated" + + print("PASS: Get interface consistency test passed!") + # Test passes - no explicit return needed for pytest + + +if __name__ == "__main__": + tests = [ + test_unified_process_trace_interface, + test_unified_properties_interface, + test_get_interface_consistency, + ] + + all_passed = True + for test in tests: + try: + test() # Just 
call the test, don't check return value + print(f"PASS: {test.__name__} passed") + except Exception as e: + print(f"FAIL: {test.__name__} failed with exception: {e}") + all_passed = False + + if all_passed: + print("\nAll unified interface tests PASSED!") + else: + print("\nSome unified interface tests FAILED!") diff --git a/scripts/install_python.sh b/scripts/install_python.sh index a224feff7..f6808f854 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -3,14 +3,16 @@ # Build the main libCacheSim C++ library first echo "Building main libCacheSim library..." rm -rf ./build -cmake -G Ninja -B build -DENABLE_3L_CACHE=ON +cmake -G Ninja -B build # -DENABLE_3L_CACHE=ON ninja -C build # Now build and install the Python binding echo "Building Python binding..." -cd libCacheSim-python +echo "Sync python version..." +python scripts/sync_python_version.py +pushd libCacheSim-python pip install -e . -vvv -cd .. +popd # Test that the import works echo "Testing import..." @@ -18,6 +20,6 @@ python -c "import libcachesim" # Run tests echo "Running tests..." -cd libCacheSim-python +pushd libCacheSim-python pytest . -cd .. 
+popd From 1808ded12e484c23b4e79a51cced384a83363fdf Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 10 Jul 2025 22:52:19 -0400 Subject: [PATCH 04/10] Apply the suggestion from copilot --- libCacheSim-python/src/pylibcachesim.cpp | 5 ++--- libCacheSim-python/tests/utils.py | 5 ++--- scripts/install_python.sh | 1 + scripts/sync_python_version.py | 10 +++++++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp index 6ab9fdce9..49c8b36b8 100644 --- a/libCacheSim-python/src/pylibcachesim.cpp +++ b/libCacheSim-python/src/pylibcachesim.cpp @@ -164,8 +164,6 @@ struct ReaderDeleter { } }; -namespace py = pybind11; - PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) m.doc() = R"pbdoc( libCacheSim Python bindings @@ -617,7 +615,8 @@ PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) /** * @brief Create a TinyLFU cache instance. */ - // mark evivtion parsing need change + // TODO: Review and update the eviction parsing logic in TinyLFU_init if + // necessary. 
m.def( "TinyLFU_init", [](uint64_t cache_size, std::string main_cache, double window_size) { diff --git a/libCacheSim-python/tests/utils.py b/libCacheSim-python/tests/utils.py index 632fdc3f8..6eabbdd2a 100644 --- a/libCacheSim-python/tests/utils.py +++ b/libCacheSim-python/tests/utils.py @@ -9,9 +9,8 @@ def get_reference_data(eviction_algo, cache_size_ratio): ) with open(data_file, "r") as f: # noqa: PTH123 lines = f.readlines() + key = "3LCache" if eviction_algo == "ThreeLCache" else eviction_algo for line in lines: - if eviction_algo == "ThreeLCache": - eviction_algo = "3LCache" - if line.startswith(f"{eviction_algo},{cache_size_ratio}"): + if line.startswith(f"{key},{cache_size_ratio}"): return float(line.split(",")[-1]) return None \ No newline at end of file diff --git a/scripts/install_python.sh b/scripts/install_python.sh index f6808f854..d0ff2eba6 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -euo pipefail # Build the main libCacheSim C++ library first echo "Building main libCacheSim library..." 
diff --git a/scripts/sync_python_version.py b/scripts/sync_python_version.py index 01b1631b8..65e51a92f 100644 --- a/scripts/sync_python_version.py +++ b/scripts/sync_python_version.py @@ -51,7 +51,11 @@ def update_pyproject_toml(version): pyproject_data = f.read() # Update the version line in pyproject.toml, make it can match any version in version.txt, like "0.3.1" or "dev" - current_version = re.search(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", pyproject_data).group(1) + match = re.search(r"version = \"(dev|[0-9]+\.[0-9]+\.[0-9]+)\"", pyproject_data) + if not match: + print("Error: Could not find a valid version line in pyproject.toml", file=sys.stderr) + return False + current_version = match.group(1) if current_version == version: print(f"Python binding version already up to date: {version}") return False @@ -77,9 +81,9 @@ def main(): updated = update_pyproject_toml(main_version) if updated: - print("✓ Python binding version synchronized successfully") + print("Python binding version synchronized successfully") else: - print("✓ No changes needed") + print("No changes needed") except Exception as e: print(f"Error: {e}", file=sys.stderr) sys.exit(1) From ef02bfb37a38ce78659737480e3bf02f61961e6f Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 10 Jul 2025 23:15:59 -0400 Subject: [PATCH 05/10] Remove create_cache --- libCacheSim-python/libcachesim/__init__.py | 2 - libCacheSim-python/libcachesim/__init__.pyi | 9 ---- libCacheSim-python/src/pylibcachesim.cpp | 23 --------- libCacheSim-python/tests/test_eviction.py | 53 ++++----------------- 4 files changed, 10 insertions(+), 77 deletions(-) diff --git a/libCacheSim-python/libcachesim/__init__.py b/libCacheSim-python/libcachesim/__init__.py index e01826375..5cac3c360 100644 --- a/libCacheSim-python/libcachesim/__init__.py +++ b/libCacheSim-python/libcachesim/__init__.py @@ -6,7 +6,6 @@ Request, __doc__, __version__, - create_cache, open_trace, process_trace, process_trace_python_hook, @@ -44,7 +43,6 @@ 
"PythonHookCachePolicy", "__doc__", "__version__", - "create_cache", "open_trace", "process_trace", "process_trace_python_hook", diff --git a/libCacheSim-python/libcachesim/__init__.pyi b/libCacheSim-python/libcachesim/__init__.pyi index 2d4937f7f..4148ddc84 100644 --- a/libCacheSim-python/libcachesim/__init__.pyi +++ b/libCacheSim-python/libcachesim/__init__.pyi @@ -7,7 +7,6 @@ libCacheSim Python bindings .. autosummary:: :toctree: _generate - create_cache open_trace ARC_init Clock_init @@ -28,14 +27,6 @@ libCacheSim Python bindings from .const import TraceType -def create_cache( - eviction_algo: str, - cache_size: int, - eviction_params: str, - consider_obj_metadata: bool -) -> Cache: ... - - def open_trace( trace_path: str, type: TraceType, diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp index 49c8b36b8..7528235a1 100644 --- a/libCacheSim-python/src/pylibcachesim.cpp +++ b/libCacheSim-python/src/pylibcachesim.cpp @@ -364,29 +364,6 @@ PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) Reader: A new reader instance for the trace. )pbdoc"); - /** - * @brief Generic function to create a cache instance. - */ - m.def( - "create_cache", - [](const std::string& eviction_algo, const uint64_t cache_size, - const std::string& eviction_params, - bool consider_obj_metadata) { return nullptr; }, - py::arg("eviction_algo"), py::arg("cache_size"), - py::arg("eviction_params"), py::arg("consider_obj_metadata"), - R"pbdoc( - Create a cache instance. - - Args: - eviction_algo (str): Eviction algorithm to use (e.g., "LRU", "FIFO", "Random"). - cache_size (int): Size of the cache in bytes. - eviction_params (str): Additional parameters for the eviction algorithm. - consider_obj_metadata (bool): Whether to consider object metadata in eviction decisions. - - Returns: - Cache: A new cache instance. - )pbdoc"); - /* TODO(haocheng): should we support all parameters in the * common_cache_params_t? (hash_power, etc.) 
*/ diff --git a/libCacheSim-python/tests/test_eviction.py b/libCacheSim-python/tests/test_eviction.py index ef896b2ca..1de462a84 100644 --- a/libCacheSim-python/tests/test_eviction.py +++ b/libCacheSim-python/tests/test_eviction.py @@ -12,7 +12,6 @@ Sieve, TinyLFU, TwoQ, - create_cache, ) from tests.utils import get_reference_data @@ -20,12 +19,12 @@ @pytest.mark.parametrize("eviction_algo", [ FIFO, ARC, - # Clock, - # LRU, - # S3FIFO, - # Sieve, - # TinyLFU, - # TwoQ, + Clock, + LRU, + S3FIFO, + Sieve, + TinyLFU, + TwoQ, ]) @pytest.mark.parametrize("cache_size_ratio", [0.01]) def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader): @@ -35,7 +34,7 @@ def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader): cache = eviction_algo(cache_size=int(mock_reader.get_wss()*cache_size_ratio)) req_count = 0 miss_count = 0 - + # Limit the number of requests to avoid long test times # max_requests = 1000 for i, req in enumerate(mock_reader): @@ -45,50 +44,18 @@ def test_eviction_algo(eviction_algo, cache_size_ratio, mock_reader): if not hit: miss_count += 1 req_count += 1 - + if req_count == 0: pytest.skip("No requests processed") - + miss_ratio = miss_count / req_count reference_miss_ratio = get_reference_data(eviction_algo.__name__, cache_size_ratio) if reference_miss_ratio is None: pytest.skip(f"No reference data for {eviction_algo.__name__} with cache size ratio {cache_size_ratio}") assert abs(miss_ratio - reference_miss_ratio) < 0.01, f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}" - + except Exception as e: print(f"Error in test_eviction_algo: {e}") raise finally: pass - - -# @pytest.mark.parametrize("eviction_algo", [ -# "FIFO", -# "ARC", -# "Clock", -# "LRU", -# "S3FIFO", -# "Sieve", -# "TinyLFU", -# "TwoQ", -# ]) -# @pytest.mark.parametrize("cache_size_ratio", [0.01, 0.1]) -# def test_eviction_algo_generic(eviction_algo, cache_size_ratio, mock_reader): -# cache = create_cache(eviction_algo=eviction_algo, -# 
cache_size=int(mock_reader.get_wss()*cache_size_ratio), -# eviction_params="", -# consider_obj_metadata=False) -# req_count = 0 -# miss_count = 0 -# for req in mock_reader: -# hit = cache.get(req) -# if not hit: -# miss_count += 1 -# req_count += 1 - -# miss_ratio = miss_count / req_count -# print("Check eviction algo: ", eviction_algo, "with cache size ratio: ", cache_size_ratio) -# reference_miss_ratio = get_reference_data(eviction_algo, cache_size_ratio) -# if reference_miss_ratio is None: -# pytest.skip(f"No reference data for {eviction_algo} with cache size ratio {cache_size_ratio}") -# assert abs(miss_ratio - reference_miss_ratio) < 0.01, f"Miss ratio {miss_ratio} is not close to reference {reference_miss_ratio}" From 2fbede907356ac5d8b9d6b9cb3aaa477ba2fc6d7 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 10 Jul 2025 23:27:38 -0400 Subject: [PATCH 06/10] Remove redundant header --- libCacheSim-python/src/pylibcachesim.cpp | 31 ++++++++++++++++--- .../tests/test_python_hook_cache.py | 6 ++-- .../tests/test_unified_interface.py | 4 +-- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp index 7528235a1..c0f2909c3 100644 --- a/libCacheSim-python/src/pylibcachesim.cpp +++ b/libCacheSim-python/src/pylibcachesim.cpp @@ -28,9 +28,6 @@ /* eviction */ #include "libCacheSim/evictionAlgo.h" -/* sampling */ -#include "libCacheSim/sampling.h" - /* cache simulator */ #include "libCacheSim/profilerLRU.h" #include "libCacheSim/simulator.h" @@ -164,6 +161,8 @@ struct ReaderDeleter { } }; +namespace py = pybind11; + PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) m.doc() = R"pbdoc( libCacheSim Python bindings @@ -364,6 +363,29 @@ PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) Reader: A new reader instance for the trace. )pbdoc"); + /** + * @brief Generic function to create a cache instance. 
+ */ + m.def( + "create_cache", + [](const std::string& eviction_algo, const uint64_t cache_size, + const std::string& eviction_params, + bool consider_obj_metadata) { return nullptr; }, + py::arg("eviction_algo"), py::arg("cache_size"), + py::arg("eviction_params"), py::arg("consider_obj_metadata"), + R"pbdoc( + Create a cache instance. + + Args: + eviction_algo (str): Eviction algorithm to use (e.g., "LRU", "FIFO", "Random"). + cache_size (int): Size of the cache in bytes. + eviction_params (str): Additional parameters for the eviction algorithm. + consider_obj_metadata (bool): Whether to consider object metadata in eviction decisions. + + Returns: + Cache: A new cache instance. + )pbdoc"); + /* TODO(haocheng): should we support all parameters in the * common_cache_params_t? (hash_power, etc.) */ @@ -592,8 +614,7 @@ PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) /** * @brief Create a TinyLFU cache instance. */ - // TODO: Review and update the eviction parsing logic in TinyLFU_init if - // necessary. 
+ // mark evivtion parsing need change m.def( "TinyLFU_init", [](uint64_t cache_size, std::string main_cache, double window_size) { diff --git a/libCacheSim-python/tests/test_python_hook_cache.py b/libCacheSim-python/tests/test_python_hook_cache.py index 34a8d7b8e..c84c03cbb 100644 --- a/libCacheSim-python/tests/test_python_hook_cache.py +++ b/libCacheSim-python/tests/test_python_hook_cache.py @@ -5,6 +5,7 @@ import sys import os +import pytest # Add the parent directory to the Python path for development testing sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) @@ -87,11 +88,8 @@ def test_error_handling(): req.obj_id = 1 req.obj_size = 100 - try: + with pytest.raises(RuntimeError): cache.get(req) - print("ERROR: Should have raised RuntimeError") - except RuntimeError as e: - print(f"Correctly caught error: {e}") print("Error handling test passed!") diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py index f91463726..e399a22a5 100644 --- a/libCacheSim-python/tests/test_unified_interface.py +++ b/libCacheSim-python/tests/test_unified_interface.py @@ -5,6 +5,7 @@ import sys import os +import pytest # Add the parent directory to the Python path for development testing sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) @@ -65,8 +66,7 @@ def test_unified_process_trace_interface(): # Create trace reader reader = create_trace_reader() if not reader: - print("Warning: Cannot open trace file for unified interface test") - return True + pytest.skip("Skipping test: Trace file not available") # Test different cache policies caches = { From 9678a2deb237a6f5a2e54714c6341466b1c3a701 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Thu, 10 Jul 2025 23:46:36 -0400 Subject: [PATCH 07/10] Complete binding for trace type --- libCacheSim-python/libcachesim/const.py | 11 ++--------- libCacheSim-python/src/pylibcachesim.cpp | 14 +++++++++++--- libCacheSim-python/tests/test_process_trace.py | 
18 +++++++----------- .../tests/test_unified_interface.py | 2 +- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/libCacheSim-python/libcachesim/const.py b/libCacheSim-python/libcachesim/const.py index 9276d2447..142f3cccb 100644 --- a/libCacheSim-python/libcachesim/const.py +++ b/libCacheSim-python/libcachesim/const.py @@ -1,11 +1,4 @@ from __future__ import annotations -import enum - - -class TraceType(enum.Enum): - CSV_TRACE = 0 - BIN_TRACE = 1 - PLAIN_TXT_TRACE = 2 - ORACLE_GENERAL_TRACE = 3 - LCS_TRACE = 4 # libCacheSim format +# Import TraceType directly from the C++ binding to avoid duplication +from ._libcachesim import TraceType diff --git a/libCacheSim-python/src/pylibcachesim.cpp b/libCacheSim-python/src/pylibcachesim.cpp index c0f2909c3..43d875788 100644 --- a/libCacheSim-python/src/pylibcachesim.cpp +++ b/libCacheSim-python/src/pylibcachesim.cpp @@ -161,8 +161,6 @@ struct ReaderDeleter { } }; -namespace py = pybind11; - PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) m.doc() = R"pbdoc( libCacheSim Python bindings @@ -178,9 +176,19 @@ PYBIND11_MODULE(_libcachesim, m) { // NOLINT(readability-named-parameter) py::enum_(m, "TraceType") .value("CSV_TRACE", trace_type_e::CSV_TRACE) - .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) .value("BIN_TRACE", trace_type_e::BIN_TRACE) + .value("PLAIN_TXT_TRACE", trace_type_e::PLAIN_TXT_TRACE) + .value("ORACLE_GENERAL_TRACE", trace_type_e::ORACLE_GENERAL_TRACE) + .value("LCS_TRACE", trace_type_e::LCS_TRACE) .value("VSCSI_TRACE", trace_type_e::VSCSI_TRACE) + .value("TWR_TRACE", trace_type_e::TWR_TRACE) + .value("TWRNS_TRACE", trace_type_e::TWRNS_TRACE) + .value("ORACLE_SIM_TWR_TRACE", trace_type_e::ORACLE_SIM_TWR_TRACE) + .value("ORACLE_SYS_TWR_TRACE", trace_type_e::ORACLE_SYS_TWR_TRACE) + .value("ORACLE_SIM_TWRNS_TRACE", trace_type_e::ORACLE_SIM_TWRNS_TRACE) + .value("ORACLE_SYS_TWRNS_TRACE", trace_type_e::ORACLE_SYS_TWRNS_TRACE) + .value("VALPIN_TRACE", 
trace_type_e::VALPIN_TRACE) + .value("UNKNOWN_TRACE", trace_type_e::UNKNOWN_TRACE) .export_values(); // *************** structs *************** diff --git a/libCacheSim-python/tests/test_process_trace.py b/libCacheSim-python/tests/test_process_trace.py index bc841b1eb..0d08edeab 100644 --- a/libCacheSim-python/tests/test_process_trace.py +++ b/libCacheSim-python/tests/test_process_trace.py @@ -5,6 +5,7 @@ import sys import os +import pytest # Add the parent directory to the Python path for development testing sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) @@ -28,7 +29,7 @@ def create_trace_reader(): ) if not os.path.exists(data_file): return None - return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value) + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) def test_process_trace_native(): @@ -38,8 +39,7 @@ def test_process_trace_native(): # Open trace reader = create_trace_reader() if reader is None: - print("Warning: Test trace file not found, skipping test") - return # Skip test + pytest.skip("Test trace file not found, skipping test") # Create LRU cache cache = lcs.LRU(1024*1024) # 1MB cache @@ -61,8 +61,7 @@ def test_process_trace_python_hook(): # Open trace reader = create_trace_reader() if reader is None: - print("Warning: Test trace file not found, skipping test") - return # Skip test + pytest.skip("Test trace file not found, skipping test") # Create Python hook LRU cache cache = lcs.PythonHookCachePolicy(1024*1024, "TestLRU") @@ -126,8 +125,7 @@ def test_compare_native_vs_python_hook(): native_cache = lcs.LRU(cache_size) reader1 = create_trace_reader() if reader1 is None: - print("Warning: Test trace file not found, skipping test") - return # Skip test + pytest.skip("Test trace file not found, skipping test") native_miss_ratio = native_cache.process_trace(reader1, max_req=max_requests) @@ -175,8 +173,7 @@ def test_error_handling(): reader = create_trace_reader() if reader is None: - print("Warning: Test 
trace file not found, skipping error test") - return # Skip test + pytest.skip("Test trace file not found, skipping error test") # Try to process trace without setting hooks try: @@ -199,8 +196,7 @@ def test_lru_implementation_accuracy(): reader2 = create_trace_reader() if not reader1 or not reader2: - print("Warning: Cannot open trace files for LRU accuracy test") - return + pytest.skip("Cannot open trace files for LRU accuracy test") # Test native LRU native_cache = lcs.LRU(cache_size) diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py index e399a22a5..fcd973997 100644 --- a/libCacheSim-python/tests/test_unified_interface.py +++ b/libCacheSim-python/tests/test_unified_interface.py @@ -29,7 +29,7 @@ def create_trace_reader(): ) if not os.path.exists(data_file): return None - return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE.value) + return lcs.open_trace(data_file, lcs.TraceType.ORACLE_GENERAL_TRACE) def create_test_lru_hooks(): From f1909d3c60817a2e33d1d43b7a03fa8481d2311d Mon Sep 17 00:00:00 2001 From: haochengxia Date: Fri, 11 Jul 2025 00:07:52 -0400 Subject: [PATCH 08/10] Clean up python code --- libCacheSim-python/libcachesim/eviction.py | 42 +++++++------- libCacheSim-python/tests/conftest.py | 2 +- .../tests/test_unified_interface.py | 58 ++++++++++++++----- 3 files changed, 66 insertions(+), 36 deletions(-) diff --git a/libCacheSim-python/libcachesim/eviction.py b/libCacheSim-python/libcachesim/eviction.py index 1a145be2e..fa1cfb836 100644 --- a/libCacheSim-python/libcachesim/eviction.py +++ b/libCacheSim-python/libcachesim/eviction.py @@ -32,7 +32,7 @@ def __repr__(self) -> str: pass @abstractmethod - def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1): + def process_trace(self, reader, max_req: int = -1, max_sec: int = -1, start_time: int = -1, end_time: int = -1) -> float: """Process a trace with this cache and return miss ratio. 
This method processes trace data entirely on the C++ side to avoid @@ -63,7 +63,7 @@ def init_cache(self, cache_size: int, **kwargs) -> Cache: def get(self, req: Request) -> bool: return self.cache.get(req) - def process_trace(self, reader, max_req=-1, max_sec=-1, start_time=-1, end_time=-1): + def process_trace(self, reader, max_req: int = -1, max_sec: int = -1, start_time: int = -1, end_time: int = -1) -> float: """Process a trace with this cache and return miss ratio. This method processes trace data entirely on the C++ side to avoid @@ -150,9 +150,9 @@ def init_cache(self, cache_size: int, **kwargs): return Clock_init(cache_size, n_bit_counter, init_freq) def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ - f"n_bit_counter={self.n_bit_counter}, " \ - f"init_freq={self.init_freq})" + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"n_bit_counter={self.n_bit_counter}, " + f"init_freq={self.init_freq})") class TwoQ(EvictionPolicy): @@ -183,9 +183,9 @@ def init_cache(self, cache_size: int, **kwargs): return TwoQ_init(cache_size, ain_size_ratio, aout_size_ratio) def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ - f"ain_size_ratio={self.ain_size_ratio}, " \ - f"aout_size_ratio={self.aout_size_ratio})" + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"ain_size_ratio={self.ain_size_ratio}, " + f"aout_size_ratio={self.aout_size_ratio})") class LRB(EvictionPolicy): @@ -214,8 +214,8 @@ def init_cache(self, cache_size: int, **kwargs) -> Cache: return LRB_init(cache_size, objective) def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ - f"objective={self.objective})" + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"objective={self.objective})") class LRU(EvictionPolicy): @@ -286,10 +286,10 @@ def init_cache(self, cache_size: int, **kwargs): return 
S3FIFO_init(cache_size, fifo_size_ratio, ghost_size_ratio, move_to_main_threshold) def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ - f"fifo_size_ratio={self.fifo_size_ratio}, " \ - f"ghost_size_ratio={self.ghost_size_ratio}, " \ - f"move_to_main_threshold={self.move_to_main_threshold})" + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"fifo_size_ratio={self.fifo_size_ratio}, " + f"ghost_size_ratio={self.ghost_size_ratio}, " + f"move_to_main_threshold={self.move_to_main_threshold})") class Sieve(EvictionPolicy): @@ -326,8 +326,8 @@ def init_cache(self, cache_size: int, **kwargs): return ThreeLCache_init(cache_size, objective) def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ - f"objective={self.objective})" + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"objective={self.objective})") class TinyLFU(EvictionPolicy): @@ -355,9 +355,9 @@ def init_cache(self, cache_size: int, **kwargs): return TinyLFU_init(cache_size, main_cache, window_size) def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " \ - f"main_cache={self.main_cache}, " \ - f"window_size={self.window_size})" + return (f"{self.__class__.__name__}(cache_size={self.cache.cache_size}, " + f"main_cache={self.main_cache}, " + f"window_size={self.window_size})") @@ -508,5 +508,5 @@ def cache_size(self): return self.cache.cache_size def __repr__(self): - return f"{self.__class__.__name__}(cache_size={self._cache_size}, " \ - f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})" + return (f"{self.__class__.__name__}(cache_size={self._cache_size}, " + f"cache_name='{self.cache_name}', hooks_set={self._hooks_set})") diff --git a/libCacheSim-python/tests/conftest.py b/libCacheSim-python/tests/conftest.py index 2ea1ade15..5335134b1 100644 --- a/libCacheSim-python/tests/conftest.py +++ 
b/libCacheSim-python/tests/conftest.py @@ -26,7 +26,7 @@ def mock_reader(): try: if hasattr(reader, 'close'): reader.close() - except: + except Exception: # Be specific about exception type pass # Don't explicitly del reader here, let Python handle it gc.collect() diff --git a/libCacheSim-python/tests/test_unified_interface.py b/libCacheSim-python/tests/test_unified_interface.py index fcd973997..48d3751de 100644 --- a/libCacheSim-python/tests/test_unified_interface.py +++ b/libCacheSim-python/tests/test_unified_interface.py @@ -21,7 +21,11 @@ def create_trace_reader(): - """Helper function to create a trace reader.""" + """Helper function to create a trace reader. + + Returns: + Reader or None: A trace reader instance, or None if trace file not found. + """ data_file = os.path.join( os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "data", @@ -33,24 +37,33 @@ def create_trace_reader(): def create_test_lru_hooks(): - """Create LRU hooks for testing.""" + """Create LRU hooks for testing. 
+ + Returns: + tuple: A tuple of (init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) + """ def init_hook(cache_size): + """Initialize LRU data structure.""" return OrderedDict() def hit_hook(lru_dict, obj_id, obj_size): + """Handle cache hit by moving to end (most recently used).""" if obj_id in lru_dict: lru_dict.move_to_end(obj_id) def miss_hook(lru_dict, obj_id, obj_size): + """Handle cache miss by adding new object.""" lru_dict[obj_id] = obj_size def eviction_hook(lru_dict, obj_id, obj_size): + """Return the least recently used object ID for eviction.""" if lru_dict: return next(iter(lru_dict)) return obj_id def remove_hook(lru_dict, obj_id): + """Remove object from LRU structure.""" lru_dict.pop(obj_id, None) return init_hook, hit_hook, miss_hook, eviction_hook, remove_hook @@ -86,22 +99,27 @@ def test_unified_process_trace_interface(): results = {} for name, cache in caches.items(): # Create fresh reader for each test - reader = create_trace_reader() - if not reader: - continue + test_reader = create_trace_reader() + if not test_reader: + pytest.skip(f"Cannot create reader for {name} test") # Test process_trace method exists assert hasattr(cache, 'process_trace'), f"{name} missing process_trace method" # Test process_trace functionality - miss_ratio = cache.process_trace(reader, max_req=max_requests) + miss_ratio = cache.process_trace(test_reader, max_req=max_requests) results[name] = miss_ratio print(f"{name:15s}: miss_ratio = {miss_ratio:.4f}") print(f" cache stats: {cache.n_obj} objects, {cache.occupied_byte} bytes") + # Verify miss_ratio is valid + assert 0.0 <= miss_ratio <= 1.0, f"{name} returned invalid miss_ratio: {miss_ratio}" + print(f"\nPASS: All {len(caches)} cache policies support unified process_trace interface!") - # Test passes - no explicit return needed for pytest + + # Verify we got results for all caches + assert len(results) == len(caches), "Not all caches were tested" def test_unified_properties_interface(): @@ -134,7 +152,6 @@ 
def test_unified_properties_interface(): assert cache.cache_size == cache_size, f"{name} cache_size mismatch" print("PASS: All cache policies support unified properties interface!") - # Test passes - no explicit return needed for pytest def test_get_interface_consistency(): @@ -163,20 +180,33 @@ def test_get_interface_consistency(): print("Testing get() method with test request...") for name, cache in caches.items(): + # Reset cache state for consistent testing + initial_n_req = cache.n_req + initial_n_obj = cache.n_obj + initial_occupied = cache.occupied_byte + # Test get method exists assert hasattr(cache, 'get'), f"{name} missing get method" - # Test first access (should be miss) + # Test first access (should be miss for new object) result = cache.get(test_req) print(f"{name:15s}: first access = {'HIT' if result else 'MISS'}") - # Test properties updated - assert cache.n_req > 0, f"{name} n_req not updated" - assert cache.n_obj > 0, f"{name} n_obj not updated" - assert cache.occupied_byte > 0, f"{name} occupied_byte not updated" + # Test properties updated correctly + assert cache.n_req > initial_n_req, f"{name} n_req not updated" + if not result: # If it was a miss, object should be added + assert cache.n_obj > initial_n_obj, f"{name} n_obj not updated after miss" + assert cache.occupied_byte > initial_occupied, f"{name} occupied_byte not updated after miss" + + # Test second access to same object (should be hit) + second_result = cache.get(test_req) + print(f"{name:15s}: second access = {'HIT' if second_result else 'MISS'}") + + # Second access should be a hit (unless cache is too small) + if cache.cache_size >= test_req.obj_size: + assert second_result, f"{name} second access should be a hit" print("PASS: Get interface consistency test passed!") - # Test passes - no explicit return needed for pytest if __name__ == "__main__": From 22c4953b451df92f43cf92b6b9e14f07f68bf52a Mon Sep 17 00:00:00 2001 From: Percy Date: Sat, 12 Jul 2025 19:37:58 -0400 Subject: 
[PATCH 09/10] Update README.md --- libCacheSim-python/README.md | 474 ++++++++++++++++++++++------ libCacheSim-python/export/README.md | 94 ++---- 2 files changed, 409 insertions(+), 159 deletions(-) diff --git a/libCacheSim-python/README.md b/libCacheSim-python/README.md index 5dfb549a2..db46af6a1 100644 --- a/libCacheSim-python/README.md +++ b/libCacheSim-python/README.md @@ -1,61 +1,118 @@ # libCacheSim Python Binding -Python bindings for libCacheSim, a high-performance cache simulator. +Python bindings for libCacheSim, a high-performance cache simulator and analysis library. ## Installation +### Quick Install (Recommended) ```bash -cd .. +# From the libCacheSim root directory bash scripts/install_python.sh ``` -Test +### Manual Install +```bash +# Build the main libCacheSim library first +cmake -G Ninja -B build +ninja -C build +# Install Python binding +cd libCacheSim-python +pip install -e . -v ``` + +### Testing +```bash +# Run all tests python -m pytest . + +# Test import +python -c "import libcachesim; print('Success!')" ``` -## Usage +## Quick Start -### Basic Cache Usage +### Basic Usage ```python -import libcachesim as cachesim +import libcachesim as lcs -# Create a cache with FIFO eviction policy -cache = cachesim.FIFO(cache_size=1024*1024) +# Create a cache +cache = lcs.LRU(cache_size=1024*1024) # 1MB cache -# Create a request -req = cachesim.Request() +# Process requests +req = lcs.Request() req.obj_id = 1 req.obj_size = 100 -# Check if object is in cache -hit = cache.get(req) -print(f"Cache hit: {hit}") +hit = cache.get(req) # False (first access) +hit = cache.get(req) # True (second access) + +# Check statistics +print(f"Hit rate: {(cache.n_req - cache.n_miss)/cache.n_req:.2%}") +``` + +### Trace Processing + +```python +import libcachesim as lcs + +# Open trace and process efficiently +reader = lcs.open_trace("trace.bin", lcs.TraceType.ORACLE_GENERAL_TRACE.value) +cache = lcs.S3FIFO(cache_size=1024*1024) + +# Process entire trace efficiently (C++ 
backend) +miss_ratio = cache.process_trace(reader) +print(f"Miss ratio: {miss_ratio:.4f}") + +# Process with limits and time ranges +miss_ratio = cache.process_trace( + reader, + max_req=10000, # Process max 10K requests + max_sec=3600, # Process max 1 hour + start_time=1000, # Start from timestamp 1000 + end_time=5000 # End at timestamp 5000 +) ``` -### Custom Cache Policies +## Custom Cache Policies + +Implement custom cache replacement algorithms using pure Python functions - no C/C++ compilation required. + +### Python Hook Cache Overview + +The `PythonHookCachePolicy` allows you to define custom caching behavior through Python callback functions. This is perfect for: +- Prototyping new cache algorithms +- Educational purposes and learning +- Research and experimentation +- Custom business logic implementation + +### Hook Functions -The Python binding supports custom cache replacement algorithms using Python function hooks - no C/C++ compilation required: +You need to implement these callback functions: -#### Python Hook Cache +- **`init_hook(cache_size: int) -> Any`**: Initialize your data structure +- **`hit_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits +- **`miss_hook(data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses +- **`eviction_hook(data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict +- **`remove_hook(data: Any, obj_id: int) -> None`**: Clean up when object removed +- **`free_hook(data: Any) -> None`**: [Optional] Final cleanup -Define custom cache policies using pure Python functions: +### Example: Custom LRU Implementation ```python -import libcachesim as cachesim +import libcachesim as lcs from collections import OrderedDict # Create a Python hook-based cache -cache = cachesim.PythonHookCachePolicy(cache_size=1024*1024, cache_name="MyLRU") +cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="MyLRU") # Define LRU policy hooks def init_hook(cache_size): return 
OrderedDict() # Track access order def hit_hook(lru_dict, obj_id, obj_size): - lru_dict.move_to_end(obj_id) # Move to end (most recent) + lru_dict.move_to_end(obj_id) # Move to most recent def miss_hook(lru_dict, obj_id, obj_size): lru_dict[obj_id] = True # Add to end @@ -70,46 +127,20 @@ def remove_hook(lru_dict, obj_id): cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook) # Use it like any other cache -req = cachesim.Request() +req = lcs.Request() req.obj_id = 1 req.obj_size = 100 hit = cache.get(req) ``` -### Available Cache Algorithms - -The following built-in cache algorithms are available: - -- **FIFO**: First-In-First-Out -- **LRU**: Least Recently Used -- **ARC**: Adaptive Replacement Cache -- **Clock**: Clock algorithm -- **S3FIFO**: Simple, Fast, Fair FIFO -- **Sieve**: Sieve cache algorithm -- **TinyLFU**: TinyLFU with window -- **TwoQ**: Two-Queue algorithm -- **LRB**: Learning-based cache (if enabled) -- **ThreeLCache**: Three-level cache (if enabled) - -Each algorithm can be used similarly: - -```python -# Examples of different cache algorithms -lru_cache = cachesim.LRU(cache_size=1024*1024) -arc_cache = cachesim.ARC(cache_size=1024*1024) -s3fifo_cache = cachesim.S3FIFO(cache_size=1024*1024) -``` - -### Custom Cache Implementation Example - -Here's a complete example implementing a custom FIFO cache using Python hooks: +### Example: Custom FIFO Implementation ```python -import libcachesim as cachesim +import libcachesim as lcs from collections import deque # Create a custom FIFO cache -cache = cachesim.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO") +cache = lcs.PythonHookCachePolicy(cache_size=1024, cache_name="CustomFIFO") def init_hook(cache_size): return deque() # Use deque for FIFO order @@ -127,44 +158,131 @@ def remove_hook(fifo_queue, obj_id): if fifo_queue and fifo_queue[0] == obj_id: fifo_queue.popleft() -# Set the hooks +# Set the hooks and test cache.set_hooks(init_hook, hit_hook, miss_hook, 
eviction_hook, remove_hook) -# Test the cache -req = cachesim.Request() +req = lcs.Request() req.obj_id = 1 req.obj_size = 100 hit = cache.get(req) print(f"Cache hit: {hit}") # Should be False (miss) ``` -### Testing and Validation +## Available Algorithms + +### Built-in Cache Algorithms + +#### Basic Algorithms +- **FIFO**: First-In-First-Out +- **LRU**: Least Recently Used +- **LFU**: Least Frequently Used +- **Clock**: Clock/Second-chance algorithm + +#### Advanced Algorithms +- **S3FIFO**: Simple, Fast, Fair FIFO (recommended for most workloads) +- **Sieve**: High-performance eviction algorithm +- **ARC**: Adaptive Replacement Cache +- **TwoQ**: Two-Queue algorithm +- **TinyLFU**: TinyLFU with window +- **SLRU**: Segmented LRU + +#### Research/ML Algorithms +- **LRB**: Learning-based cache (if enabled) +- **GLCache**: Machine learning-based cache +- **ThreeLCache**: Three-level cache hierarchy (if enabled) + +```python +import libcachesim as lcs + +# All algorithms use the same unified interface +cache_size = 1024 * 1024 # 1MB + +lru_cache = lcs.LRU(cache_size) +s3fifo_cache = lcs.S3FIFO(cache_size) # Recommended +sieve_cache = lcs.Sieve(cache_size) +arc_cache = lcs.ARC(cache_size) + +# All caches work identically +req = lcs.Request() +req.obj_id = 1 +req.obj_size = 100 +hit = lru_cache.get(req) +``` + +## Examples and Testing + +### Algorithm Comparison +```python +import libcachesim as lcs + +def compare_algorithms(trace_path): + reader = lcs.open_trace(trace_path, lcs.TraceType.VSCSI_TRACE.value) + algorithms = ['LRU', 'S3FIFO', 'Sieve', 'ARC'] + + print("Algorithm\tMiss Ratio") + print("-" * 25) + for algo_name in algorithms: + cache = getattr(lcs, algo_name)(cache_size=1024*1024) + miss_ratio = cache.process_trace(reader) + print(f"{algo_name}\t\t{miss_ratio:.4f}") -To ensure your custom cache implementation is correct, you can compare it against the built-in implementations: +compare_algorithms("workload.vscsi") +``` +### Performance Benchmarking 
```python -import libcachesim as cachesim +import time + +def benchmark_cache(cache, num_requests=100000): + """Benchmark cache performance""" + start_time = time.time() -# Test your custom cache against the built-in LRU + for i in range(num_requests): + req = lcs.Request() + req.obj_id = i % 1000 # Working set of 1000 objects + req.obj_size = 100 + cache.get(req) + + end_time = time.time() + throughput = num_requests / (end_time - start_time) + + print(f"Processed {num_requests} requests in {end_time - start_time:.2f}s") + print(f"Throughput: {throughput:.0f} requests/sec") + print(f"Miss ratio: {cache.n_miss / cache.n_req:.4f}") + +# Compare performance +lru_cache = lcs.LRU(cache_size=1024*1024) +s3fifo_cache = lcs.S3FIFO(cache_size=1024*1024) + +print("LRU Performance:") +benchmark_cache(lru_cache) + +print("\nS3-FIFO Performance:") +benchmark_cache(s3fifo_cache) +``` + +### Validate Custom Implementation +```python def test_custom_vs_builtin(): + """Test custom cache against built-in implementation""" cache_size = 1024 # Your custom LRU implementation - custom_cache = cachesim.PythonHookCachePolicy(cache_size, "CustomLRU") + custom_cache = lcs.PythonHookCachePolicy(cache_size, "CustomLRU") # ... set up your LRU hooks here ... 
# Built-in LRU for comparison - builtin_cache = cachesim.LRU(cache_size) + builtin_cache = lcs.LRU(cache_size) # Test with same request sequence test_requests = [(1, 100), (2, 100), (3, 100), (1, 100)] for obj_id, obj_size in test_requests: - req1 = cachesim.Request() + req1 = lcs.Request() req1.obj_id = obj_id req1.obj_size = obj_size - req2 = cachesim.Request() + req2 = lcs.Request() req2.obj_id = obj_id req2.obj_size = obj_size @@ -175,60 +293,230 @@ def test_custom_vs_builtin(): print(f"obj_id {obj_id}: {'HIT' if custom_result else 'MISS'} ✓") ``` -### Hook Function Reference +## Advanced Usage + +### Multi-Format Trace Processing + +```python +import libcachesim as lcs + +# Supported trace types +trace_types = { + "oracle": lcs.TraceType.ORACLE_GENERAL_TRACE.value, + "csv": lcs.TraceType.CSV_TRACE.value, + "vscsi": lcs.TraceType.VSCSI_TRACE.value, + "txt": lcs.TraceType.TXT_TRACE.value +} + +# Open different trace formats +oracle_reader = lcs.open_trace("trace.bin", trace_types["oracle"]) +csv_reader = lcs.open_trace("trace.csv", trace_types["csv"], + "time-col=1,obj-id-col=2,obj-size-col=3,delimiter=,") + +# Process traces with different caches +caches = [ + lcs.LRU(cache_size=1024*1024), + lcs.S3FIFO(cache_size=1024*1024), + lcs.Sieve(cache_size=1024*1024) +] + +for i, cache in enumerate(caches): + miss_ratio = cache.process_trace(oracle_reader) + print(f"Cache {i} miss ratio: {miss_ratio:.4f}") +``` + +### Cache Hierarchy Simulation + +```python +def simulate_cache_hierarchy(): + """Simulate a two-level cache hierarchy""" + + # L1 cache (small, fast) + l1_cache = lcs.LRU(cache_size=64*1024) # 64KB -When implementing `PythonHookCachePolicy`, you need to provide these hook functions: + # L2 cache (larger, slower) + l2_cache = lcs.LRU(cache_size=1024*1024) # 1MB -- **`init_hook(cache_size: int) -> Any`**: Initialize and return plugin data structure -- **`hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache hits -- 
**`miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None`**: Handle cache misses -- **`eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int`**: Return object ID to evict -- **`remove_hook(plugin_data: Any, obj_id: int) -> None`**: Clean up when object is removed -- **`free_hook(plugin_data: Any) -> None`**: [Optional] Clean up plugin resources + # Simulate requests + total_requests = 0 + l1_hits = 0 + l2_hits = 0 -The `plugin_data` is whatever object you return from `init_hook()` - it can be any Python object like a list, dict, class instance, etc. + for obj_id in range(1000): + req = lcs.Request() + req.obj_id = obj_id % 100 # Working set of 100 objects + req.obj_size = 1024 -### Unified Interface + total_requests += 1 -All cache policies (both built-in and Python hook-based) share the same unified interface: + # Check L1 first + if l1_cache.get(req): + l1_hits += 1 + # Check L2 on L1 miss + elif l2_cache.get(req): + l2_hits += 1 + # Promote to L1 + l1_cache.get(req) + + print(f"L1 hit rate: {l1_hits/total_requests:.2%}") + print(f"L2 hit rate: {l2_hits/total_requests:.2%}") + print(f"Overall hit rate: {(l1_hits+l2_hits)/total_requests:.2%}") + +simulate_cache_hierarchy() +``` + +### Cache Statistics Monitoring + +```python +def analyze_cache_behavior(): + """Detailed cache statistics analysis""" + cache = lcs.S3FIFO(cache_size=1024*1024) + + # Process some requests + for i in range(1000): + req = lcs.Request() + req.obj_id = i % 100 + req.obj_size = 1024 + cache.get(req) + + # Access detailed statistics + print("=== Cache Statistics ===") + print(f"Cache size: {cache.cache_size:,} bytes") + print(f"Occupied space: {cache.occupied_byte:,} bytes") + print(f"Utilization: {cache.occupied_byte/cache.cache_size:.2%}") + print(f"Objects stored: {cache.n_obj:,}") + print(f"Total requests: {cache.n_req:,}") + print(f"Cache hits: {cache.n_req - cache.n_miss:,}") + print(f"Cache misses: {cache.n_miss:,}") + print(f"Hit rate: {(cache.n_req - 
cache.n_miss)/cache.n_req:.2%}") + print(f"Miss rate: {cache.n_miss/cache.n_req:.2%}") + +analyze_cache_behavior() +``` + +## API Reference + +### Unified Cache Interface + +All cache policies (built-in and Python hook-based) share the same interface: ```python -import libcachesim as cachesim +import libcachesim as lcs # All cache policies work the same way -cache = cachesim.LRU(cache_size=1024*1024) +cache = lcs.LRU(cache_size=1024*1024) # or -cache = cachesim.PythonHookCachePolicy(cache_size=1024*1024) -# cache.set_hooks(...) for Python hook cache +cache = lcs.PythonHookCachePolicy(cache_size=1024*1024, cache_name="Custom") -# Unified interface for all caches: -req = cachesim.Request() -req.obj_id = 1 -req.obj_size = 100 -hit = cache.get(req) # Process single request +# Unified methods for all caches: +req = lcs.Request() +req.obj_id = 123 # Object identifier (required) +req.obj_size = 1024 # Object size in bytes (required) +req.timestamp = 1000 # Request timestamp (optional) +req.op = 1 # Operation type (optional, default=1) -reader = cachesim.open_trace("trace.bin", cachesim.TraceType.ORACLE_GENERAL_TRACE.value) -miss_ratio = cache.process_trace(reader) # Process entire trace efficiently +hit = cache.get(req) # Process single request - returns True if hit, False if miss + +# Batch processing (faster for large traces) +reader = lcs.open_trace("trace.bin", lcs.TraceType.ORACLE_GENERAL_TRACE.value) +miss_ratio = cache.process_trace(reader, max_req=10000) # Unified properties for all caches: print(f"Cache size: {cache.cache_size}") print(f"Objects: {cache.n_obj}") print(f"Occupied bytes: {cache.occupied_byte}") -print(f"Requests processed: {cache.n_req}") +print(f"Total requests: {cache.n_req}") +print(f"Cache misses: {cache.n_miss}") +print(f"Hit rate: {(cache.n_req - cache.n_miss) / cache.n_req:.2%}") ``` -### Efficient Trace Processing - -The `process_trace` method processes trace data entirely on the C++ side to minimize overhead: +### Trace Reader ```python -# 
Process entire trace with optional limits +# Open trace with specific format +reader = lcs.open_trace( + trace_path="trace.csv", + trace_type=lcs.TraceType.CSV_TRACE.value, + trace_type_params="time-col=1,obj-id-col=2,obj-size-col=3,delimiter=," +) + +# Process trace with options miss_ratio = cache.process_trace( reader, - max_req=10000, # Process max 10K requests - max_sec=3600, # Process max 1 hour of trace - start_time=1000, # Start from timestamp 1000 - end_time=5000 # End at timestamp 5000 + max_req=10000, # Process max requests + max_sec=3600, # Process max seconds of trace + start_time=1000, # Start from timestamp + end_time=5000 # End at timestamp ) -print(f"Miss ratio: {miss_ratio:.4f}") ``` + +### Supported Trace Formats +```python +# Oracle format (binary, fastest) +reader = lcs.open_trace("trace.bin", lcs.TraceType.ORACLE_GENERAL_TRACE.value) + +# CSV format with custom parameters +reader = lcs.open_trace("trace.csv", lcs.TraceType.CSV_TRACE.value, + "time-col=1,obj-id-col=2,obj-size-col=3,delimiter=,") + +# VSCSI format +reader = lcs.open_trace("trace.vscsi", lcs.TraceType.VSCSI_TRACE.value) + +# Plain text format +reader = lcs.open_trace("trace.txt", lcs.TraceType.TXT_TRACE.value) +``` + +### Python Hook Cache Reference + +When implementing `PythonHookCachePolicy`, provide these hook functions: + +```python +def init_hook(cache_size: int) -> Any: + """Initialize and return plugin data structure""" + return {} # Can be any Python object + +def hit_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + """Handle cache hits - update your data structure""" + pass + +def miss_hook(plugin_data: Any, obj_id: int, obj_size: int) -> None: + """Handle cache misses - add object to your data structure""" + pass + +def eviction_hook(plugin_data: Any, obj_id: int, obj_size: int) -> int: + """Return object ID to evict when cache is full""" + return victim_obj_id + +def remove_hook(plugin_data: Any, obj_id: int) -> None: + """Clean up when object is removed 
from cache""" + pass + +def free_hook(plugin_data: Any) -> None: + """[Optional] Final cleanup when cache is destroyed""" + pass + +# Set hooks +cache.set_hooks(init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook) +``` + +## Troubleshooting + +### Common Issues + +**Import Error**: Make sure libCacheSim C++ library is built first: +```bash +cmake -G Ninja -B build && ninja -C build +``` + +**Performance Issues**: Use `process_trace()` for large workloads instead of individual `get()` calls for better performance. + +**Memory Usage**: Monitor cache statistics (`cache.occupied_byte`) and ensure proper cache size limits for your system. + +**Custom Cache Issues**: Validate your custom implementation against built-in algorithms using the test functions above. + +### Getting Help + +- Check the [main documentation](/doc/) for detailed guides +- Run tests: `python -m pytest libCacheSim-python/` +- Open issues on [GitHub](https://github.com/1a1a11a/libCacheSim/issues) +- Review [examples](/example) in the main repository diff --git a/libCacheSim-python/export/README.md b/libCacheSim-python/export/README.md index b3406c3d0..976b1daa8 100644 --- a/libCacheSim-python/export/README.md +++ b/libCacheSim-python/export/README.md @@ -1,85 +1,47 @@ -# libCacheSim Python Binding Export +# Python Binding Export System -This directory contains the export mechanism for sharing variables between the main libCacheSim project and the Python binding. +Build system bridge for sharing CMake variables between the main libCacheSim project and Python binding. -## Overview +## Purpose -The `export/CMakeLists.txt` file serves as a bridge between the main libCacheSim project and the Python binding, ensuring that all necessary variables (source files, include directories, compiler flags, etc.) are properly exported and can be imported by the Python binding's CMakeLists.txt. 
+The `export/CMakeLists.txt` exports all necessary build variables (source files, include directories, compiler flags, etc.) from the main project to the Python binding, enabling consistent builds without duplicating configuration. ## How It Works -### 1. Variable Export Process +1. **Export**: Main project writes variables to `export_vars.cmake` +2. **Import**: Python binding includes this file during CMake configuration +3. **Build**: Python binding uses shared variables for consistent compilation -The export mechanism works in the following steps: +## Key Exported Variables -1. **Path Conversion**: Converts relative source file paths to absolute paths using the `convert_to_absolute_paths` function -2. **Variable Collection**: Gathers all necessary variables from the main project -3. **File Generation**: Writes all variables to `export_vars.cmake` in the build directory -4. **Import**: The Python binding's CMakeLists.txt includes this file to access all variables +### Source Files +- Cache algorithms, data structures, trace readers +- Profilers, utilities, analyzers -### 2. 
Exported Variables - -The following categories of variables are exported: - -#### Source Files -- `ABS_cache_sources` - Cache-related source files -- `ABS_dataStructure_sources` - Data structure source files -- `ABS_traceReader_sources` - Trace reader source files -- `ABS_profiler_sources` - Profiler source files -- `ABS_utils_sources` - Utility source files -- `ABS_traceAnalyzer_sources` - Trace analyzer source files -- `ABS_mrcProfiler_sources` - MRC profiler source files - -#### Project Metadata -- `LIBCACHESIM_VERSION` - Version information - -#### Include Directories -- `libCacheSim_include_dir` - Main include directory -- `libCacheSim_binary_include_dir` - Binary include directory -- `GLib_INCLUDE_DIRS` - GLib include directories -- `XGBOOST_INCLUDE_DIR` - XGBoost include directory -- `LIGHTGBM_PATH` - LightGBM include directory -- `ZSTD_INCLUDE_DIR` - ZSTD include directory - -#### Dependencies -- `dependency_libs` - Dependency libraries - -#### Compiler Flags -- `LIBCACHESIM_C_FLAGS` - C compiler flags -- `LIBCACHESIM_CXX_FLAGS` - C++ compiler flags - -#### Build Options -- `USE_HUGEPAGE` - Hugepage usage -- `ENABLE_TESTS` - Test enablement -- `ENABLE_GLCACHE` - GLCache enablement -- `SUPPORT_TTL` - TTL support -- `OPT_SUPPORT_ZSTD_TRACE` - ZSTD trace support -- `ENABLE_LRB` - LRB enablement -- `ENABLE_3L_CACHE` - 3L Cache enablement -- `LOG_LEVEL_LOWER` - Log level +### Build Configuration +- Include directories (main, GLib, ZSTD, XGBoost, LightGBM) +- Compiler flags (C/C++) +- Dependency libraries +- Build options (hugepage, tests, optional features) ## Usage -### In Main Project - -The main project's CMakeLists.txt includes this export directory: - +**Main Project** (`CMakeLists.txt`): ```cmake add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libCacheSim-python/export) ``` -### In Python Binding +**Python Binding** (`libCacheSim-python/CMakeLists.txt`): +```cmake +set(EXPORT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../build/export_vars.cmake") 
+include("${EXPORT_FILE}") +``` -The Python binding's CMakeLists.txt imports the exported variables: +## For Developers -```cmake -set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") -set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") +This system ensures the Python binding automatically picks up changes to: +- New source files added to the main project +- Updated compiler flags or dependencies +- Modified build options -if(EXISTS "${EXPORT_FILE}") - include("${EXPORT_FILE}") - message(STATUS "Loaded variables from export_vars.cmake") -else() - message(FATAL_ERROR "export_vars.cmake not found") -endif() -``` \ No newline at end of file +No manual synchronization needed between main project and Python binding builds. From 0b0cef4e853c4c1c7d483c7085007847be29279f Mon Sep 17 00:00:00 2001 From: haochengxia Date: Sun, 13 Jul 2025 05:42:24 +0000 Subject: [PATCH 10/10] Prepare for PyPI --- .gitignore | 2 ++ libCacheSim-python/CMakeLists.txt | 15 +++++++---- libCacheSim-python/MAINFEST.in | 0 libCacheSim-python/export/CMakeLists.txt | 5 ++++ libCacheSim-python/pyproject.toml | 25 +++++++++++++------ .../dataStructure/minimalIncrementCBF.c | 2 +- libCacheSim/include/libCacheSim/mem.h | 2 +- scripts/build_pypi.sh | 1 + 8 files changed, 37 insertions(+), 15 deletions(-) create mode 100644 libCacheSim-python/MAINFEST.in create mode 100644 scripts/build_pypi.sh diff --git a/.gitignore b/.gitignore index 9913f147a..620e8536b 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ sftp-config.json # Clangd cache *.cache/ .lint-logs/ +# Python wheels +*.whl diff --git a/libCacheSim-python/CMakeLists.txt b/libCacheSim-python/CMakeLists.txt index a40a08cae..f3e1c5d6b 100644 --- a/libCacheSim-python/CMakeLists.txt +++ b/libCacheSim-python/CMakeLists.txt @@ -1,7 +1,12 @@ cmake_minimum_required(VERSION 3.15...3.27) # Include exported variables from cache -set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +if(DEFINED LIBCB_BUILD_DIR) + set(PARENT_BUILD_DIR 
"${LIBCB_BUILD_DIR}") + message(STATUS "Using provided LIBCB_BUILD_DIR: ${LIBCB_BUILD_DIR}") +else() + set(PARENT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +endif() set(EXPORT_FILE "${PARENT_BUILD_DIR}/export_vars.cmake") if(EXISTS "${EXPORT_FILE}") @@ -48,10 +53,10 @@ include_directories(${GLib_CONFIG_INCLUDE_DIR}) include_directories(${XGBOOST_INCLUDE_DIR}) include_directories(${LIGHTGBM_PATH}) include_directories(${ZSTD_INCLUDE_DIR}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim/bin) +include_directories(${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin) # Find the main libCacheSim library -set(MAIN_PROJECT_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../build") +set(MAIN_PROJECT_BUILD_DIR "${PARENT_BUILD_DIR}") set(MAIN_PROJECT_LIB_PATH "${MAIN_PROJECT_BUILD_DIR}/liblibCacheSim.a") if(EXISTS "${MAIN_PROJECT_LIB_PATH}") @@ -61,7 +66,7 @@ if(EXISTS "${MAIN_PROJECT_LIB_PATH}") add_library(libCacheSim_main STATIC IMPORTED) set_target_properties(libCacheSim_main PROPERTIES IMPORTED_LOCATION "${MAIN_PROJECT_LIB_PATH}" - INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim/include;${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim/utils/include;${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim" + INTERFACE_INCLUDE_DIRECTORIES "${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/utils/include;${MAIN_PROJECT_SOURCE_DIR}/libCacheSim" ) # Link dependencies that the main library needs @@ -74,7 +79,7 @@ endif() python_add_library(_libcachesim MODULE src/pylibcachesim.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../libCacheSim/bin/cli_reader_utils.c + ${MAIN_PROJECT_SOURCE_DIR}/libCacheSim/bin/cli_reader_utils.c WITH_SOABI ) diff --git a/libCacheSim-python/MAINFEST.in b/libCacheSim-python/MAINFEST.in new file mode 100644 index 000000000..e69de29bb diff --git a/libCacheSim-python/export/CMakeLists.txt b/libCacheSim-python/export/CMakeLists.txt index 3a9928f80..06a3566ac 100644 --- a/libCacheSim-python/export/CMakeLists.txt +++ 
b/libCacheSim-python/export/CMakeLists.txt @@ -3,6 +3,11 @@ set(EXPORT_FILE "${CMAKE_BINARY_DIR}/export_vars.cmake") file(WRITE "${EXPORT_FILE}" "") +get_filename_component(MAIN_PROJECT_SOURCE_DIR ${CMAKE_SOURCE_DIR} ABSOLUTE) +file(WRITE ${CMAKE_BINARY_DIR}/export_vars.cmake "set(MAIN_PROJECT_SOURCE_DIR \"${MAIN_PROJECT_SOURCE_DIR}\")\n") +file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(dependency_libs \"${dependency_libs}\")\n") +file(APPEND ${CMAKE_BINARY_DIR}/export_vars.cmake "set(LIBCACHESIM_VERSION \"${LIBCACHESIM_VERSION}\")\n") + # ============================================================================== # Export project metadata # ============================================================================== diff --git a/libCacheSim-python/pyproject.toml b/libCacheSim-python/pyproject.toml index 10343bc52..3bf6c66e8 100644 --- a/libCacheSim-python/pyproject.toml +++ b/libCacheSim-python/pyproject.toml @@ -16,8 +16,6 @@ test = ["pytest"] [tool.scikit-build] wheel.expand-macos-universal-tags = true -minimum-version = "build-system.requires" -cmake.args = ["-G", "Ninja"] [tool.pytest.ini_options] minversion = "8.0" @@ -32,12 +30,23 @@ testpaths = ["tests"] [tool.cibuildwheel] -build-frontend = "build[uv]" -test-command = "pytest {project}/tests" -test-extras = ["test"] +build-frontend = "build" +manylinux-x86_64-image = "quay.io/pypa/manylinux_2_34_x86_64" +# Only build for x86_64 architectures +build = "*-manylinux_x86_64" +# Install build dependencies using dnf. This runs once per container. +before-all = "dnf install -y ninja-build cmake libzstd-devel glib2-devel" +# We add 'rm -rf build' to ensure a clean build inside the container. +# The C++ core is built first, then the wheel build will use it. +# Use absolute paths to avoid issues with the working directory. +before-build = "rm -rf /project/build && cmake -S /project -B /project/build -G Ninja && cmake --build /project/build" +# Set the environment variable for the wheel build step. 
+environment = { CMAKE_ARGS = "-DLIBCB_BUILD_DIR=/project/build" } +# test-requires = "pytest" +# test-command = "pytest {project}/tests" -[tool.cibuildwheel.pyodide] -build-frontend = {name = "build", args = ["--exports", "whole_archive"]} +# [tool.cibuildwheel.pyodide] +# build-frontend = {name = "build", args = ["--exports", "whole_archive"]} [tool.ruff.lint] extend-select = [ @@ -70,4 +79,4 @@ ignore = [ isort.required-imports = ["from __future__ import annotations"] [tool.ruff.lint.per-file-ignores] -"tests/**" = ["T20"] \ No newline at end of file +"tests/**" = ["T20"] diff --git a/libCacheSim/dataStructure/minimalIncrementCBF.c b/libCacheSim/dataStructure/minimalIncrementCBF.c index 82967eedb..b8667eb88 100644 --- a/libCacheSim/dataStructure/minimalIncrementCBF.c +++ b/libCacheSim/dataStructure/minimalIncrementCBF.c @@ -53,7 +53,7 @@ int minimalIncrementCBF_init(struct minimalIncrementCBF *CBF, int entries, CBF->counter_num = CBF->hashes * 2; } - CBF->bf = (unsigned int *)calloc(sizeof(unsigned int), CBF->counter_num); + CBF->bf = (unsigned int *)calloc(CBF->counter_num, sizeof(unsigned int)); // TODO: check whether unsigned int is enough for the size of each counter if (CBF->bf == NULL) { diff --git a/libCacheSim/include/libCacheSim/mem.h b/libCacheSim/include/libCacheSim/mem.h index 2f587d8b6..8068f9179 100644 --- a/libCacheSim/include/libCacheSim/mem.h +++ b/libCacheSim/include/libCacheSim/mem.h @@ -22,7 +22,7 @@ #elif HEAP_ALLOCATOR == HEAP_ALLOCATOR_MALLOC #include <stdlib.h> #define my_malloc(type) (type *)malloc(sizeof(type)) -#define my_malloc_n(type, n) (type *)calloc(sizeof(type), n) +#define my_malloc_n(type, n) (type *)calloc(n, sizeof(type)) #define my_free(size, addr) free(addr) #elif HEAP_ALLOCATOR == HEAP_ALLOCATOR_ALIGNED_MALLOC diff --git a/scripts/build_pypi.sh b/scripts/build_pypi.sh new file mode 100644 index 000000000..5f4cda97c --- /dev/null +++ b/scripts/build_pypi.sh @@ -0,0 +1 @@ +python3 -m cibuildwheel --platform linux libCacheSim-python