Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 5 additions & 32 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,39 +1,12 @@
data/crankseg_2.mtx
data/pwtk.mtx
data/ship_001.mtx
data/web-Google.mtx
data/af_shell9.mtx
data/Fault_639.mtx
data/kkt_power.mtx
data/kron_g500-logn18.mtx
data/mouse_gene.mtx
data/nd24k.mtx
data/TSOPF_FS_b300_c3.mtx
data/mycielskian16.mtx
data/12month1.mtx
data/amazon0601.mtx
data/bibd_22_8.mtx
data/cage14.mtx
data/cant.mtx
data/cari.mtx
data/cit-Patents.mtx
data/CoupCons3D.mtx
data/dielFilterV2real.mtx
data/fem_hifreq_circuit.mtx
data/Hardesty2.mtx
data/hugetrace-00010.mtx
data/human_gene2.mtx
data/IMDB.mtx
data/ldoor.mtx
data/mycielskian14.mtx
data/nemsemm1.mtx
output3/*
output/*
data/**
output3/**
output/**
*.[Oo]
*.out
*.exe
*.bin
nohup.out
hyte
scache
.vscode/
.vscode/**
scripts/__pycache__/**
77 changes: 77 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
###################################################
# Constants
###################################################
TARGET := scache

SRC_DIRS := ./src
BUILD_DIR := ./build
DEBUG_DIR := $(BUILD_DIR)/debug
OUTPUT_DIR := ./output

# Find the source files, extract the bare filenames,
# and stick them in the build dirs as .o files.
# NOTE(review): $(notdir) means two sources with the same basename in
# different subdirectories would collide on one object file — confirm
# src/ stays flat, or key objects on the full relative path instead.
SRC := $(shell find $(SRC_DIRS) -name '*.cpp')
FILENAMES := $(basename $(notdir $(SRC)))
OBJS := $(FILENAMES:%=$(BUILD_DIR)/%.o)
DEBUG_OBJS := $(FILENAMES:%=$(DEBUG_DIR)/%.o)

# Compile/link flags. CXX defaults to g++ under GNU make, and using
# $(CXX) lets users override the compiler (make CXX=clang++).
# NOTE(review): these are compile options (conventionally CXXFLAGS);
# the CPPFLAGS name is kept so existing command-line overrides still work.
CPPFLAGS := -O3 -Wall -Wextra -Werror
DEBUG_CPPFLAGS := $(CPPFLAGS) -g

# Delete a half-written target when its recipe fails, so a corrupt
# file never looks "up to date" on the next run.
.DELETE_ON_ERROR:

# Phony targets (do not represent a file)
.PHONY: all remake debug clean

###################################################
# Targets
###################################################
# Use "order only" prereqs to make sure dirs are created
# Use the phony to make sure that we always copy the correct
# exe over
all: $(BUILD_DIR)/$(TARGET) | $(BUILD_DIR) $(OUTPUT_DIR)
	cp $(BUILD_DIR)/$(TARGET) ./$(TARGET)

# Redundant target for ease of use
$(TARGET): all

# Directory targets, silent
$(BUILD_DIR):
	@mkdir -p $@
$(DEBUG_DIR):
	@mkdir -p $@
$(OUTPUT_DIR):
	@mkdir -p $@

# Link step for the release build
$(BUILD_DIR)/$(TARGET): $(OBJS) | $(BUILD_DIR)
	$(CXX) $(CPPFLAGS) $^ -o $@

# Construct a unique compilation step for each src->object
# to minimize re-building. Depends on the src file
# and order-only on the directory.
# $(1) = object directory, $(2) = source path, $(3) = compile flags
define OBJ_COMP_TEMPLATE =
$(1)/$(basename $(notdir $(2))).o: $(2) | $(1)
	$$(CXX) -c $(3) $(2) -o $$@
endef

# Instantiate & evaluate each of the object build steps for f in $(SRC)
$(foreach f,$(SRC),$(eval $(call OBJ_COMP_TEMPLATE,$(BUILD_DIR),$(f),$(CPPFLAGS))))

#--------------------------------------------------
# Debug targets
#--------------------------------------------------
debug: $(DEBUG_DIR)/$(TARGET) | $(DEBUG_DIR) $(OUTPUT_DIR)
	cp $(DEBUG_DIR)/$(TARGET) ./$(TARGET)

$(DEBUG_DIR)/$(TARGET): $(DEBUG_OBJS) | $(DEBUG_DIR)
	$(CXX) $(DEBUG_CPPFLAGS) $^ -o $@

$(foreach f,$(SRC),$(eval $(call OBJ_COMP_TEMPLATE,$(DEBUG_DIR),$(f),$(DEBUG_CPPFLAGS))))

#--------------------------------------------------
# Helpful phonies
#--------------------------------------------------
remake: clean all

clean:
	$(RM) -r $(DEBUG_DIR) $(BUILD_DIR) $(OUTPUT_DIR) $(TARGET)
24 changes: 17 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
# SeaCache-sim
# UM EECS570 WN26 Project Repository
This is the repository for our EECS570 project. Our report will be added eventually. This project is forked from SeaCache-sim, and the README below largely follows from theirs.

## SeaCache-sim
This is the source code repository of the MICRO'25 paper *SeaCache: Efficient and Adaptive Caching for Sparse Accelerators*.

## Build
### Build

```bash
$ g++ -O3 -march=native src/config.cpp src/data.cpp src/estimation.cpp src/parameters.cpp src/util.cpp src/statistics.cpp src/cache.cpp src/dynamic.cpp src/simulator.cpp src/main.cpp -o scache
$ make
```

## Workload
### Workload
The scheduler and simulator accept sparse matrices from MatrixMarket (.mtx). The folder containing these matrices is under `data`.

## Run
### Run
The following command simulates multiplication of `matrix1` and `matrix2` with the configuration specified in `config/config.json`:
```bash
$ ./scache matrix1 matrix2 config/config.json
Expand All @@ -32,14 +35,21 @@ Here is a sample json configuration:
- "condensedOP": When set to true, it uses the condensed OP dataflow instead of the default Gustavson's dataflow.
- "tileDir": Represents the directory containing the tiling selection for each matrix.

## Code description

### scripts
The helper scripts:
- check that the required inputs exist (`scache`, tile files, `.mtx` files),
- generate config JSON files for a parameter sweep,
- run `./scache matrix matrix config.json` for each configuration,
- parse the output `.txt` files into a single CSV.

### Code description

The code shares the same base simulator as the previous work, [HYTE](https://github.com/tsinghua-ideal/HYTE-sim ""). However, this work shifts the focus from tile selection to cache optimization, with the pre-defined tiling selection located in the "tileDir" directory. The modifications primarily involve various cache schemes and prefetching techniques.

The changes are mainly found in the `cache.cpp` and `simulator.cpp` files. The proposed mapping scheme from Section 4.1 of the paper, along with the baseline mapping schemes, are implemented within different branches of the `cacheAccessFiber()` function in `cache.cpp`. The corresponding replacement policies, as described in Section 4.2, are invoked by the different cache schemes. For the guided replacement policies, the prefetch logic and maintenance of the prefetched metadata are implemented in the `prefetchRow()` function, which is iterated during simulation in `simulator.cpp`. The adaptive prefetch size introduced in Section 4.3 is also implemented in `simulator.cpp` and is called during the calculation process.


## Reference
### Reference

If you use this tool in your research, please kindly cite the following paper.

Expand Down
4 changes: 2 additions & 2 deletions config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"memorybandwidth": 68,
"PEcnt": 32,
"srambank": 32,
"baselinetest": 0,
"baselinetest": 1,
"condensedOP": false,
"tileDir": "./tiles/",
"outputDir": "./output/"
}
}
170 changes: 170 additions & 0 deletions generate_trace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from scipy.io import mmread
import numpy as np
import os
import argparse


def load_matrix(mtx_path):
    """Read a MatrixMarket file and return it as a CSR sparse matrix.

    Args:
        mtx_path: Path to the input ``.mtx`` file.

    Returns:
        The matrix converted to ``scipy.sparse.csr_matrix`` form.
    """
    print(f"[INFO] loading matrix from: {mtx_path}")
    # CSR gives us indptr/indices, which the trace generators rely on.
    matrix = mmread(mtx_path).tocsr()
    print("[INFO] load done")
    return matrix


def validate_matrix(A):
    """Collect summary statistics for a CSR matrix.

    Args:
        A: A ``scipy.sparse.csr_matrix``.

    Returns:
        Dict with ``rows``, ``cols``, ``nnz``, the per-row nonzero
        min/max/mean, and the count of completely empty rows.
    """
    num_rows, num_cols = A.shape
    # Consecutive indptr gaps give the nonzero count of each row.
    per_row = np.diff(A.indptr)
    has_rows = per_row.size > 0

    return {
        "rows": num_rows,
        "cols": num_cols,
        "nnz": A.nnz,
        "min_row_nnz": int(per_row.min()) if has_rows else 0,
        "max_row_nnz": int(per_row.max()) if has_rows else 0,
        "avg_row_nnz": float(per_row.mean()) if has_rows else 0.0,
        "empty_rows": int(np.sum(per_row == 0)),
    }


def print_stats(stats):
    """Pretty-print the statistics dict produced by validate_matrix."""
    report = [
        "\n[VALIDATE] matrix statistics",
        f"shape: ({stats['rows']}, {stats['cols']})",
        f"nonzeros: {stats['nnz']}",
        f"min row nnz: {stats['min_row_nnz']}",
        f"max row nnz: {stats['max_row_nnz']}",
        f"avg row nnz: {stats['avg_row_nnz']:.4f}",
        f"empty rows: {stats['empty_rows']}",
    ]
    for entry in report:
        print(entry)


def generate_fiber_trace(A, out_path, include_write=True):
    """Emit a teaching/demo fiber trace for Gustavson-style SpGEMM.

    For row i of A with nonzero column indices k1, k2, ..., a
    Gustavson-style kernel visits the matching fibers of B
    (B[k1], B[k2], ...) and then produces row i of C, so the trace is:

        ROW <i>
        READ_B_FIBER <k>    (one per nonzero in row i)
        WRITE_C_ROW <i>     (omitted when include_write is False)

    Args:
        A: CSR matrix driving the trace.
        out_path: Destination text file (overwritten).
        include_write: Whether to emit the WRITE_C_ROW events.

    Returns:
        Tuple ``(total_reads, total_writes)`` of emitted events.
    """
    read_count = 0
    write_count = 0

    with open(out_path, "w") as trace:
        trace.write("# Teaching/demo fiber trace for Gustavson-style sparse processing\n")
        trace.write("# Format:\n")
        trace.write("# ROW <i>\n")
        trace.write("# READ_B_FIBER <k>\n")
        trace.write("# WRITE_C_ROW <i>\n\n")

        # Walk consecutive indptr pairs: one (start, end) slice per row.
        bounds = zip(A.indptr[:-1], A.indptr[1:])
        for row, (start, end) in enumerate(bounds):
            trace.write(f"ROW {row}\n")
            for col in A.indices[start:end]:
                trace.write(f"READ_B_FIBER {int(col)}\n")
                read_count += 1

            if include_write:
                trace.write(f"WRITE_C_ROW {row}\n")
                write_count += 1

    return read_count, write_count


def generate_address_trace(A, out_path, elem_bytes=8, include_write=True):
    """Emit a conventional-looking cache trace with pseudo addresses.

    B fibers and C rows are mapped into two disjoint virtual address
    regions; each event is one line ``R <hex_addr>`` or ``W <hex_addr>``.

    Note:
        This is for study/comparison only — it is not the official
        SeaCache address format.

    Args:
        A: CSR matrix driving the trace.
        out_path: Destination text file (overwritten).
        elem_bytes: Stride in bytes between consecutive fiber/row slots.
        include_write: Whether to emit the ``W`` events for C rows.

    Returns:
        Tuple ``(total_reads, total_writes)`` of emitted events.
    """
    # Separate base addresses keep B-fiber and C-row spaces disjoint.
    base_B = 0x10000000
    base_C = 0x20000000

    read_count = 0
    write_count = 0

    with open(out_path, "w") as trace:
        trace.write("# Teaching/demo address trace\n")
        trace.write("# Format: R/W <hex_addr>\n\n")

        bounds = zip(A.indptr[:-1], A.indptr[1:])
        for row, (start, end) in enumerate(bounds):
            for col in A.indices[start:end]:
                trace.write(f"R {hex(base_B + int(col) * elem_bytes)}\n")
                read_count += 1

            if include_write:
                trace.write(f"W {hex(base_C + int(row) * elem_bytes)}\n")
                write_count += 1

    return read_count, write_count


def main():
    """CLI entry point: load a .mtx matrix, report its statistics, and
    generate either a fiber-level or address-level trace file."""
    parser = argparse.ArgumentParser()
    # (flags, kwargs) table keeps the CLI definition in one place; the
    # order is preserved because it determines the --help listing.
    cli_spec = [
        (["--mtx"], {"required": True, "help": "path to input .mtx file"}),
        (["--out"], {"required": True, "help": "output trace path"}),
        (["--mode"], {
            "choices": ["fiber", "addr"],
            "default": "fiber",
            "help": "fiber: READ_B_FIBER / WRITE_C_ROW; addr: R/W hex_addr",
        }),
        (["--no-write"], {
            "action": "store_true",
            "help": "do not emit WRITE_C_ROW / W lines",
        }),
        (["--elem-bytes"], {
            "type": int,
            "default": 8,
            "help": "element size in bytes for addr mode",
        }),
    ]
    for flags, kwargs in cli_spec:
        parser.add_argument(*flags, **kwargs)
    args = parser.parse_args()

    # Fail early with a clear error rather than inside scipy.
    if not os.path.exists(args.mtx):
        raise FileNotFoundError(f"input mtx not found: {args.mtx}")

    matrix = load_matrix(args.mtx)
    print_stats(validate_matrix(matrix))

    emit_writes = not args.no_write

    print(f"\n[INFO] generating trace in mode = {args.mode}")
    if args.mode == "fiber":
        reads, writes = generate_fiber_trace(matrix, args.out, include_write=emit_writes)
    else:
        reads, writes = generate_address_trace(
            matrix, args.out, elem_bytes=args.elem_bytes, include_write=emit_writes
        )

    print(f"[INFO] trace saved to: {args.out}")
    print(f"[INFO] read events: {reads}")
    print(f"[INFO] write events: {writes}")
    print(f"[INFO] total events: {reads + writes}")

    # Echo the head of the generated trace as a quick sanity check.
    print("\n[INFO] first 10 lines of trace:")
    with open(args.out, "r") as trace:
        for line_no, line in enumerate(trace):
            print(line.rstrip())
            if line_no >= 9:
                break


if __name__ == "__main__":
    main()
Loading