Skip to content

Commit 5b3de66

Browse files
committed
Fix KV cache size calculation to use per-GPU memory for accurate profiling
1 parent e9cf598 commit 5b3de66

1 file changed

Lines changed: 1 addition & 1 deletion

File tree

logos/logos-workernode/logos_worker_node/calibration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ def calibrate_model(
512 512   gpu_snap = query_gpu_vram(gpu_indices)
513 513   per_gpu_mb = min(v["total_mb"] for v in gpu_snap.values())
514 514   effective_gpu_mb = per_gpu_mb * tp
515     - max_kv_mb = effective_gpu_mb * _KV_CACHE_VRAM_CAP_RATIO
    515 + max_kv_mb = per_gpu_mb * _KV_CACHE_VRAM_CAP_RATIO
516 516   logger.info(
517 517   " GPU VRAM = %.0f MB/GPU × tp=%d = %.0f MB effective, "
518 518   "KV cache search cap (%.0f%%) = %.0f MB",

0 commit comments

Comments
 (0)