We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e9cf598, commit 5b3de66 (Copy full SHA for 5b3de66)
1 file changed
logos/logos-workernode/logos_worker_node/calibration.py
@@ -512,7 +512,7 @@ def calibrate_model(
512
gpu_snap = query_gpu_vram(gpu_indices)
513
per_gpu_mb = min(v["total_mb"] for v in gpu_snap.values())
514
effective_gpu_mb = per_gpu_mb * tp
515
- max_kv_mb = effective_gpu_mb * _KV_CACHE_VRAM_CAP_RATIO
+ max_kv_mb = per_gpu_mb * _KV_CACHE_VRAM_CAP_RATIO
516
logger.info(
517
" GPU VRAM = %.0f MB/GPU × tp=%d = %.0f MB effective, "
518
"KV cache search cap (%.0f%%) = %.0f MB",
0 commit comments