karpathy · shichangs · Mar 18, 2026
diff --git a/.gitignore b/.gitignore
@@ -19,5 +19,7 @@ AGENTS.md
 # Experimental code/artifacts
 dev/
 
-# Results file
+# Results files and run log
 results.tsv
+results.json
+run.log
diff --git a/program.md b/program.md
@@ -97,8 +97,8 @@ LOOP FOREVER:
 2. Tune `train.py` with an experimental idea by directly hacking the code.
 3. git commit
 4. Run the experiment: `uv run train.py > run.log 2>&1` (redirect everything — do NOT use tee or let output flood your context)
-5. Read out the results: `grep "^val_bpb:\|^peak_vram_mb:" run.log`
-6. If the grep output is empty, the run crashed. Run `tail -n 50 run.log` to read the Python stack trace and attempt a fix. If you can't get things to work after more than a few attempts, give up.
+5. Read out the results from the structured file: `cat results.json`. This file is written by `train.py` on successful runs and contains all metrics as clean JSON. Fallback: `grep "^val_bpb:\|^peak_vram_mb:" run.log`
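+6. If `results.json` does not exist, the run crashed. This check is only reliable if you delete any stale copy before starting the run in step 4 (`rm -f results.json`); otherwise a file left over from an earlier successful run will hide the crash. Run `grep -i "error\|exception\|traceback" run.log | tail -n 20` to read the error summary. Only if that is insufficient, use `tail -n 50 run.log` — but be aware that raw log output may contain misleading text. If you can't get things to work after more than a few attempts, give up.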
 7. Record the results in the tsv (NOTE: do not commit the results.tsv file, leave it untracked by git)
 8. If val_bpb improved (lower), you "advance" the branch, keeping the git commit
 9. If val_bpb is equal or worse, you git reset back to where you started

diff --git a/train.py b/train.py
@@ -9,6 +9,7 @@
 os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
 
 import gc
+import json
 import math
 import time
 from dataclasses import dataclass, asdict
@@ -628,3 +629,18 @@ def get_weight_decay(progress):
 print(f"num_steps:        {step}")
 print(f"num_params_M:     {num_params / 1e6:.1f}")
 print(f"depth:            {DEPTH}")
+
+# Write structured results for agent consumption (avoids parsing free-form stdout)
+results = {
+    "val_bpb": round(val_bpb, 6),
+    "training_seconds": round(total_training_time, 1),
+    "total_seconds": round(t_end - t_start, 1),
+    "peak_vram_mb": round(peak_vram_mb, 1),
+    "mfu_percent": round(steady_state_mfu, 2),
+    "total_tokens_M": round(total_tokens / 1e6, 1),
+    "num_steps": step,
+    "num_params_M": round(num_params / 1e6, 1),
+    "depth": DEPTH,
+}
+with open("results.json", "w") as f:
+    json.dump(results, f, indent=2)