From b0d51c5ef91bddeab5cfe7ea2f36812dcc641e2f Mon Sep 17 00:00:00 2001
From: SC-Claw <sc-claw@SC-ClawdeMac-mini.local>
Date: Thu, 19 Mar 2026 00:36:41 +0800
Subject: [PATCH 1/2] write results.json for structured agent consumption,
 harden crash diagnostics

train.py now writes a results.json file after evaluation with the same
metrics already printed to stdout. This gives agents a structured, parseable
results channel instead of relying on grepping free-form stdout.

program.md updated to read results.json first (fallback to grep), and to
use filtered grep for crash diagnostics instead of raw tail, reducing the
surface for indirect prompt injection via training output.

Fixes #64

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .gitignore |  4 +++-
 program.md |  4 ++--
 train.py   | 16 ++++++++++++++++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 99c30f52f..1f7fd4fe7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,5 +19,7 @@ AGENTS.md
 # Experimental code/artifacts
 dev/
 
-# Results file
+# Results files
 results.tsv
+results.json
+run.log
diff --git a/program.md b/program.md
index dea9bcc01..e461ecfa3 100644
--- a/program.md
+++ b/program.md
@@ -97,8 +97,8 @@ LOOP FOREVER:
 2. Tune `train.py` with an experimental idea by directly hacking the code.
 3. git commit
 4. Run the experiment: `uv run train.py > run.log 2>&1` (redirect everything — do NOT use tee or let output flood your context)
-5. Read out the results: `grep "^val_bpb:\|^peak_vram_mb:" run.log`
-6. If the grep output is empty, the run crashed. Run `tail -n 50 run.log` to read the Python stack trace and attempt a fix. If you can't get things to work after more than a few attempts, give up.
+5. Read out the results from the structured file: `cat results.json`. This file is written by `train.py` on successful runs and contains all metrics as clean JSON. Fallback: `grep "^val_bpb:\|^peak_vram_mb:" run.log`
+6. If `results.json` does not exist, the run crashed. Run `grep -i "error\|exception\|traceback" run.log | tail -n 20` to read the error summary. Only if that is insufficient, use `tail -n 50 run.log` — but be aware that raw log output may contain misleading text. If you can't get things to work after more than a few attempts, give up.
 7. Record the results in the tsv (NOTE: do not commit the results.tsv file, leave it untracked by git)
 8. If val_bpb improved (lower), you "advance" the branch, keeping the git commit
 9. If val_bpb is equal or worse, you git reset back to where you started
diff --git a/train.py b/train.py
index 2e743974c..4b6f08c77 100644
--- a/train.py
+++ b/train.py
@@ -9,6 +9,7 @@
 os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
 
 import gc
+import json
 import math
 import time
 from dataclasses import dataclass, asdict
@@ -628,3 +629,18 @@ def get_weight_decay(progress):
 print(f"num_steps:        {step}")
 print(f"num_params_M:     {num_params / 1e6:.1f}")
 print(f"depth:            {DEPTH}")
+
+# Write structured results for agent consumption (avoids parsing free-form stdout)
+results = {
+    "val_bpb": round(val_bpb, 6),
+    "training_seconds": round(total_training_time, 1),
+    "total_seconds": round(t_end - t_start, 1),
+    "peak_vram_mb": round(peak_vram_mb, 1),
+    "mfu_percent": round(steady_state_mfu, 2),
+    "total_tokens_M": round(total_tokens / 1e6, 1),
+    "num_steps": step,
+    "num_params_M": round(num_params / 1e6, 1),
+    "depth": DEPTH,
+}
+with open("results.json", "w") as f:
+    json.dump(results, f, indent=2)

From 4318f4323ac2d53f4d28c357d8f9935a082d00a6 Mon Sep 17 00:00:00 2001
From: SC-Claw <sc-claw@SC-ClawdeMac-mini.local>
Date: Fri, 20 Mar 2026 01:42:09 +0800
Subject: [PATCH 2/2] clear stale results.json at startup to prevent silent
 wrong-answer bug

If train.py is killed mid-run (e.g. timeout), a results.json from a
previous successful run would still exist on disk, causing the agent
to read valid JSON with stale metrics. Delete it early so that
"no file = crash" contract holds.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 train.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/train.py b/train.py
index 4b6f08c77..6ca6fb2ff 100644
--- a/train.py
+++ b/train.py
@@ -26,6 +26,10 @@
 
 from prepare import MAX_SEQ_LEN, TIME_BUDGET, Tokenizer, make_dataloader, evaluate_bpb
 
+# Clear previous results so a killed run leaves no file
+if os.path.exists("results.json"):
+    os.remove("results.json")
+
 # ---------------------------------------------------------------------------
 # GPT Model
 # ---------------------------------------------------------------------------