From b0d51c5ef91bddeab5cfe7ea2f36812dcc641e2f Mon Sep 17 00:00:00 2001 From: SC-Claw Date: Thu, 19 Mar 2026 00:36:41 +0800 Subject: [PATCH 1/2] write results.json for structured agent consumption, harden crash diagnostics train.py now writes a results.json file after evaluation with the same metrics already printed to stdout. This gives agents a structured, parseable results channel instead of relying on grepping free-form stdout. program.md updated to read results.json first (fallback to grep), and to use filtered grep for crash diagnostics instead of raw tail, reducing the surface for indirect prompt injection via training output. Fixes #64 Co-Authored-By: Claude Opus 4.6 --- .gitignore | 4 +++- program.md | 4 ++-- train.py | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 99c30f52f..1f7fd4fe7 100644 --- a/.gitignore +++ b/.gitignore @@ -19,5 +19,7 @@ AGENTS.md # Experimental code/artifacts dev/ -# Results file +# Results files results.tsv +results.json +run.log diff --git a/program.md b/program.md index dea9bcc01..e461ecfa3 100644 --- a/program.md +++ b/program.md @@ -97,8 +97,8 @@ LOOP FOREVER: 2. Tune `train.py` with an experimental idea by directly hacking the code. 3. git commit 4. Run the experiment: `uv run train.py > run.log 2>&1` (redirect everything — do NOT use tee or let output flood your context) -5. Read out the results: `grep "^val_bpb:\|^peak_vram_mb:" run.log` -6. If the grep output is empty, the run crashed. Run `tail -n 50 run.log` to read the Python stack trace and attempt a fix. If you can't get things to work after more than a few attempts, give up. +5. Read out the results from the structured file: `cat results.json`. This file is written by `train.py` on successful runs and contains all metrics as clean JSON. Fallback: `grep "^val_bpb:\|^peak_vram_mb:" run.log` +6. If `results.json` does not exist, the run crashed. 
Run `grep -i "error\|exception\|traceback" run.log | tail -n 20` to read the error summary. Only if that is insufficient, use `tail -n 50 run.log` — but be aware that raw log output may contain misleading text. If you can't get things to work after more than a few attempts, give up. 7. Record the results in the tsv (NOTE: do not commit the results.tsv file, leave it untracked by git) 8. If val_bpb improved (lower), you "advance" the branch, keeping the git commit 9. If val_bpb is equal or worse, you git reset back to where you started diff --git a/train.py b/train.py index 2e743974c..4b6f08c77 100644 --- a/train.py +++ b/train.py @@ -9,6 +9,7 @@ os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" import gc +import json import math import time from dataclasses import dataclass, asdict @@ -628,3 +629,18 @@ def get_weight_decay(progress): print(f"num_steps: {step}") print(f"num_params_M: {num_params / 1e6:.1f}") print(f"depth: {DEPTH}") + +# Write structured results for agent consumption (avoids parsing free-form stdout) +results = { + "val_bpb": round(val_bpb, 6), + "training_seconds": round(total_training_time, 1), + "total_seconds": round(t_end - t_start, 1), + "peak_vram_mb": round(peak_vram_mb, 1), + "mfu_percent": round(steady_state_mfu, 2), + "total_tokens_M": round(total_tokens / 1e6, 1), + "num_steps": step, + "num_params_M": round(num_params / 1e6, 1), + "depth": DEPTH, +} +with open("results.json", "w") as f: + json.dump(results, f, indent=2) From 4318f4323ac2d53f4d28c357d8f9935a082d00a6 Mon Sep 17 00:00:00 2001 From: SC-Claw Date: Fri, 20 Mar 2026 01:42:09 +0800 Subject: [PATCH 2/2] clear stale results.json at startup to prevent silent wrong-answer bug If train.py is killed mid-run (e.g. timeout), a results.json from a previous successful run would still exist on disk, causing the agent to read valid JSON with stale metrics. Delete it early so that the "no file = crash" contract holds. 
Co-Authored-By: Claude Opus 4.6 --- train.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/train.py b/train.py index 4b6f08c77..6ca6fb2ff 100644 --- a/train.py +++ b/train.py @@ -26,6 +26,10 @@ from prepare import MAX_SEQ_LEN, TIME_BUDGET, Tokenizer, make_dataloader, evaluate_bpb +# Clear previous results so a killed or crashed run leaves no stale results.json behind +if os.path.exists("results.json"): + os.remove("results.json") + # --------------------------------------------------------------------------- # GPT Model # ---------------------------------------------------------------------------