IgorTavcar
diff --git a/‎.claude/hooks/cage.sh‎
Lines changed: 73 additions & 0 deletions b/‎.claude/hooks/cage.sh‎
Lines changed: 73 additions & 0 deletions
diff --git a/‎.claude/settings.json‎
Lines changed: 15 additions & 0 deletions b/‎.claude/settings.json‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎HOW_IT_WORKS.md‎
Lines changed: 54 additions & 54 deletions b/‎HOW_IT_WORKS.md‎
Lines changed: 54 additions & 54 deletions
diff --git a/‎baseline/prepare.py‎
Lines changed: 2 additions & 1 deletion b/‎baseline/prepare.py‎
Lines changed: 2 additions & 1 deletion
@@ -0,0 +1,73 @@
+#!/bin/bash
+# PreToolUse hook: confine the agent to the project directory.
+# - Bash: blocks commands that change directory (cd, pushd, popd, chdir);
+#   other Bash commands are not inspected for the paths they touch
+# - Read/Write/Edit: blocks file paths outside the project dir
+# - Glob/Grep: blocks search paths outside the project dir
+# - Paths inside the project's .claude directory are blocked as well
+#
+# The hook payload (JSON) is read from stdin. A denial is reported as a JSON
+# permission decision on stdout; the script exits 0 in every case.
+
+# Raw hook payload, read once from stdin.
+INPUT=$(cat)
+# Name of the tool being invoked; empty when the field is absent.
+TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty')
+# Root that every checked path must stay inside (taken from the environment).
+PROJECT_DIR="$CLAUDE_PROJECT_DIR"
+
+# Emit a PreToolUse "deny" decision as JSON on stdout and end the hook.
+#   $1 - human-readable reason, embedded as permissionDecisionReason
+# Does not return: the script exits with status 0 once the decision is printed.
+deny() {
+  local message="$1"
+  local filter='{
+    hookSpecificOutput: {
+      hookEventName: "PreToolUse",
+      permissionDecision: "deny",
+      permissionDecisionReason: $reason
+    }
+  }'
+
+  jq --null-input --arg reason "$message" "$filter"
+  exit 0
+}
+
+# Print the canonical absolute form of a path; the path need not exist.
+# ".." components are collapsed and symlinks in existing components are
+# followed, so a link inside the project cannot disguise an outside target.
+#   $1 - path to resolve (relative paths are taken against the current dir)
+# Resolvers, tried in order: GNU `realpath -m` (Linux), Python 3
+# `os.path.realpath` (macOS/Linux), and as a last resort the unmodified input,
+# which is NOT normalized in any way.
+resolve_path() {
+  local path="$1"
+  # `--` keeps a path that begins with "-" from being parsed as an option.
+  realpath -m -- "$path" 2>/dev/null && return
+  # os.path.realpath (unlike abspath + normpath) also resolves symlinks, which
+  # keeps this branch consistent with `realpath -m` above.
+  python3 -c "import os, sys; print(os.path.realpath(sys.argv[1]))" "$path" 2>/dev/null && return
+  printf '%s\n' "$path"
+}
+
+# Decide whether the agent may access a path.
+#   $1 - path taken from the tool input (may be empty)
+# Returns 0 when the path is empty, or resolves to the project directory or
+# something beneath it. Returns 1 for everything else, for anything inside the
+# project's .claude directory, and when the project directory is unknown.
+check_path() {
+  local path="$1"
+  # Empty/null path means the tool defaults to cwd, which is fine
+  [ -z "$path" ] && return 0
+
+  # Fail closed: with an empty PROJECT_DIR the "$PROJECT_DIR"/* pattern would
+  # collapse to /* and accept every absolute path.
+  [ -z "$PROJECT_DIR" ] && return 1
+
+  local resolved root
+  resolved=$(resolve_path "$path")
+  # Canonicalize the root the same way as the candidate, so a project reached
+  # through a symlink or written with a trailing slash still compares equal.
+  root=$(resolve_path "$PROJECT_DIR")
+  [ "$root" != "/" ] && root="${root%/}"
+
+  case "$resolved" in
+    "$root/.claude"|"$root/.claude"/*) return 1 ;;
+    "$root"|"$root"/*) return 0 ;;
+    *) return 1 ;;
+  esac
+}
+
+# Dispatch on the tool being invoked; tools not listed here are allowed.
+case "$TOOL_NAME" in
+  Bash)
+    COMMAND=$(printf '%s' "$INPUT" | jq -r '.tool_input.command // empty')
+    [ -z "$COMMAND" ] && exit 0
+
+    # Block directory changes: cd/pushd/popd/chdir at the start of a line or
+    # after ; & | ` ( {, optionally prefixed with `builtin`/`command`, and
+    # followed by whitespace, ; & | ) or end of line.
+    # POSIX [[:space:]] is used because \s is not part of standard ERE.
+    # Pattern-based and best-effort: a directory change after a shell keyword
+    # (then/do/else) or hidden inside eval/another script is not detected.
+    if printf '%s\n' "$COMMAND" \
+      | grep -qE '(^|[;&|`({])[[:space:]]*((builtin|command)[[:space:]]+)?(cd|pushd|popd|chdir)([[:space:];&|)]|$)'; then
+      deny "Changing the working directory is not allowed."
+    fi
+    ;;
+
+  Read|Write|Edit)
+    # These tools name their target in .tool_input.file_path.
+    FILE_PATH=$(printf '%s' "$INPUT" | jq -r '.tool_input.file_path // empty')
+    if ! check_path "$FILE_PATH"; then
+      deny "Access denied: $FILE_PATH is outside the project directory ($PROJECT_DIR)."
+    fi
+    ;;
+
+  Glob|Grep)
+    # The search root is optional; an empty value is accepted by check_path.
+    SEARCH_PATH=$(printf '%s' "$INPUT" | jq -r '.tool_input.path // empty')
+    if ! check_path "$SEARCH_PATH"; then
+      deny "Access denied: $SEARCH_PATH is outside the project directory ($PROJECT_DIR)."
+    fi
+    ;;
+esac
+
+# Nothing objected: exit 0 without printing a decision.
+exit 0
@@ -0,0 +1,15 @@
+{
+  "hooks": {
+    "PreToolUse": [
+      {
+        "matcher": "Bash|Read|Write|Edit|Glob|Grep",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/cage.sh"
+          }
+        ]
+      }
+    ]
+  }
+}
@@ -28,24 +28,24 @@ An AI agent **edits a training script, runs it for 5 minutes, checks if the mode
  │                                                              │
  │   Reads program.md for instructions, then loops:             │
  │                                                              │
- │   ┌─────────────┐    ┌─────────────┐    ┌────────────────┐  │
- │   │ Think of an  │───▶│ Edit        │───▶│ Run train.py   │  │
- │   │ experiment   │    │ train.py    │    │ (5 min)        │  │
- │   └─────────────┘    └─────────────┘    └───────┬────────┘  │
+ │   ┌─────────────┐    ┌─────────────┐    ┌────────────────┐   │
+ │   │ Think of an │───▶│ Edit        │───▶│ Run train.py   │   │
+ │   │ experiment  │    │ train.py    │    │ (5 min)        │   │
+ │   └─────────────┘    └─────────────┘    └────────┬───────┘   │
  │                                                  │           │
  │                          ┌───────────────────────┘           │
  │                          ▼                                   │
- │                  ┌───────────────┐                            │
- │                  │ Got better?   │                            │
- │                  └───┬───────┬───┘                            │
- │                yes   │       │  no                            │
- │                      ▼       ▼                                │
+ │                  ┌───────────────┐                           │
+ │                  │ Got better?   │                           │
+ │                  └───┬───────┬───┘                           │
+ │                yes   │       │  no                           │
+ │                      ▼       ▼                               │
  │               ┌────────┐ ┌─────────┐                         │
  │               │ KEEP   │ │ DISCARD │                         │
  │               │ commit │ │ revert  │                         │
  │               └────┬───┘ └────┬────┘                         │
- │                    │          │                               │
- │                    ▼          ▼                               │
+ │                    │          │                              │
+ │                    ▼          ▼                              │
  │               ┌────────────────────┐                         │
  │               │ Log to results.tsv │──▶ loop back            │
  │               └────────────────────┘                         │
@@ -71,10 +71,10 @@ An AI agent **edits a training script, runs it for 5 minutes, checks if the mode
 ### Phase 1: Setup (one-time, by you)
 
 ```
- ┌──────────┐      ┌────────────────────────────────┐
- │ prepare.py│─────▶│  ~/.cache/autoresearch/         │
- └──────────┘      │                                  │
-                   │  data/                            │
+ ┌──────────┐      ┌────────────────────────────────────┐
+ │prepare.py│─────▶│  ~/.cache/autoresearch/            │
+ └──────────┘      │                                    │
+                   │  data/                             │
                    │    shard_00000.parquet             │
                    │    shard_00001.parquet             │
                    │    ... (10 training shards)        │
@@ -123,7 +123,7 @@ The agent then:
  │     │       peak_vram_mb: 44100   ← memory used               │ │
  │     │                                                         │ │
  │     │  f. Compare to previous best:                           │ │
- │     │       0.9821 < 0.9979 → BETTER! Keep the commit.       │ │
+ │     │       0.9821 < 0.9979 → BETTER! Keep the commit.        │ │
  │     │                                                         │ │
  │     │  g. Append to results.tsv                               │ │
  │     │                                                         │ │
@@ -161,7 +161,7 @@ When you come back:
  │  ✗ Edit prepare.py, program.md, or any other file            │
  │  ✗ Add new dependencies                                      │
  │  ✗ Change the tokenizer or data pipeline                     │
- │  ✗ Exceed available GPU memory                                │
+ │  ✗ Exceed available GPU memory                               │
  │  ✗ Stop (the agent runs until you interrupt it)              │
  └──────────────────────────────────────────────────────────────┘
 ```
@@ -176,31 +176,31 @@ A small GPT-style transformer, trained from scratch on text data:
   Input tokens (sequence of 2048)
          │
          ▼
-  ┌──────────────┐
-  │  Token        │  Converts token IDs → vectors
-  │  Embedding    │
-  └──────┬───────┘
+  ┌────────────────┐
+  │  Token         │  Converts token IDs → vectors
+  │  Embedding     │
+  └──────┬─────────┘
          │
          ▼
-  ┌──────────────┐
-  │  Transformer  │ ×8-12 layers, each containing:
-  │  Block        │
-  │  ┌──────────┐ │   • RMS Normalization
-  │  │Attention  │ │   • Multi-head self-attention (with RoPE)
-  │  │(sliding   │ │   • Sliding window: short/long pattern (SSSL)
-  │  │ window)   │ │   • Flash Attention 3 kernel
-  │  └──────────┘ │
-  │  ┌──────────┐ │   • RMS Normalization
-  │  │ MLP      │ │   • Linear → ReLU² → Linear
-  │  │(feedfwd) │ │
-  │  └──────────┘ │
-  │  + residual   │   • Skip connections with learnable scaling
-  └──────┬───────┘
+  ┌────────────────┐
+  │  Transformer   │ ×8-12 layers, each containing:
+  │  Block         │
+  │  ┌──────────┐  │   • RMS Normalization
+  │  │Attention │  │   • Multi-head self-attention (with RoPE)
+  │  │(sliding  │  │   • Sliding window: short/long pattern (SSSL)
+  │  │ window)  │  │   • Flash Attention 3 kernel
+  │  └──────────┘  │
+  │  ┌──────────┐  │   • RMS Normalization
+  │  │ MLP      │  │   • Linear → ReLU² → Linear
+  │  │(feedfwd) │  │
+  │  └──────────┘  │
+  │  + residual    │   • Skip connections with learnable scaling
+  └──────┬─────────┘
          │
          ▼
   ┌──────────────┐
-  │  LM Head      │  Vectors → vocabulary probabilities
-  │  (unembedding)│
+  │ LM Head      │  Vectors → vocabulary probabilities
+  │ (unembedding)│
   └──────┬───────┘
          │
          ▼
@@ -293,37 +293,37 @@ Failed experiments are reverted with `git reset` — they leave no trace in git,
 
 ```
  ┌────────────┐         ┌─────────────────────────────────────┐
- │ HuggingFace│────────▶│  ~/.cache/autoresearch/              │
- │ (remote)   │  data   │  ├── data/*.parquet                  │
- └────────────┘         │  └── tokenizer/                      │
-                        │      ├── tokenizer.pkl                │
-                        │      ├── token_bytes.pt               │
-       prepare.py ──────│      └── metadata.json                │
+ │ HuggingFace│────────▶│  ~/.cache/autoresearch/             │
+ │ (remote)   │  data   │  ├── data/*.parquet                 │
+ └────────────┘         │  └── tokenizer/                     │
+                        │      ├── tokenizer.pkl              │
+                        │      ├── token_bytes.pt             │
+       prepare.py ──────│      └── metadata.json              │
        (runs once)      └──────────────┬──────────────────────┘
                                        │
                                        │ loaded at runtime
                                        ▼
  ┌────────────┐  edits   ┌─────────────────────┐   outputs
- │ AI Agent   │─────────▶│     train.py         │──────────────┐
- │ (Claude)   │          │  (model + training)  │              │
- │            │◀─────────│                      │              │
+ │ AI Agent   │─────────▶│     train.py        │──────────────┐
+ │ (Claude)   │          │  (model + training) │              │
+ │            │◀─────────│                     │              │
  │            │ reads    └─────────────────────┘              │
- │            │ output                                         │
- │            │                                                ▼
+ │            │ output                                        │
+ │            │                                               ▼
  │            │  appends  ┌─────────────────────┐    val_bpb: 0.982
- │            │──────────▶│   results.tsv        │    peak_vram_mb: 44100
+ │            │──────────▶│   results.tsv       │    peak_vram_mb: 44100
  │            │           └─────────────────────┘
  │            │
- │            │  commits  ┌─────────────────────┐
- │            │──────────▶│   git branch         │
+ │            │  commits  ┌───────────────────────┐
+ │            │──────────▶│   git branch          │
  │            │  /resets  │  autoresearch/<tag>   │
- └────────────┘           └─────────────────────┘
+ └────────────┘           └───────────────────────┘
                                     │
                                     │  reviewed by human
                                     ▼
                           ┌─────────────────────┐
-                          │  analysis.ipynb      │
-                          │  (plots & insights)  │
+                          │  analysis.ipynb     │
+                          │  (plots & insights) │
                           └─────────────────────┘
 ```
 
 
@@ -38,7 +38,8 @@
 CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "autoresearch")
 DATA_DIR = os.path.join(CACHE_DIR, "data")
 TOKENIZER_DIR = os.path.join(CACHE_DIR, "tokenizer")
-BASE_URL = "https://huggingface.co/datasets/karpathy/climbmix-400b-shuffle/resolve/main"
+# Hub endpoint override (e.g. a mirror). An unset or empty HF_ENDPOINT falls
+# back to the public hub, and trailing slashes are dropped so the join below
+# never produces "//datasets".
+_HF_ENDPOINT = (os.environ.get("HF_ENDPOINT") or "https://huggingface.co").rstrip("/")
+BASE_URL = f"{_HF_ENDPOINT}/datasets/karpathy/climbmix-400b-shuffle/resolve/main"
 MAX_SHARD = 6542 # the last datashard is shard_06542.parquet
 VAL_SHARD = MAX_SHARD  # pinned validation shard (shard_06542)
 VAL_FILENAME = f"shard_{VAL_SHARD:05d}.parquet"