Skip to content

Commit 6c1998d

Browse files
committed
Add sandboxed code migration agent cookbook
1 parent 7ba0d4d commit 6c1998d

29 files changed

Lines changed: 1735 additions & 0 deletions

authors.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
# You can optionally customize how your information shows up on cookbook.openai.com over here.
44
# If your information is not present here, it will be pulled from your GitHub profile.
55

6+
kkahadze-oai:
7+
name: "Konstantine Kahadze"
8+
website: "https://www.linkedin.com/in/kahadze/"
9+
avatar: "https://avatars.githubusercontent.com/kkahadze-oai"
10+
611
zhenweig-cerebras:
712
name: "Zhenwei Gao"
813
website: "https://www.linkedin.com/in/zhenwei-gao/"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
outputs/*
2+
!outputs/.gitkeep
3+
.env.local
4+
__pycache__/
5+
.pytest_cache/
6+
.venv/

examples/agents_sdk/sandboxed-code-migration/assets/sandbox-as-a-tool-architecture.svg

Lines changed: 1 addition & 0 deletions
Loading
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
from __future__ import annotations
2+
3+
import argparse
4+
import json
5+
from pathlib import Path
6+
from typing import Any
7+
8+
# Directory containing this script; used to build the default --output-dir.
EXAMPLE_ROOT = Path(__file__).resolve().parent

# Task name -> file paths that the generated migration patch must mention.
# Task names that are not listed here get no task-specific path checks.
EXPECTED_PATCH_MARKERS = {
    "support_reply_service": [
        "customer_support_bot/client.py",
        "customer_support_bot/replies.py",
    ],
    "case_summary_service": [
        "case_summary_service/client.py",
        "case_summary_service/summaries.py",
    ],
}
20+
21+
22+
def read_json(path: Path) -> dict[str, Any]:
    """Load a UTF-8 JSON file and return its top-level object.

    Args:
        path: File to read.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: If the top-level JSON value is not an object. Malformed
            JSON surfaces as ``json.JSONDecodeError``, a ``ValueError`` subclass.
    """
    document = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(document, dict):
        return document
    raise ValueError(f"{path} must contain a JSON object.")
28+
29+
30+
def read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Parse a UTF-8 JSON Lines file into a list of objects.

    Blank and whitespace-only lines are skipped, but they still count toward
    the 1-based line number reported in error messages.

    Args:
        path: File to read.

    Returns:
        One dict per non-blank line, in file order.

    Raises:
        ValueError: If a non-blank line decodes to something other than a
            JSON object.
    """
    records: list[dict[str, Any]] = []
    raw_lines = path.read_text(encoding="utf-8").splitlines()
    for index, raw in enumerate(raw_lines):
        if raw.strip():
            record = json.loads(raw)
            if not isinstance(record, dict):
                raise ValueError(f"{path}:{index + 1} must contain a JSON object.")
            records.append(record)
    return records
40+
41+
42+
def require_output(path: Path) -> None:
    """Ensure an expected artifact exists on disk and is not zero bytes.

    Args:
        path: Artifact path to check.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If ``path`` exists but has size 0.
    """
    if not path.exists():
        message = f"Missing {path}. Run the full migration agent before running artifact evals."
        raise FileNotFoundError(message)

    size_in_bytes = path.stat().st_size
    if size_in_bytes == 0:
        raise ValueError(f"{path} is empty.")
49+
50+
51+
def require_contains(text: str, marker: str, *, artifact: str) -> None:
    """Ensure ``marker`` occurs as a substring of ``text``.

    Args:
        text: Content to search.
        marker: Substring that must be present.
        artifact: Label for the content, used only in the error message.

    Raises:
        ValueError: If ``marker`` is not found in ``text``.
    """
    if marker in text:
        return
    raise ValueError(f"Expected {artifact} to contain {marker!r}.")
54+
55+
56+
def require_result_value(result: dict[str, Any], field: str, marker: str) -> None:
    """Ensure ``result[field]`` is a string that contains ``marker``.

    Args:
        result: Parsed result payload.
        field: Key to look up in ``result``.
        marker: Substring the field's value must contain.

    Raises:
        ValueError: If the field is missing, is not a string, or does not
            contain ``marker``.
    """
    candidate = result.get(field)
    if isinstance(candidate, str) and marker in candidate:
        return
    raise ValueError(f"Expected result[{field!r}] to contain {marker!r}.")
60+
61+
62+
def validate_migration_artifacts(output_dir: Path, *, task_name: str | None = None) -> None:
    """Validate the artifacts written for a single migration task.

    Checks, in order:
      1. The report, patch, result and audit files all exist and are non-empty.
      2. The result's baseline/final test commands mention ``unittest`` and the
         check command mentions ``compileall``.
      3. ``final_test_result`` describes a passing run.
      4. ``changed_files`` is a non-empty list.
      5. The patch mentions ``responses.create``, ``output_text``, any paths
         listed for ``task_name`` in ``EXPECTED_PATCH_MARKERS``, and ``tests/``.
      6. The report mentions "responses" and "test" (case-insensitive).
      7. The audit log has a ``host_artifacts_written`` event.

    Args:
        output_dir: Directory holding the four migration artifacts.
        task_name: Optional task name selecting task-specific patch markers;
            unknown or missing names add no extra markers.

    Raises:
        FileNotFoundError: If an artifact file is missing.
        ValueError: If any artifact is empty, malformed, or fails a check.
    """
    import re  # Function-scope import: only the pass/OK check below needs it.

    report_path = output_dir / "migration_report.md"
    patch_path = output_dir / "migration.patch"
    result_path = output_dir / "migration_result.json"
    audit_path = output_dir / "migration_audit.jsonl"

    for path in (report_path, patch_path, result_path, audit_path):
        require_output(path)

    result = read_json(result_path)
    patch = patch_path.read_text(encoding="utf-8")
    report = report_path.read_text(encoding="utf-8")
    audit_events = read_jsonl(audit_path)

    require_result_value(result, "baseline_test_command", "unittest")
    require_result_value(result, "check_command", "compileall")
    require_result_value(result, "final_test_command", "unittest")

    final_test_result = str(result.get("final_test_result", "")).lower()
    # "pass" stays a substring match so "passed"/"passing" count. "ok" must be
    # a whole word: a plain substring test would also accept "broken" or "token".
    reports_pass = "pass" in final_test_result
    reports_ok = re.search(r"\bok\b", final_test_result) is not None
    if not reports_pass and not reports_ok:
        raise ValueError("Expected final_test_result to describe a passing test run.")

    changed_files = result.get("changed_files")
    if not isinstance(changed_files, list) or not changed_files:
        raise ValueError("Expected result['changed_files'] to be a non-empty list.")

    patch_label = str(patch_path)
    require_contains(patch, "responses.create", artifact=patch_label)
    require_contains(patch, "output_text", artifact=patch_label)
    for marker in EXPECTED_PATCH_MARKERS.get(task_name or "", []):
        require_contains(patch, marker, artifact=patch_label)
    require_contains(patch, "tests/", artifact=patch_label)

    # Lower-case once; both report checks are case-insensitive.
    report_lower = report.lower()
    require_contains(report_lower, "responses", artifact=str(report_path))
    require_contains(report_lower, "test", artifact=str(report_path))

    if not any(event.get("event") == "host_artifacts_written" for event in audit_events):
        raise ValueError("Expected audit log to include a host_artifacts_written event.")
99+
100+
101+
def validate_output_root(output_dir: Path) -> None:
    """Validate either a single-task output directory or a batch output root.

    If ``output_dir`` has no ``batch_summary.json``, it is validated directly as
    one task's artifact directory. Otherwise each entry in the summary's
    ``task_summaries`` list is validated using its own ``output_dir`` and
    ``task_name``.

    Args:
        output_dir: Directory to validate.

    Raises:
        ValueError: If the batch summary is malformed or any task fails
            validation.
        FileNotFoundError: If a task's artifact file is missing.
    """
    summary_file = output_dir / "batch_summary.json"
    if summary_file.exists():
        entries = read_json(summary_file).get("task_summaries")
        if not (isinstance(entries, list) and entries):
            raise ValueError("Expected batch_summary.json to include task_summaries.")

        for entry in entries:
            if not isinstance(entry, dict):
                raise ValueError("Each task summary must be a JSON object.")
            name = entry.get("task_name")
            task_dir = entry.get("output_dir")
            if not (isinstance(name, str) and isinstance(task_dir, str)):
                raise ValueError("Each task summary must include task_name and output_dir.")
            validate_migration_artifacts(Path(task_dir), task_name=name)
    else:
        # No batch summary: treat the directory as a single task's output.
        validate_migration_artifacts(output_dir)
120+
121+
122+
def main() -> None:
    """Parse ``--output-dir``, validate its artifacts, and report the outcome.

    On success a confirmation line is printed. Any exception raised during
    validation is converted to ``SystemExit`` with an "Artifact eval failed"
    message, chained to the original exception.
    """
    output_dir_help = (
        "Directory containing batch_summary.json or a single task's "
        "migration_report.md, migration.patch, migration_result.json, "
        "and migration_audit.jsonl."
    )
    parser = argparse.ArgumentParser(description="Validate generated migration-agent artifacts.")
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=EXAMPLE_ROOT / "outputs",
        help=output_dir_help,
    )
    output_dir = parser.parse_args().output_dir

    try:
        validate_output_root(output_dir)
    except Exception as exc:
        raise SystemExit(f"Artifact eval failed: {exc}") from exc
    else:
        print(f"Migration artifact evals passed for {output_dir}")


if __name__ == "__main__":
    main()
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Code migration agent
2+
3+
You are migrating the mounted repo under `repo/`.
4+
5+
## Mission
6+
7+
- Migrate the repo according to `repo/MIGRATION.md`.
8+
- Preserve the public function signatures and behavior.
9+
- Run the baseline test command before editing.
10+
- Edit the app code and its tests.
11+
- Run the check command named in `repo/MIGRATION.md` after editing.
12+
- Run the final test command named in `repo/MIGRATION.md` after editing.
13+
- Return structured output that includes the exact commands, pass/fail summaries,
14+
changed files, a Markdown migration report, and the patch you applied.
15+
16+
## Required command pattern
17+
18+
Each migration brief in `repo/MIGRATION.md` includes a validation pipeline.
19+
Use the exact baseline, check, and final test commands from that brief.
20+
21+
Run all three commands from `repo/`.
22+
23+
## Editing rules
24+
25+
- Keep the migration narrow. Do not rewrite the sample app.
26+
- Prefer `apply_patch` for edits.
27+
- When using `apply_patch`, use workspace-relative paths such as `repo/customer_support_bot/replies.py`.
28+
- Do not edit files outside `repo/`.
29+
- Do not install packages.
30+
- Do not place API keys, environment variables, or real OpenAI calls in tests.
31+
- The final tests must use a fake client; they should not call the network.
32+
- Include a patch in `migration_patch`. If you use `apply_patch`, you may return the same patch text.
33+
- The sandbox image may not have `git`. Do not require `git diff`; keep enough
34+
patch text from your `apply_patch` calls to return the migration diff.
35+
36+
## Suggested loop
37+
38+
1. Inspect `repo/MIGRATION.md`, the app files it names, and the tests it names.
39+
2. Run the baseline test command from the migration brief.
40+
3. Patch the client wrapper and the call site that uses it.
41+
4. Patch tests.
42+
5. Run the check command from the migration brief.
43+
6. Run the final test command from the migration brief.
44+
7. Inspect the changed files and assemble the migration patch from the patch text you applied.
45+
8. Return the structured result.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Migration request: Chat Completions to Responses
2+
3+
Migrate this case summary service from the legacy Chat Completions call shape to
4+
the Responses API call shape.
5+
6+
## Current structure
7+
8+
- `case_summary_service/client.py` contains the OpenAI client wrapper.
9+
- `case_summary_service/summaries.py` builds the summary prompt and calls the wrapper.
10+
- `tests/` contains offline fakes for the legacy Chat Completions shape.
11+
12+
## Target shape
13+
14+
- In `case_summary_service/client.py`, call `client.responses.create(...)`
15+
instead of `client.chat.completions.create(...)`.
16+
- Keep the same `model` argument.
17+
- Replace the wrapper's `messages` argument with an `input_items` argument.
18+
- In `case_summary_service/summaries.py`, pass the two-message system/user
19+
conversation as `input_items`.
20+
- Forward `input_items` as the Responses API `input` argument.
21+
- Keep `temperature=0`.
22+
- Return `response.output_text` instead of `completion.choices[0].message.content`.
23+
- Preserve the `summarize_case(client, *, model, case_notes)` function signature.
24+
- Update client-wrapper and summary tests to fake the Responses API instead of
25+
Chat Completions.
26+
- Tests must remain offline; do not import or instantiate the real OpenAI client.
27+
28+
## Required validation pipeline
29+
30+
- Before editing, run baseline tests: `python -m unittest discover -s tests -t .`.
31+
- After editing, run the compile/check command: `python -m compileall -q case_summary_service tests`.
32+
- After the compile/check command passes, run final tests: `python -m unittest discover -s tests -t .`.
33+
- Validate with `python -m unittest discover -s tests -t .`.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Case summary service
2+
3+
Small offline fixture for the sandboxed migration cookbook.
4+
5+
The pre-migration service wraps a Chat Completions call and uses it to summarize
6+
internal case notes. Tests use fakes; they should never call the network.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .summaries import summarize_case
2+
3+
__all__ = ["summarize_case"]
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from __future__ import annotations
2+
3+
from typing import Any
4+
5+
6+
def complete_summary_prompt(
    client: Any,
    *,
    model: str,
    messages: list[dict[str, str]],
) -> str:
    """Send ``messages`` via ``client.chat.completions.create`` and return the reply text.

    The request always uses ``temperature=0``.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        model: Model name forwarded unchanged.
        messages: Chat messages forwarded unchanged.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    request = {"model": model, "messages": messages, "temperature": 0}
    first_choice = client.chat.completions.create(**request).choices[0]
    return first_choice.message.content

0 commit comments

Comments
 (0)