|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import argparse |
| 4 | +import json |
| 5 | +from pathlib import Path |
| 6 | +from typing import Any |
| 7 | + |
# Fully resolved directory that contains this script; main() uses
# EXAMPLE_ROOT / "outputs" as the default value of --output-dir.
EXAMPLE_ROOT = Path(__file__).resolve().parent
| 9 | + |
# Per-task substrings that must appear in that task's migration.patch.
# validate_migration_artifacts() looks a task up by its task_name and requires
# every listed marker to occur in the patch text; task names that are not
# listed here (or a missing task name) get no task-specific markers.
# NOTE(review): the markers look like repository-relative paths of the files
# the patch is expected to touch -- confirm against the migration tasks.
EXPECTED_PATCH_MARKERS = {
    "support_reply_service": [
        "customer_support_bot/client.py",
        "customer_support_bot/replies.py",
    ],
    "case_summary_service": [
        "case_summary_service/client.py",
        "case_summary_service/summaries.py",
    ],
}
| 20 | + |
| 21 | + |
def read_json(path: Path) -> dict[str, Any]:
    """Load the UTF-8 JSON document at ``path`` and return its top-level object.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded top-level JSON object.

    Raises:
        ValueError: If the top-level JSON value is not an object. Malformed
            JSON surfaces as ``json.JSONDecodeError`` (a ``ValueError``
            subclass) from the decoder.
    """
    document = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(document, dict):
        return document
    raise ValueError(f"{path} must contain a JSON object.")
| 28 | + |
| 29 | + |
def read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Parse a UTF-8 JSON Lines file into a list of JSON objects.

    Blank (whitespace-only) lines are skipped. Line numbers in error messages
    are 1-based physical line numbers within the file.

    Args:
        path: Location of the ``.jsonl`` file to read.

    Returns:
        One dict per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON; the
            message is prefixed with ``path:line_number``.
        ValueError: If a line decodes to something other than a JSON object.
    """
    events: list[dict[str, Any]] = []
    # Iterate the file handle instead of ``read_text().splitlines()``:
    # ``str.splitlines`` also breaks on U+2028, U+2029, U+0085, form feed, etc.,
    # which may legally appear unescaped inside a JSON string and would tear a
    # single valid record into unparsable fragments. Text-mode file iteration
    # only breaks on "\n", "\r" and "\r\n".
    with path.open(encoding="utf-8") as handle:
        for line_number, line in enumerate(handle, start=1):
            if not line.strip():
                continue
            try:
                payload = json.loads(line)
            except json.JSONDecodeError as exc:
                # Same exception type, but say which file/line was malformed.
                raise json.JSONDecodeError(
                    f"{path}:{line_number}: {exc.msg}", exc.doc, exc.pos
                ) from exc
            if not isinstance(payload, dict):
                raise ValueError(f"{path}:{line_number} must contain a JSON object.")
            events.append(payload)
    return events
| 40 | + |
| 41 | + |
def require_output(path: Path) -> None:
    """Ensure the artifact at ``path`` exists and has a non-zero size.

    Args:
        path: Artifact the migration agent is expected to have written.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If ``path`` exists but its size is zero bytes.
    """
    if path.exists():
        size_in_bytes = path.stat().st_size
        if size_in_bytes == 0:
            raise ValueError(f"{path} is empty.")
        return
    raise FileNotFoundError(
        f"Missing {path}. Run the full migration agent before running artifact evals."
    )
| 49 | + |
| 50 | + |
def require_contains(text: str, marker: str, *, artifact: str) -> None:
    """Ensure ``marker`` occurs as a substring of ``text``.

    Args:
        text: Artifact contents to search.
        marker: Substring that must be present.
        artifact: Label for the artifact, used only in the error message.

    Raises:
        ValueError: If ``marker`` is not found in ``text``.
    """
    if marker in text:
        return
    raise ValueError(f"Expected {artifact} to contain {marker!r}.")
| 54 | + |
| 55 | + |
def require_result_value(result: dict[str, Any], field: str, marker: str) -> None:
    """Ensure ``result[field]`` is a string that contains ``marker``.

    Args:
        result: Decoded result object to inspect.
        field: Key whose value is checked.
        marker: Substring the value must contain.

    Raises:
        ValueError: If the key is absent, its value is not a string, or the
            string does not contain ``marker``.
    """
    candidate = result.get(field)
    if isinstance(candidate, str) and marker in candidate:
        return
    raise ValueError(f"Expected result[{field!r}] to contain {marker!r}.")
| 60 | + |
| 61 | + |
def validate_migration_artifacts(output_dir: Path, *, task_name: str | None = None) -> None:
    """Check the four migration artifacts in ``output_dir`` for expected content.

    The checks run in a fixed order and stop at the first failure:
    artifact presence, result-file commands, the final test outcome, the
    changed-file list, patch markers, report keywords, and finally the audit
    log event.

    Args:
        output_dir: Directory holding ``migration_report.md``,
            ``migration.patch``, ``migration_result.json`` and
            ``migration_audit.jsonl``.
        task_name: Optional key into ``EXPECTED_PATCH_MARKERS`` selecting
            extra substrings the patch must contain.

    Raises:
        FileNotFoundError: If any artifact is missing.
        ValueError: If an artifact is empty or fails a content check.
    """
    report_file = output_dir / "migration_report.md"
    patch_file = output_dir / "migration.patch"
    result_file = output_dir / "migration_result.json"
    audit_file = output_dir / "migration_audit.jsonl"

    # All four artifacts must exist and be non-empty before any is parsed.
    for artifact_file in (report_file, patch_file, result_file, audit_file):
        require_output(artifact_file)

    result = read_json(result_file)
    patch_text = patch_file.read_text(encoding="utf-8")
    report_text = report_file.read_text(encoding="utf-8")
    audit_events = read_jsonl(audit_file)

    # Commands recorded in the result file must mention the expected tools.
    for field, marker in (
        ("baseline_test_command", "unittest"),
        ("check_command", "compileall"),
        ("final_test_command", "unittest"),
    ):
        require_result_value(result, field, marker)

    # Loose, case-insensitive substring heuristic for a passing outcome.
    outcome = str(result.get("final_test_result", "")).lower()
    if not ("pass" in outcome or "ok" in outcome):
        raise ValueError("Expected final_test_result to describe a passing test run.")

    changed_files = result.get("changed_files")
    if not (isinstance(changed_files, list) and changed_files):
        raise ValueError("Expected result['changed_files'] to be a non-empty list.")

    # Generic markers first, then task-specific ones, then the tests/ marker.
    patch_label = str(patch_file)
    task_markers = EXPECTED_PATCH_MARKERS.get(task_name or "", [])
    for marker in ("responses.create", "output_text", *task_markers, "tests/"):
        require_contains(patch_text, marker, artifact=patch_label)

    # Report keywords are matched against the lowercased report.
    report_lowered = report_text.lower()
    for keyword in ("responses", "test"):
        require_contains(report_lowered, keyword, artifact=str(report_file))

    artifacts_written = any(
        entry.get("event") == "host_artifacts_written" for entry in audit_events
    )
    if not artifacts_written:
        raise ValueError("Expected audit log to include a host_artifacts_written event.")
| 99 | + |
| 100 | + |
def validate_output_root(output_dir: Path) -> None:
    """Validate a single-task output directory or a batch of task outputs.

    When ``output_dir`` contains ``batch_summary.json``, each entry of its
    ``task_summaries`` list is validated in order, using the entry's
    ``output_dir`` and ``task_name``. Otherwise ``output_dir`` itself is
    treated as one task's artifact directory.

    Args:
        output_dir: Root directory to validate.

    Raises:
        ValueError: If the batch summary is malformed or any artifact check
            fails.
        FileNotFoundError: If an expected artifact is missing.
    """
    summary_file = output_dir / "batch_summary.json"
    if summary_file.exists():
        entries = read_json(summary_file).get("task_summaries")
        if not (isinstance(entries, list) and entries):
            raise ValueError("Expected batch_summary.json to include task_summaries.")

        # Each entry is shape-checked and validated before the next is looked at.
        for entry in entries:
            if not isinstance(entry, dict):
                raise ValueError("Each task summary must be a JSON object.")
            name = entry.get("task_name")
            location = entry.get("output_dir")
            if not (isinstance(name, str) and isinstance(location, str)):
                raise ValueError("Each task summary must include task_name and output_dir.")
            # The recorded output_dir is used as given (not joined to output_dir).
            validate_migration_artifacts(Path(location), task_name=name)
    else:
        validate_migration_artifacts(output_dir)
| 120 | + |
| 121 | + |
def main() -> None:
    """Parse ``--output-dir`` from the command line and validate that directory.

    Prints a success message when every check passes.

    Raises:
        SystemExit: With an ``Artifact eval failed: ...`` message when any
            validation step raises.
    """
    output_dir_help = (
        "Directory containing batch_summary.json or a single task's "
        "migration_report.md, migration.patch, migration_result.json, "
        "and migration_audit.jsonl."
    )
    cli = argparse.ArgumentParser(description="Validate generated migration-agent artifacts.")
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=EXAMPLE_ROOT / "outputs",
        help=output_dir_help,
    )
    output_dir = cli.parse_args().output_dir

    # Top-level boundary: turn any validation failure into a clean exit message.
    try:
        validate_output_root(output_dir)
    except Exception as exc:
        raise SystemExit(f"Artifact eval failed: {exc}") from exc
    else:
        print(f"Migration artifact evals passed for {output_dir}")
| 142 | + |
| 143 | + |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments