Skip to content

Commit 5cff3bf

Browse files
committed
chore:updated evaluator_design.md
1 parent 1f173c4 commit 5cff3bf

1 file changed

Lines changed: 265 additions & 9 deletions

File tree

evaluators-design.md

Lines changed: 265 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,13 @@ packages/
1616
│ │ ├── evaluators/
1717
│ │ │ ├── install.ts
1818
│ │ │ ├── test.ts
19+
│ │ │ ├── build.ts
20+
│ │ │ ├── lint.ts
21+
│ │ │ ├── typecheck.ts
1922
│ │ │ ├── package-manager.ts
2023
│ │ │ ├── dependency-targets.ts
24+
│ │ │ ├── companion-alignment.ts
25+
│ │ │ ├── namespace-migration.ts
2126
│ │ │ └── integrity-guard.ts
2227
│ │ └── utils/
2328
│ │ ├── package-json.ts
@@ -92,11 +97,16 @@ The baseline implementation produces a deterministic score card with the followi
9297
| ----------------------- | --------------------------------- | ------------------------------------------------------------- |
9398
| `install_success` | `InstallEvaluator` | Exit status from the scenario's install command. |
9499
| `tests_nonregression` | `TestEvaluator` | Exit status from the configured test command. |
100+
| `build_success` | `BuildEvaluator` | Exit status from the configured build command. |
101+
| `lint_success` | `LintEvaluator` | Exit status from the configured lint command. |
102+
| `typecheck_success` | `TypecheckEvaluator` | Exit status from the configured typecheck command. |
95103
| `manager_correctness` | `PackageManagerEvaluator` | Presence of the expected package-manager artifacts (e.g. pnpm). |
96104
| `dependency_targets` | `DependencyTargetsEvaluator` | Adherence to required dependency ranges defined in the scenario. |
105+
| `companion_alignment` | `CompanionAlignmentEvaluator` | Version alignment of companion packages (e.g. react ↔ @types/react). |
106+
| `namespace_migrations` | `NamespaceMigrationEvaluator` | Completeness of namespace migrations (e.g. xterm → @xterm/xterm). |
97107
| `integrity_guard` | `IntegrityGuardEvaluator` | Safeguards against integrity regressions (e.g. skipped tests, relaxed lint). |
98108

99-
Each metric returns a normalized value in `[0, 1]` and is combined into a weighted total (see below). Additional categories such as build success, lint/type safety, semantic quality, or agent efficiency remain on the roadmap and are documented later in this file.
109+
Each metric returns a normalized value in `[0, 1]` and is combined into a weighted total (see below).
100110

101111
### 3. Scoring System
102112

@@ -108,8 +118,13 @@ weighted = clamp0to10(
108118
const baseWeights = {
109119
install_success: 1.5,
110120
tests_nonregression: 2.5,
121+
build_success: 1.0,
122+
lint_success: 1.0,
123+
typecheck_success: 1.0,
111124
manager_correctness: 1.0,
112125
dependency_targets: 2.0,
126+
companion_alignment: 0.7,
127+
namespace_migrations: 0.7,
113128
integrity_guard: 1.5,
114129
};
115130

@@ -124,12 +139,13 @@ The harness calls `computeWeightedTotals`, which applies the base weights (or sc
124139
- `Evaluator` interface, shared types, and registry (`packages/evaluators/src/index.ts`).
125140
- Diff and dependency delta capture (`packages/harness/src/runtime/diff.ts`).
126141
- Command log capture for install/test/lint/typecheck (`runtime/validation.ts`).
127-
- Implemented evaluators: `InstallEvaluator`, `TestEvaluator`, `PackageManagerEvaluator`, `DependencyTargetsEvaluator`, `IntegrityGuardEvaluator`.
142+
- Implemented evaluators: `InstallEvaluator`, `TestEvaluator`, `BuildEvaluator`, `LintEvaluator`, `TypecheckEvaluator`, `PackageManagerEvaluator`, `DependencyTargetsEvaluator`, `CompanionAlignmentEvaluator`, `NamespaceMigrationEvaluator`, `IntegrityGuardEvaluator`.
128143
- Weighted aggregation in the harness (`computeWeightedTotals`).
129144

130-
### Phase 2: Advanced Analysis (🚧 Planned)
131-
- Additional deterministic checks: build/lint/typecheck evaluators, richer dependency insights.
132-
- Companion alignment and namespace migration evaluators that consume diff artifacts.
145+
### Phase 2: Advanced Analysis (✅ Complete)
146+
- ✅ Build/lint/typecheck evaluators for tooling validation.
147+
- ✅ Companion alignment evaluator for package version pairing.
148+
- ✅ Namespace migration evaluator that consumes diff artifacts.
133149
- Enhanced diff/metrics tooling (JSON deltas, per-file stats) for downstream use.
134150

135151
### Phase 3: Intelligence Layer (🚧 Planned)
@@ -159,12 +175,17 @@ node packages/harness/dist/cli.js run update-deps nx-pnpm-monorepo --tier L1 --a
159175
"scores": {
160176
"install_success": 1,
161177
"tests_nonregression": 1,
178+
"build_success": 1,
179+
"lint_success": 1,
180+
"typecheck_success": 1,
162181
"manager_correctness": 1,
163182
"dependency_targets": 0.3333333333,
183+
"companion_alignment": 1,
184+
"namespace_migrations": 1,
164185
"integrity_guard": 1
165186
},
166187
"totals": {
167-
"weighted": 8.0952,
188+
"weighted": 8.83,
168189
"max": 10
169190
},
170191
"telemetry": {
@@ -175,7 +196,15 @@ node packages/harness/dist/cli.js run update-deps nx-pnpm-monorepo --tier L1 --a
175196
},
176197
"evaluator_results": [
177198
{ "name": "InstallEvaluator", "score": 1, "details": "Install succeeded" },
178-
{ "name": "TestEvaluator", "score": 1, "details": "Tests passed (or none present)" }
199+
{ "name": "TestEvaluator", "score": 1, "details": "Tests passed (or none present)" },
200+
{ "name": "BuildEvaluator", "score": 1, "details": "No build command configured" },
201+
{ "name": "LintEvaluator", "score": 1, "details": "Lint passed" },
202+
{ "name": "TypecheckEvaluator", "score": 1, "details": "Typecheck passed" },
203+
{ "name": "PackageManagerEvaluator", "score": 1, "details": "Correct manager artifacts" },
204+
{ "name": "DependencyTargetsEvaluator", "score": 0.33, "details": "apps\\app\\package.json:typescript@missing !-> >=5.5 <6; apps\\app\\package.json:nx@missing !-> ~20.0" },
205+
{ "name": "CompanionAlignmentEvaluator", "score": 1, "details": "All companions aligned" },
206+
{ "name": "NamespaceMigrationEvaluator", "score": 1, "details": "All namespace migrations completed" },
207+
{ "name": "IntegrityGuardEvaluator", "score": 1, "details": "No integrity issues detected" }
179208
],
180209
"diff_summary": [
181210
{ "file": "pnpm-lock.yaml", "changeType": "added" }
@@ -396,11 +425,238 @@ class IntegrityGuardEvaluator implements Evaluator {
396425
}
397426
```
398427

428+
#### BuildEvaluator
429+
Validates that the build command (if configured) succeeds.
430+
431+
```typescript
432+
/**
 * Scores the build step recorded in the evaluation context's command log.
 *
 * The first `commandLog` entry whose `type` is `'build'` decides the result:
 * - no such entry: score 1 ("No build command configured");
 * - exit code 0: score 1;
 * - any other exit code: score 0, with the exit code echoed in `details`.
 */
class BuildEvaluator implements Evaluator {
  meta = { name: 'BuildEvaluator' } as const;

  async evaluate(ctx: EvaluationContext): Promise<EvaluatorResult> {
    const { name } = this.meta;
    const buildRun = (ctx.commandLog || []).find(({ type }) => type === 'build');

    // Nothing was recorded for the build step, so there is nothing to penalise.
    if (!buildRun) {
      return { name, score: 1, details: 'No build command configured' };
    }

    if (buildRun.exitCode === 0) {
      return { name, score: 1, details: 'Build succeeded' };
    }

    return { name, score: 0, details: `Build failed: exit=${buildRun.exitCode}` };
  }
}
445+
```
446+
447+
#### LintEvaluator
448+
Validates that the lint command (if configured) passes.
449+
450+
```typescript
451+
/**
 * Scores the lint step recorded in the evaluation context's command log.
 *
 * The first `commandLog` entry whose `type` is `'lint'` decides the result:
 * - no such entry: score 1 ("No lint command configured");
 * - exit code 0: score 1;
 * - any other exit code: score 0, with the exit code echoed in `details`.
 */
class LintEvaluator implements Evaluator {
  meta = { name: 'LintEvaluator' } as const;

  async evaluate(ctx: EvaluationContext): Promise<EvaluatorResult> {
    const { name } = this.meta;
    const lintRun = (ctx.commandLog || []).find(({ type }) => type === 'lint');

    // Nothing was recorded for the lint step, so there is nothing to penalise.
    if (!lintRun) {
      return { name, score: 1, details: 'No lint command configured' };
    }

    if (lintRun.exitCode === 0) {
      return { name, score: 1, details: 'Lint passed' };
    }

    return { name, score: 0, details: `Lint failed: exit=${lintRun.exitCode}` };
  }
}
464+
```
465+
466+
#### TypecheckEvaluator
467+
Validates that the typecheck command (if configured) succeeds.
468+
469+
```typescript
470+
/**
 * Scores the typecheck step recorded in the evaluation context's command log.
 *
 * The first `commandLog` entry whose `type` is `'typecheck'` decides the result:
 * - no such entry: score 1 ("No typecheck command configured");
 * - exit code 0: score 1;
 * - any other exit code: score 0, with the exit code echoed in `details`.
 */
class TypecheckEvaluator implements Evaluator {
  meta = { name: 'TypecheckEvaluator' } as const;

  async evaluate(ctx: EvaluationContext): Promise<EvaluatorResult> {
    const { name } = this.meta;
    const typecheckRun = (ctx.commandLog || []).find(({ type }) => type === 'typecheck');

    // Nothing was recorded for the typecheck step, so there is nothing to penalise.
    if (!typecheckRun) {
      return { name, score: 1, details: 'No typecheck command configured' };
    }

    if (typecheckRun.exitCode === 0) {
      return { name, score: 1, details: 'Typecheck passed' };
    }

    return { name, score: 0, details: `Typecheck failed: exit=${typecheckRun.exitCode}` };
  }
}
483+
```
484+
485+
#### CompanionAlignmentEvaluator
486+
Ensures companion packages (e.g., `react` and `@types/react`) have aligned major versions.
487+
488+
```typescript
489+
/**
 * Extracts the leading major version number from a dependency specifier.
 *
 * Handles plain versions and ranges (`18.2.0`, `^18.2.0`, `~20.0`, `>=5.5 <6`, `v1.0.0`),
 * `workspace:` specifiers (`workspace:^18.0.0`) and npm aliases (`npm:@types/react@^17.0.0`).
 * For a range that spans several majors, the first major written in the range is returned.
 *
 * @param version - Raw value from a package.json dependency map, or `undefined` when absent.
 * @returns The major version, or `null` when the specifier carries no version number
 *   (missing or empty value, `*`, dist-tags such as `latest`, and path/URL/git specifiers
 *   such as `file:../vendor/react-17`).
 */
function getMajorVersion(version: string | undefined): number | null {
  if (!version) return null;

  let spec = version.trim();

  if (spec.startsWith('workspace:')) {
    spec = spec.slice('workspace:'.length);
  } else if (spec.startsWith('npm:')) {
    // Alias form is `npm:<name>@<range>`. An '@' at index 4 belongs to a scoped
    // package name (`npm:@scope/name`), so it does not introduce a range.
    const rangeStart = spec.lastIndexOf('@');
    if (rangeStart <= 4) return null;
    spec = spec.slice(rangeStart + 1);
  }

  // Only range operators, whitespace and a `v` prefix may precede the digits.
  // This keeps digits embedded in paths, URLs or tag names from being read as a major.
  const digits = /^[\s<>=~^vV]*(\d+)/.exec(spec)?.[1];
  return digits === undefined ? null : Number.parseInt(digits, 10);
}
495+
496+
/**
 * Scores how well companion packages track the major version of their main package.
 *
 * Rules come from `ctx.scenario.constraints.companion_versions`. For every package.json
 * returned by `getAllPackageJsonPaths(ctx.workspaceDir)`, a rule applies only when that
 * manifest declares `rule.main`. Each companion governed by the `'major must match'` rule
 * is one check; it passes when both majors can be parsed and are equal. A companion that
 * is missing from the manifest counts as a mismatch.
 *
 * The score is `aligned / total`, or 1 when no check was performed. Companions carrying
 * any other rule are not counted (they used to lower the score without explanation);
 * they are listed in `details` as ignored instead.
 */
class CompanionAlignmentEvaluator implements Evaluator {
  meta = { name: 'CompanionAlignmentEvaluator' } as const;

  async evaluate(ctx: EvaluationContext): Promise<EvaluatorResult> {
    const companionRules = ctx.scenario.constraints?.companion_versions || [];
    if (!companionRules.length) {
      return { name: this.meta.name, score: 1, details: 'No companion version rules defined' };
    }

    let total = 0;
    let aligned = 0;
    const misalignments: string[] = [];
    // A Set so the same unsupported rule is reported once, not once per package.json.
    const ignoredRules = new Set<string>();

    for (const pkgPath of getAllPackageJsonPaths(ctx.workspaceDir)) {
      const rel = relative(ctx.workspaceDir, pkgPath) || '.';
      const pkg = readJson(pkgPath);

      for (const rule of companionRules) {
        const mainVersion = this.declaredVersion(pkg, rule.main);
        // The rule is irrelevant for manifests that do not use the main package.
        if (!mainVersion) continue;

        const mainMajor = getMajorVersion(mainVersion);

        for (const companion of rule.companions) {
          if (companion.rule !== 'major must match') {
            ignoredRules.add(
              `unsupported companion rule '${String(companion.rule)}' for ${companion.name} (ignored)`
            );
            continue;
          }

          total++;
          const companionVersion = this.declaredVersion(pkg, companion.name);
          const companionMajor = getMajorVersion(companionVersion);

          if (mainMajor !== null && companionMajor !== null && mainMajor === companionMajor) {
            aligned++;
          } else {
            misalignments.push(
              `${rel}: ${rule.main}@${mainVersion} vs ${companion.name}@${companionVersion ?? 'missing'} (major mismatch)`
            );
          }
        }
      }
    }

    const notes = [...misalignments, ...ignoredRules];
    return {
      name: this.meta.name,
      score: total > 0 ? aligned / total : 1,
      details: notes.length ? notes.join('; ') : 'All companions aligned',
    };
  }

  /**
   * Returns the specifier declared for `name`, checking `dependencies`, then
   * `devDependencies`, then `peerDependencies`; `undefined` when none declares it.
   */
  private declaredVersion(
    pkg: {
      dependencies?: Record<string, string>;
      devDependencies?: Record<string, string>;
      peerDependencies?: Record<string, string>;
    },
    name: string
  ): string | undefined {
    return (
      pkg.dependencies?.[name] ??
      pkg.devDependencies?.[name] ??
      pkg.peerDependencies?.[name]
    );
  }
}
554+
```
555+
556+
Configuration example in `scenario.yaml`:
557+
```yaml
558+
constraints:
559+
companion_versions:
560+
- main: node
561+
companions:
562+
- name: '@types/node'
563+
rule: 'major must match'
564+
- main: react
565+
companions:
566+
- name: '@types/react'
567+
rule: 'major must match'
568+
```
569+
570+
#### NamespaceMigrationEvaluator
571+
Verifies namespace migrations are complete in both dependencies and code imports.
572+
573+
```typescript
574+
/**
 * Scores whether each configured namespace migration (`from` -> `to`) has been completed.
 *
 * Migrations come from `ctx.scenario.constraints.namespace_migrations`. A migration is
 * complete when:
 * - no package.json in the workspace still declares `migration.from`,
 * - at least one package.json declares `migration.to`, and
 * - no line added by the captured diff imports or requires `migration.from`.
 *
 * The score is `completed / migrations.length` (1 when no migrations are configured).
 * Every reason a migration is incomplete is listed in `details`, so a score below 1 is
 * never paired with the "All namespace migrations completed" message.
 */
class NamespaceMigrationEvaluator implements Evaluator {
  meta = { name: 'NamespaceMigrationEvaluator' } as const;

  async evaluate(ctx: EvaluationContext): Promise<EvaluatorResult> {
    const migrations = ctx.scenario.constraints?.namespace_migrations || [];
    if (!migrations.length) {
      return { name: this.meta.name, score: 1, details: 'No namespace migrations defined' };
    }

    // Read each manifest once instead of once per migration.
    const manifests = getAllPackageJsonPaths(ctx.workspaceDir).map((pkgPath) => ({
      rel: relative(ctx.workspaceDir, pkgPath) || '.',
      pkg: readJson(pkgPath),
    }));

    // Only added lines matter: the line that removes an old import also contains the
    // old specifier, and must not be reported as a leftover import.
    const addedByFile = (ctx.diffSummary || []).map((diff) => ({
      file: diff.file,
      added: this.addedLines(diff.textPatch),
    }));

    let completed = 0;
    const issues: string[] = [];

    for (const migration of migrations) {
      let oldFound = false;
      let newFound = false;

      // Check package.json files
      for (const { rel, pkg } of manifests) {
        if (this.declares(pkg, migration.from)) {
          oldFound = true;
          issues.push(`${rel} still has ${migration.from}`);
        }
        if (this.declares(pkg, migration.to)) {
          newFound = true;
        }
      }

      // Check code imports in diff
      const needles = [
        `from '${migration.from}'`,
        `from "${migration.from}"`,
        `require('${migration.from}')`,
        `require("${migration.from}")`,
      ];
      const codeIssues = addedByFile
        .filter(({ added }) => needles.some((needle) => added.includes(needle)))
        .map(({ file }) => `${file} imports ${migration.from}`);

      if (!oldFound && newFound && codeIssues.length === 0) {
        completed++;
        continue;
      }

      issues.push(...codeIssues);
      if (!newFound) {
        issues.push(`${migration.to} not found in any package.json`);
      }
    }

    return {
      name: this.meta.name,
      score: completed / migrations.length,
      details: issues.length ? issues.join('; ') : 'All namespace migrations completed',
    };
  }

  /**
   * True when `name` has a truthy specifier in `dependencies`, `devDependencies` or
   * `peerDependencies` (checked in that order).
   */
  private declares(
    pkg: {
      dependencies?: Record<string, string>;
      devDependencies?: Record<string, string>;
      peerDependencies?: Record<string, string>;
    },
    name: string
  ): boolean {
    return Boolean(
      pkg.dependencies?.[name] ??
        pkg.devDependencies?.[name] ??
        pkg.peerDependencies?.[name]
    );
  }

  /**
   * Returns the lines a patch adds, joined by newlines, with `+++` file headers excluded.
   *
   * NOTE(review): assumes `textPatch` is unified-diff text where added lines start with
   * '+'. Confirm against the diff capture in the harness before relying on this.
   */
  private addedLines(textPatch: string | null | undefined): string {
    if (!textPatch) return '';
    return textPatch
      .split('\n')
      .filter((line) => line.startsWith('+') && !line.startsWith('+++'))
      .join('\n');
  }
}
644+
```
645+
646+
Configuration example in `scenario.yaml`:
647+
```yaml
648+
constraints:
649+
namespace_migrations:
650+
- from: 'xterm'
651+
to: '@xterm/xterm'
652+
- from: '@nrwl/js'
653+
to: '@nx/js'
654+
```
655+
399656
### Roadmap Evaluators
400657
- **EfficiencyEvaluator**: Would score turn count, duration, and tier bonuses once telemetry wiring lands.
401-
- **CompanionAlignmentEvaluator**: Pair companion packages (e.g., `@types/node``node`).
402-
- **NamespaceMigrationEvaluator**: Verify namespace migrations in both dependencies and code imports.
403658
- **Diff/Security/Performance Evaluators**: Analyze diffs and external advisories for qualitative assessments.
659+
- **Semantic Quality Evaluators**: Agentic evaluators for code quality, upgrade strategy, and migration semantics (see Phase 3 above).
404660
405661
## Testing Strategy
406662

0 commit comments

Comments
 (0)