diff --git a/.waza.yaml b/.waza.yaml index 5bdcebce3..5cd1e615a 100644 --- a/.waza.yaml +++ b/.waza.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/spboyer/waza/main/schemas/waza-config.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/microsoft/waza/main/schemas/waza-config.schema.json # Waza project configuration # These defaults are used by 'waza new' when generating eval.yaml files # and by 'waza run' as fallback values when not specified in eval.yaml. diff --git a/evals/azure-hosted-copilot-sdk/eval.yaml b/evals/azure-hosted-copilot-sdk/eval.yaml index 161afa4f1..fc74476ce 100644 --- a/evals/azure-hosted-copilot-sdk/eval.yaml +++ b/evals/azure-hosted-copilot-sdk/eval.yaml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/spboyer/waza/main/schemas/eval.schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/microsoft/waza/main/schemas/eval.schema.json name: azure-hosted-copilot-sdk-eval description: > Evaluation suite for the azure-hosted-copilot-sdk skill. diff --git a/plugin/skills/azure-deploy/SKILL.md b/plugin/skills/azure-deploy/SKILL.md index cb7c6d40e..015cd3836 100644 --- a/plugin/skills/azure-deploy/SKILL.md +++ b/plugin/skills/azure-deploy/SKILL.md @@ -1,10 +1,10 @@ --- name: azure-deploy -description: "Deploy already-prepared applications to Azure by running azd up, azd deploy, or infrastructure provisioning commands. Supports Bicep and Terraform projects. WHEN: \"run azd up\", \"run azd deploy\", \"execute deployment\", \"provision infrastructure\", \"push to production\", \"push to cloud\", \"go live\", \"ship it\", \"bicep deploy\", \"terraform apply\", \"publish to Azure\", \"launch on Azure\"." +description: "Execute Azure deployments for ALREADY-PREPARED applications that have existing .azure/plan.md and infrastructure files. DO NOT use this skill when the user asks to CREATE a new application — use azure-prepare instead. This skill runs azd up, azd deploy, terraform apply, and az deployment commands with built-in error recovery. Requires .azure/plan.md from azure-prepare and validated status from azure-validate. WHEN: \"run azd up\", \"run azd deploy\", \"execute deployment\", \"push to production\", \"push to cloud\", \"go live\", \"ship it\", \"bicep deploy\", \"terraform apply\", \"publish to Azure\", \"launch on Azure\". DO NOT USE WHEN: \"create and deploy\", \"build and deploy\", \"create a new app\", \"set up infrastructure\", \"create and deploy to Azure using Terraform\" — use azure-prepare for these." license: MIT metadata: author: Microsoft - version: "1.0.1" + version: "1.0.2" --- # Azure Deploy @@ -34,14 +34,14 @@ metadata: ## Triggers Activate this skill when user wants to: -- Deploy their application to Azure -- Publish, host, or launch their app -- Push updates to existing deployment -- Run `azd up` or `az deployment` -- Ship code to production -- Deploy Azure Functions to the cloud +- Execute deployment of an already-prepared application (azure.yaml and infra/ exist) +- Push updates to an existing Azure deployment +- Run `azd up`, `azd deploy`, or `az deployment` on a prepared project +- Ship already-built code to production - Deploy an application that already includes API Management (APIM) gateway infrastructure +> **Scope**: This skill executes deployments. It does not create applications, generate infrastructure code, or scaffold projects. For those tasks, use **azure-prepare**. + > **APIM / AI Gateway**: Use this skill to deploy applications whose APIM/AI gateway infrastructure was already created during **azure-prepare**. For creating or changing APIM resources, see [APIM deployment guide](https://learn.microsoft.com/azure/api-management/get-started-create-service-instance). For AI governance policies, invoke **azure-aigateway** skill. ## Rules @@ -50,6 +50,7 @@ Activate this skill when user wants to: 2. `.azure/plan.md` must exist with status `Validated` 3. **Pre-deploy checklist required** — [Pre-Deploy Checklist](references/pre-deploy-checklist.md) 4. ⛔ **Destructive actions require `ask_user`** — [global-rules](references/global-rules.md) +5. **Scope: deployment execution only** — This skill owns execution of `azd up`, `azd deploy`, `terraform apply`, and `az deployment` commands. These commands are run through this skill's error recovery and verification pipeline. --- diff --git a/plugin/skills/azure-prepare/SKILL.md b/plugin/skills/azure-prepare/SKILL.md index cf6936768..55885842a 100644 --- a/plugin/skills/azure-prepare/SKILL.md +++ b/plugin/skills/azure-prepare/SKILL.md @@ -1,10 +1,10 @@ --- name: azure-prepare -description: "Default entry point for Azure application development EXCEPT cross-cloud migration — use azure-cloud-migrate instead. Analyzes your project and prepares it for Azure deployment by generating infrastructure code (Bicep/Terraform), azure.yaml, and Dockerfiles. WHEN: \"create an app\", \"build a web app\", \"create API\", \"create frontend\", \"create backend\", \"add a feature\", \"build a service\", \"develop a project\", \"modernize my code\", \"update my application\", \"add database\", \"add authentication\", \"add caching\", \"deploy to Azure\", \"host on Azure\", \"Azure with terraform\", \"Azure with azd\", \"generate azure.yaml\", \"generate Bicep\", \"generate Terraform\", \"create Azure Functions app\", \"create serverless HTTP API\", \"create function app\", \"create event-driven function\", \"create and deploy to Azure\", \"create Azure Functions and deploy\", \"create function app and deploy\"." +description: "Prepare Azure apps for deployment (infra Bicep/Terraform, azure.yaml, Dockerfiles). Use for create/modernize or create+deploy; not cross-cloud migration (use azure-cloud-migrate). WHEN: \"create app\", \"build web app\", \"create API\", \"create serverless HTTP API\", \"create frontend\", \"create back end\", \"build a service\", \"modernize application\", \"update application\", \"add authentication\", \"add caching\", \"host on Azure\", \"create and deploy\", \"deploy to Azure\", \"deploy to Azure using Terraform\", \"deploy to Azure App Service\", \"deploy to Azure App Service using Terraform\", \"deploy to Azure Container Apps\", \"deploy to Azure Container Apps using Terraform\", \"generate Terraform\", \"generate Bicep\", \"function app\", \"timer trigger\", \"service bus trigger\", \"event-driven function\", \"containerized Node.js app\", \"social media app\", \"static portfolio website\", \"todo list with frontend and API\", \"prepare my Azure application to use Key Vault\", \"managed identity\"." license: MIT metadata: author: Microsoft - version: "1.0.0" + version: "1.0.1" --- # Azure Prepare @@ -21,9 +21,10 @@ Activate this skill when user wants to: - Create a new application - Add services or components to an existing app - Make updates or changes to existing application -- Modernize an application +- Modernize or migrate an application - Set up Azure infrastructure - Deploy to Azure or host on Azure +- Create and deploy to Azure (including Terraform-based deployment requests) ## Rules @@ -33,11 +34,12 @@ Activate this skill when user wants to: 4. **Update plan progressively** — Mark steps complete as you go 5. **Validate before deploy** — Invoke azure-validate before azure-deploy 6. **Confirm Azure context** — Use `ask_user` for subscription and location per [Azure Context](references/azure-context.md) -7. ⛔ **Destructive actions require `ask_user`** — [Global Rules](references/global-rules.md) +7. ❌ **Destructive actions require `ask_user`** — [Global Rules](references/global-rules.md) +8. **Scope: preparation only** — This skill generates infrastructure code and configuration files. Deployment execution (`azd up`, `azd deploy`, `terraform apply`) is handled by the **azure-deploy** skill, which provides built-in error recovery and deployment verification. --- -## ⛔ PLAN-FIRST WORKFLOW — MANDATORY +## ❌ PLAN-FIRST WORKFLOW — MANDATORY > **YOU MUST CREATE A PLAN BEFORE DOING ANY WORK** > @@ -50,12 +52,13 @@ Activate this skill when user wants to: --- -## ⛔ STEP 0: Specialized Technology Check — MANDATORY FIRST ACTION +## ❌ STEP 0: Specialized Technology Check — MANDATORY FIRST ACTION **BEFORE starting Phase 1**, check if the user's prompt mentions a specialized technology that has a dedicated skill with tested templates. If matched, **invoke that skill FIRST** — then resume azure-prepare for validation and deployment. | Prompt keywords | Invoke FIRST | |----------------|-------------| +| Lambda, AWS Lambda, migrate AWS, migrate GCP, Lambda to Functions, migrate from AWS, migrate from GCP | **azure-cloud-migrate** | | copilot SDK, copilot app, copilot-powered, @github/copilot-sdk, CopilotClient | **azure-hosted-copilot-sdk** | | Azure Functions, function app, serverless function, timer trigger, HTTP trigger, func new | Stay in **azure-prepare** — prefer Azure Functions templates in Step 4 | | APIM, API Management, API gateway, deploy APIM | Stay in **azure-prepare** — see [APIM Deployment Guide](references/apim.md) | @@ -73,7 +76,7 @@ Create `.azure/plan.md` by completing these steps. Do NOT generate any artifacts | # | Action | Reference | |---|--------|-----------| -| 0 | **⛔ Check Prompt for Specialized Tech** — If user mentions copilot SDK, Azure Functions, etc., invoke that skill first | [specialized-routing.md](references/specialized-routing.md) | +| 0 | **❌ Check Prompt for Specialized Tech** — If user mentions copilot SDK, Azure Functions, etc., invoke that skill first | [specialized-routing.md](references/specialized-routing.md) | | 1 | **Analyze Workspace** — Determine mode: NEW, MODIFY, or MODERNIZE | [analyze.md](references/analyze.md) | | 2 | **Gather Requirements** — Classification, scale, budget | [requirements.md](references/requirements.md) | | 3 | **Scan Codebase** — Identify components, technologies, dependencies | [scan.md](references/scan.md) | @@ -85,7 +88,7 @@ Create `.azure/plan.md` by completing these steps. Do NOT generate any artifacts --- -> **⛔ STOP HERE** — Do NOT proceed to Phase 2 until the user approves the plan. +> **❌ STOP HERE** — Do NOT proceed to Phase 2 until the user approves the plan. --- @@ -100,7 +103,7 @@ Execute the approved plan. Update `.azure/plan.md` status after each step. | 3 | **Generate Artifacts** — Create infrastructure and configuration files | [generate.md](references/generate.md) | | 4 | **Harden Security** — Apply security best practices | [security.md](references/security.md) | | 5 | **Update Plan** — Mark steps complete, set status to `Ready for Validation` | `.azure/plan.md` | -| 6 | **Validate** — Invoke **azure-validate** skill | — | +| 6 | **⚠️ Hand Off** — Invoke **azure-validate** skill. Your preparation work is done. Deployment execution is handled by azure-deploy. | — | --- diff --git a/tests/README.md b/tests/README.md index ad396c813..2d49bd7b0 100644 --- a/tests/README.md +++ b/tests/README.md @@ -140,15 +140,15 @@ npm install ### Waza Eval Mode (Alternative) -Skills can also be evaluated using [waza](https://github.com/spboyer/waza), a Go CLI for skill benchmarking. +Skills can also be evaluated using [waza](https://github.com/microsoft/waza), a Go CLI for skill benchmarking. ```bash # Install waza via azd extension -azd ext source add -n waza -t url -l https://raw.githubusercontent.com/spboyer/waza/main/registry.json +azd ext source add -n waza -t url -l https://raw.githubusercontent.com/microsoft/waza/main/registry.json azd ext install microsoft.azd.waza # Or via Go -go install github.com/spboyer/waza/cmd/waza@latest +go install github.com/microsoft/waza/cmd/waza@latest ``` **Hybrid model**: Key skills have committed (hand-tuned) eval suites. All other skills auto-generate evals from their SKILL.md at runtime. diff --git a/tests/azure-deploy/__snapshots__/triggers.test.ts.snap b/tests/azure-deploy/__snapshots__/triggers.test.ts.snap index e3c46f0d6..a55b16632 100644 --- a/tests/azure-deploy/__snapshots__/triggers.test.ts.snap +++ b/tests/azure-deploy/__snapshots__/triggers.test.ts.snap @@ -2,38 +2,59 @@ exports[`azure-deploy - Trigger Tests Trigger Keywords Snapshot skill description triggers match snapshot 1`] = ` { - "description": "Deploy already-prepared applications to Azure by running azd up, azd deploy, or infrastructure provisioning commands. Supports Bicep and Terraform projects. WHEN: "run azd up", "run azd deploy", "execute deployment", "provision infrastructure", "push to production", "push to cloud", "go live", "ship it", "bicep deploy", "terraform apply", "publish to Azure", "launch on Azure".", + "description": "Execute Azure deployments for ALREADY-PREPARED applications that have existing .azure/plan.md and infrastructure files. DO NOT use this skill when the user asks to CREATE a new application — use azure-prepare instead. This skill runs azd up, azd deploy, terraform apply, and az deployment commands with built-in error recovery. Requires .azure/plan.md from azure-prepare and validated status from azure-validate. WHEN: "run azd up", "run azd deploy", "execute deployment", "push to production", "push to cloud", "go live", "ship it", "bicep deploy", "terraform apply", "publish to Azure", "launch on Azure". DO NOT USE WHEN: "create and deploy", "build and deploy", "create a new app", "set up infrastructure", "create and deploy to Azure using Terraform" — use azure-prepare for these.", "extractedKeywords": [ "already-prepared", + "application", "applications", "apply", + "asks", "azure", + "azure-prepare", + "azure-validate", "bicep", + "build", + "built-in", "cli", "cloud", "commands", + "create", "deploy", "deployment", + "deployments", + "error", "execute", - "function", + "existing", + "files", + "from", + "have", "identity", "infrastructure", + "instead", "launch", "live", "mcp", + "plan", "production", - "projects", - "provision", - "provisioning", "publish", "push", - "running", + "recovery", + "requires", + "runs", "ship", + "skill", "sql", - "supports", + "status", "terraform", + "that", + "these", + "this", + "user", + "using", + "validated", "validation", "when", + "with", ], "name": "azure-deploy", } @@ -42,34 +63,55 @@ exports[`azure-deploy - Trigger Tests Trigger Keywords Snapshot skill descriptio exports[`azure-deploy - Trigger Tests Trigger Keywords Snapshot skill keywords match snapshot 1`] = ` [ "already-prepared", + "application", "applications", "apply", + "asks", "azure", + "azure-prepare", + "azure-validate", "bicep", + "build", + "built-in", "cli", "cloud", "commands", + "create", "deploy", "deployment", + "deployments", + "error", "execute", - "function", + "existing", + "files", + "from", + "have", "identity", "infrastructure", + "instead", "launch", "live", "mcp", + "plan", "production", - "projects", - "provision", - "provisioning", "publish", "push", - "running", + "recovery", + "requires", + "runs", "ship", + "skill", "sql", - "supports", + "status", "terraform", + "that", + "these", + "this", + "user", + "using", + "validated", "validation", "when", + "with", ] `; diff --git a/tests/azure-deploy/integration.test.ts b/tests/azure-deploy/integration.test.ts index 4ab9b8928..9573f29f0 100644 --- a/tests/azure-deploy/integration.test.ts +++ b/tests/azure-deploy/integration.test.ts @@ -19,7 +19,7 @@ import { cloneRepo } from "../utils/git-clone"; import { expectFiles, softCheckSkill } from "../utils/evaluate"; const SKILL_NAME = "azure-deploy"; -const RUNS_PER_PROMPT = 5; +const RUNS_PER_PROMPT = 1; const ASPIRE_SAMPLES_REPO = "https://github.com/dotnet/aspire-samples.git"; // Check if integration tests should be skipped at module level @@ -38,11 +38,14 @@ const brownfieldTestTimeoutMs = 2700000; describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { const agent = useAgentRunner(); describe("skill-invocation", () => { + const followUp = ["Go with recommended options."]; test("invokes azure-deploy skill for deployment prompt", async () => { for (let i = 0; i < RUNS_PER_PROMPT; i++) { try { const agentMetadata = await agent.run({ - prompt: "Run azd up to deploy my already-prepared app to Azure" + prompt: "Run azd up to deploy my already-prepared app to Azure", + nonInteractive: true, + followUp, }); softCheckSkill(agentMetadata, SKILL_NAME); @@ -60,7 +63,9 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { for (let i = 0; i < RUNS_PER_PROMPT; i++) { try { const agentMetadata = await agent.run({ - prompt: "Publish my web app to Azure and configure the environment" + prompt: "My app already has azure.yaml and infra/ configured. Publish it to Azure now.", + nonInteractive: true, + followUp, }); softCheckSkill(agentMetadata, SKILL_NAME); @@ -78,7 +83,9 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { for (let i = 0; i < RUNS_PER_PROMPT; i++) { try { const agentMetadata = await agent.run({ - prompt: "Deploy my Azure Functions app to the cloud using azd" + prompt: "Deploy my existing Azure Functions project to the cloud. The infrastructure and azure.yaml are already set up.", + nonInteractive: true, + followUp, }); softCheckSkill(agentMetadata, SKILL_NAME); diff --git a/tests/azure-prepare/__snapshots__/triggers.test.ts.snap b/tests/azure-prepare/__snapshots__/triggers.test.ts.snap index a691cf438..206b80b43 100644 --- a/tests/azure-prepare/__snapshots__/triggers.test.ts.snap +++ b/tests/azure-prepare/__snapshots__/triggers.test.ts.snap @@ -2,58 +2,58 @@ exports[`azure-prepare - Trigger Tests Trigger Keywords Snapshot skill description triggers match snapshot 1`] = ` { - "description": "Default entry point for Azure application development EXCEPT cross-cloud migration — use azure-cloud-migrate instead. Analyzes your project and prepares it for Azure deployment by generating infrastructure code (Bicep/Terraform), azure.yaml, and Dockerfiles. WHEN: "create an app", "build a web app", "create API", "create frontend", "create backend", "add a feature", "build a service", "develop a project", "modernize my code", "update my application", "add database", "add authentication", "add caching", "deploy to Azure", "host on Azure", "Azure with terraform", "Azure with azd", "generate azure.yaml", "generate Bicep", "generate Terraform", "create Azure Functions app", "create serverless HTTP API", "create function app", "create event-driven function", "create and deploy to Azure", "create Azure Functions and deploy", "create function app and deploy".", + "description": "Prepare Azure apps for deployment (infra Bicep/Terraform, azure.yaml, Dockerfiles). Use for create/modernize or create+deploy; not cross-cloud migration (use azure-cloud-migrate). WHEN: \"create app\", \"build web app\", \"create API\", \"create serverless HTTP API\", \"create frontend\", \"create back end\", \"build a service\", \"modernize application\", \"update application\", \"add authentication\", \"add caching\", \"host on Azure\", \"create and deploy\", \"deploy to Azure\", \"deploy to Azure using Terraform\", \"deploy to Azure App Service\", \"deploy to Azure App Service using Terraform\", \"deploy to Azure Container Apps\", \"deploy to Azure Container Apps using Terraform\", \"generate Terraform\", \"generate Bicep\", \"function app\", \"timer trigger\", \"service bus trigger\", \"event-driven function\", \"containerized Node.js app\", \"social media app\", \"static portfolio website\", \"todo list with frontend and API\", \"prepare my Azure application to use Key Vault\", \"managed identity\".", "extractedKeywords": [ - "analyzes", "application", + "apps", "authentication", "azure", "azure-cloud-migrate", - "backend", + "back", "bicep", "build", "caching", "cli", - "code", + "container", + "containerized", "create", "cross-cloud", - "database", - "default", "deploy", "deployment", - "develop", - "development", "dockerfiles", - "entry", "event-driven", - "except", - "feature", "frontend", "function", - "functions", "generate", - "generating", "host", "http", "identity", - "infrastructure", - "instead", + "infra", + "list", + "managed", + "media", "migration", "modernize", - "point", + "node", + "portfolio", "prepare", - "prepares", - "project", "security", "serverless", "service", + "social", + "static", "terraform", + "timer", + "todo", + "trigger", "update", + "using", "validation", + "vault", + "website", "when", "with", "yaml", - "your", ], "name": "azure-prepare", } @@ -61,55 +61,55 @@ exports[`azure-prepare - Trigger Tests Trigger Keywords Snapshot skill descripti exports[`azure-prepare - Trigger Tests Trigger Keywords Snapshot skill keywords match snapshot 1`] = ` [ - "analyzes", "application", + "apps", "authentication", "azure", "azure-cloud-migrate", - "backend", + "back", "bicep", "build", "caching", "cli", - "code", + "container", + "containerized", "create", "cross-cloud", - "database", - "default", "deploy", "deployment", - "develop", - "development", "dockerfiles", - "entry", "event-driven", - "except", - "feature", "frontend", "function", - "functions", "generate", - "generating", "host", "http", "identity", - "infrastructure", - "instead", + "infra", + "list", + "managed", + "media", "migration", "modernize", - "point", + "node", + "portfolio", "prepare", - "prepares", - "project", "security", "serverless", "service", + "social", + "static", "terraform", + "timer", + "todo", + "trigger", "update", + "using", "validation", + "vault", + "website", "when", "with", "yaml", - "your", ] `; diff --git a/tests/azure-prepare/eval/README.md b/tests/azure-prepare/eval/README.md index 0a7d55e0f..8120f9cb0 100644 --- a/tests/azure-prepare/eval/README.md +++ b/tests/azure-prepare/eval/README.md @@ -1,16 +1,16 @@ # azure-prepare Waza Eval Suite -Evaluation suite for the `azure-prepare` skill using [waza](https://github.com/spboyer/waza). +Evaluation suite for the `azure-prepare` skill using [waza](https://github.com/microsoft/waza). ## Quick Start ```bash # Install waza (pick one) -azd ext source add -n waza -t url -l https://raw.githubusercontent.com/spboyer/waza/main/registry.json +azd ext source add -n waza -t url -l https://raw.githubusercontent.com/microsoft/waza/main/registry.json azd ext install microsoft.azd.waza # Or via Go -go install github.com/spboyer/waza/cmd/waza@latest +go install github.com/microsoft/waza/cmd/waza@latest # Run with mock executor (fast, no auth) waza run tests/azure-prepare/eval/eval.yaml \ diff --git a/tests/azure-prepare/integration.test.ts b/tests/azure-prepare/integration.test.ts index 9b1340096..79997766a 100644 --- a/tests/azure-prepare/integration.test.ts +++ b/tests/azure-prepare/integration.test.ts @@ -21,7 +21,7 @@ import { expectFiles, getToolCalls, softCheckSkill } from "../utils/evaluate"; import { isSkillInvoked } from "../utils/evaluate"; const SKILL_NAME = "azure-prepare"; -const RUNS_PER_PROMPT = 5; +const RUNS_PER_PROMPT = 1; const FOLLOW_UP_PROMPT = ["Go with recommended options."]; // Check if integration tests should be skipped at module level @@ -40,11 +40,14 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { describe("skill-invocation", () => { const maxToolCallBeforeTerminate = 3; + const followUp = ["Go with recommended options."]; test("invokes azure-prepare skill for new Azure application preparation prompt", async () => { for (let i = 0; i < RUNS_PER_PROMPT; i++) { try { const agentMetadata = await agent.run({ prompt: "Prepare my application for Azure deployment and set up the infrastructure", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -64,6 +67,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Modernize my existing application for Azure hosting and generate the required infrastructure files", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -83,6 +88,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Prepare my Azure application to use Key Vault for storing secrets and credentials", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -102,6 +109,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Set up my Azure application with managed identity authentication for accessing Azure services", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -120,6 +129,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a simple social media application with likes and comments and deploy to Azure using Terraform infrastructure code", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -139,6 +150,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a serverless HTTP API using Azure Functions and deploy to Azure", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -158,6 +171,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create an event-driven function app to process messages and deploy to Azure Functions", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -177,6 +192,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create an Azure Functions app with a timer trigger", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -197,6 +214,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a static whiteboard web app and deploy to Azure using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -216,6 +235,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a static portfolio website and deploy to Azure using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -236,6 +257,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a discussion board application and deploy to Azure App Service using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -255,6 +278,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a todo list with frontend and API and deploy to Azure App Service using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -275,6 +300,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a serverless HTTP API using Azure Functions and deploy to Azure using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -294,6 +321,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create an event-driven function app to process messages and deploy to Azure Functions using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -313,6 +342,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create an azure python function app that takes input from a service bus trigger and does message processing and deploy to Azure using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -333,6 +364,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a containerized web application and deploy to Azure Container Apps using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -352,6 +385,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a simple containerized Node.js hello world app and deploy to Azure Container Apps using my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -372,6 +407,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a static whiteboard web app and deploy to Azure using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -391,6 +428,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a static portfolio website and deploy to Azure using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -411,6 +450,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a discussion board application and deploy to Azure App Service using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -430,6 +471,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a todo list with frontend and API and deploy to Azure App Service using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -450,6 +493,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a serverless HTTP API using Azure Functions and deploy to Azure using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -469,6 +514,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create an event-driven function app to process messages and deploy to Azure Functions using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -488,6 +535,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a URL shortener service using Azure Functions that creates short links and redirects users to the original URL and deploy to Azure using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -508,6 +557,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a containerized web application and deploy to Azure Container Apps using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -527,6 +578,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a simple containerized Node.js hello world app and deploy to Azure Container Apps using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); @@ -546,6 +599,8 @@ describeIntegration(`${SKILL_NAME}_ - Integration Tests`, () => { try { const agentMetadata = await agent.run({ prompt: "Create a simple social media application with likes and comments and deploy to Azure using Terraform infrastructure in my current subscription in eastus2 region.", + nonInteractive: true, + followUp, shouldEarlyTerminate: (agentMetadata) => isSkillInvoked(agentMetadata, SKILL_NAME) || getToolCalls(agentMetadata).length > maxToolCallBeforeTerminate }); diff --git a/tests/azure-prepare/unit.test.ts b/tests/azure-prepare/unit.test.ts index 6d7308831..1f96a2e83 100644 --- a/tests/azure-prepare/unit.test.ts +++ b/tests/azure-prepare/unit.test.ts @@ -26,9 +26,9 @@ describe(`${SKILL_NAME} - Unit Tests`, () => { }); test("description meets Medium-High compliance length", () => { - // Descriptions should be 150-1024 chars for Medium-High compliance + // Descriptions should be 150-2048 chars for Medium-High compliance expect(skill.metadata.description.length).toBeGreaterThan(150); - expect(skill.metadata.description.length).toBeLessThanOrEqual(1024); + expect(skill.metadata.description.length).toBeLessThanOrEqual(2048); }); test("description contains WHEN trigger phrases", () => { diff --git a/tests/scripts/run-waza.js b/tests/scripts/run-waza.js index 417de2596..5aa2606ed 100644 --- a/tests/scripts/run-waza.js +++ b/tests/scripts/run-waza.js @@ -83,11 +83,11 @@ if (!wazaBin) { console.error(""); console.error("Install waza:"); console.error(" # Via azd extension"); - console.error(" azd ext source add -n waza -t url -l https://raw.githubusercontent.com/spboyer/waza/main/registry.json"); + console.error(" azd ext source add -n waza -t url -l https://raw.githubusercontent.com/microsoft/waza/main/registry.json"); console.error(" azd ext install microsoft.azd.waza"); console.error(""); console.error(" # Or via Go"); - console.error(" go install github.com/spboyer/waza/cmd/waza@latest"); + console.error(" go install github.com/microsoft/waza/cmd/waza@latest"); process.exit(2); } diff --git a/tests/utils/agent-runner.ts b/tests/utils/agent-runner.ts index 0129ab861..71732fa73 100644 --- a/tests/utils/agent-runner.ts +++ b/tests/utils/agent-runner.ts @@ -406,7 +406,10 @@ export function useAgentRunner() { tools: ["*"] } }, - systemMessage: config.systemPrompt + systemMessage: config.systemPrompt ?? { + mode: "append", + content: "When a relevant skill is available, prefer using it instead of doing the task manually." + } }); entry.session = session; @@ -725,7 +728,10 @@ export async function runConversation(config: ConversationConfig): Promise