diff --git a/scripts/agent-evals/README.md b/scripts/agent-evals/README.md index 538bbf426d8..cd807ae9ddd 100644 --- a/scripts/agent-evals/README.md +++ b/scripts/agent-evals/README.md @@ -48,7 +48,15 @@ describe("", function (this: Mocha.Suite) { // Start the AgentTestRunner, which will start up the coding agent in a // pseudo-terminal, and wait for it to load the Firebase MCP server, and // start accepting keystrokes - const run: AgentTestRunner = await startAgentTest(this); + const run: AgentTestRunner = await startAgentTest(this, { + // Name of the template to run in. You can find the list of templates in + // src/template/index.ts (these will auto-complete) + templateName: "next-app-hello-world", + // List of tool mocks to apply for this test. You can find the list of + // available mocks in src/mock/tool-mocks.ts (these will auto-complete). + // See the instructions below on how to add your own mocks + toolMocks: ["nextJsWithProjectMock"], + }); // Simulate typing in the terminal. This will await until the "turn" is over // so any assertions on what happened will happen on the current "turn" @@ -79,6 +87,28 @@ describe("", function (this: Mocha.Suite) { }); ``` +## Adding Templates + +Templates let you run your tests inside of a folder with existing project files in it. For example, you could add a template with an iOS app in it. + +1. Add the new template in a new folder `scripts/agent-evals/templates/` + +2. In `scripts/agent-evals/src/template/index.ts`, add the template name to the templates constant: + +``` +export const templates = [ + { + name: "", + platform: TemplatePlatform.NODE, + }, + ... +] as const; +``` + +3. Ensure you have a `.gitignore` for the template. For example, a Node.js template should ignore `node_modules` + +4. Set the `TemplatePlatform` for your template. To ensure the template is set up before each test run, we'll want to add a build command for the template. 
If you add a new `TemplatePlatform`, update the `buildTemplates()` function for it. For example, Node.js templates will run `npm install` before they are copied into the test directory. + ## Adding Mocks for MCP Tools Mocks applied to MCP tools will completely replace their impelementation with a static output string.