Skip to content

Commit 0f280d2

Browse files
authored
Update Agent Evals to run in a home directory (#9427)
1 parent 659c558 commit 0f280d2

File tree

4 files changed

+55
-31
lines changed

4 files changed

+55
-31
lines changed

scripts/agent-evals/src/runner/gemini-cli-runner.ts

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@ import {
1010
} from "./tool-matcher.js";
1111
import fs from "fs";
1212
import { throwFailure } from "./logging.js";
13-
import { getAgentEvalsRoot } from "./paths.js";
13+
import { getAgentEvalsRoot, RunDirectories } from "./paths.js";
1414
import { execSync } from "node:child_process";
1515
import { ToolMockName } from "../mock/tool-mocks.js";
1616

1717
const READY_PROMPT = "Type your message";
18+
const INSTALL_ID = "238efa5b-efb2-44bd-9dce-9b081532681c";
1819

1920
interface ParsedTelemetryLog {
2021
attributes?: {
@@ -44,15 +45,16 @@ export class GeminiCliRunner implements AgentTestRunner {
4445

4546
constructor(
4647
private readonly testName: string,
47-
testDir: string,
48-
runDir: string,
48+
dirs: RunDirectories,
4949
toolMocks: ToolMockName[],
5050
) {
5151
// Create a settings file to point the CLI to a local telemetry log
52-
this.telemetryPath = path.join(testDir, "telemetry.log");
52+
this.telemetryPath = path.join(dirs.testDir, "telemetry.log");
5353
const mockPath = path.resolve(path.join(getAgentEvalsRoot(), "lib/mock/mock-tools-main.js"));
5454
const firebasePath = execSync("which firebase").toString().trim();
55-
const settings = {
55+
56+
// Write workspace Gemini Settings
57+
this.writeGeminiSettings(dirs.runDir, {
5658
general: {
5759
disableAutoUpdate: true,
5860
},
@@ -71,15 +73,29 @@ export class GeminiCliRunner implements AgentTestRunner {
7173
},
7274
},
7375
},
74-
};
75-
const geminiDir = path.join(runDir, ".gemini");
76-
mkdirSync(geminiDir, { recursive: true });
77-
writeFileSync(path.join(geminiDir, "settings.json"), JSON.stringify(settings, null, 2));
76+
});
77+
78+
// Write user Gemini Settings
79+
this.writeGeminiSettings(dirs.userDir, {
80+
security: {
81+
auth: {
82+
selectedType: "gemini-api-key",
83+
},
84+
},
85+
hasSeenIdeIntegrationNudge: true,
86+
});
87+
88+
this.writeGeminiInstallId(dirs.userDir);
7889

7990
this.cli = new InteractiveCLI("gemini", ["--yolo"], {
80-
cwd: runDir,
91+
cwd: dirs.runDir,
8192
readyPrompt: READY_PROMPT,
8293
showOutput: true,
94+
env: {
95+
// Overwrite $HOME so that we can support GCLI features that only apply
96+
// on a per-user basis, like memories and extensions
97+
HOME: dirs.userDir,
98+
},
8399
});
84100
}
85101

@@ -101,6 +117,21 @@ export class GeminiCliRunner implements AgentTestRunner {
101117
await this.cli.kill();
102118
}
103119

120+
/**
 * Serializes `settings` as pretty-printed JSON (2-space indent) to
 * `<dir>/.gemini/settings.json`, creating the `.gemini` directory first if it
 * does not exist yet.
 *
 * @param dir directory that should contain the `.gemini` folder.
 * @param settings JSON-serializable settings value to write. Typed as
 *   `unknown` because it is only ever handed to `JSON.stringify`.
 */
writeGeminiSettings(dir: string, settings: unknown) {
  const geminiDir = path.join(dir, ".gemini");
  // `recursive: true` makes this a no-op when the directory already exists.
  mkdirSync(geminiDir, { recursive: true });
  writeFileSync(path.join(geminiDir, "settings.json"), JSON.stringify(settings, null, 2));
}
125+
126+
/**
 * Writes a constant, real install ID so that we don't bump Gemini metrics
 * with fake users.
 *
 * The `.gemini` directory is created here if it is missing, so this method
 * does not rely on `writeGeminiSettings` having been called for the same
 * directory beforehand.
 *
 * @param userDir directory in which `.gemini/installation_id` is written.
 */
writeGeminiInstallId(userDir: string) {
  const geminiDir = path.join(userDir, ".gemini");
  // Without this, writeFileSync fails with ENOENT when `.gemini` is absent.
  mkdirSync(geminiDir, { recursive: true });
  writeFileSync(path.join(geminiDir, "installation_id"), INSTALL_ID);
}
134+
104135
/**
105136
* Reads the agent's telemetry file and looks for the given event. Throws if
106137
* the event is not found

scripts/agent-evals/src/runner/index.ts

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@ import { randomBytes } from "node:crypto";
33
import { mkdirSync } from "node:fs";
44
import { AgentTestRunner } from "./agent-test-runner.js";
55
import { GeminiCliRunner } from "./gemini-cli-runner.js";
6-
import { buildFirebaseCli, clearUserMcpServers } from "./setup.js";
6+
import { buildFirebaseCli } from "./setup.js";
77
import { addCleanup } from "../helpers/cleanup.js";
88
import { TemplateName, copyTemplate, buildTemplates } from "../template/index.js";
99
import { ToolMockName } from "../mock/tool-mocks.js";
10+
import { RunDirectories } from "./paths.js";
1011

1112
export * from "./agent-test-runner.js";
1213

1314
const dateName = new Date().toISOString().replace("T", "_").replace(/:/g, "-").replace(".", "-");
1415

1516
export async function setupEnvironment(): Promise<void> {
1617
await buildFirebaseCli();
17-
await clearUserMcpServers();
1818
await buildTemplates();
1919
}
2020

@@ -35,13 +35,13 @@ export async function startAgentTest(
3535
throw new Error("startAgentTest must be called inside of an `it` block of a Mocha test.");
3636
}
3737
const testName = mocha.test.fullTitle();
38-
const { testDir, runDir } = createRunDirectory(testName);
38+
const dirs = createRunDirectory(testName);
3939

4040
if (options?.templateName) {
41-
copyTemplate(options.templateName, runDir);
41+
copyTemplate(options.templateName, dirs.runDir);
4242
}
4343

44-
const run = new GeminiCliRunner(testName, testDir, runDir, options?.toolMocks || []);
44+
const run = new GeminiCliRunner(testName, dirs, options?.toolMocks || []);
4545
await run.waitForReadyPrompt();
4646

4747
addCleanup(async () => {
@@ -51,12 +51,17 @@ export async function startAgentTest(
5151
return run;
5252
}
5353

54-
function createRunDirectory(testName: string): { testDir: string; runDir: string } {
54+
function createRunDirectory(testName: string): RunDirectories {
5555
const sanitizedName = testName.toLowerCase().replace(/[^a-z0-9]/g, "-");
5656
const testDir = path.resolve(
5757
path.join("output", dateName, `${sanitizedName}-${randomBytes(8).toString("hex")}`),
5858
);
59+
5960
const runDir = path.join(testDir, "repo");
6061
mkdirSync(runDir, { recursive: true });
61-
return { testDir, runDir };
62+
63+
const userDir = path.join(testDir, "user");
64+
mkdirSync(userDir, { recursive: true });
65+
66+
return { testDir, runDir, userDir };
6267
}

scripts/agent-evals/src/runner/paths.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import path from "path";
22
import { fileURLToPath } from "url";
33

4+
/**
 * The set of directories created for a single agent eval run.
 */
export type RunDirectories = {
  /** Per-test output directory; the telemetry log is written here. */
  testDir: string;
  /** The `repo` directory under `testDir`; used as the CLI's working directory. */
  runDir: string;
  /** The `user` directory under `testDir`; used as the CLI's `$HOME`. */
  userDir: string;
};
5+
46
export function getAgentEvalsRoot(): string {
57
const thisFilePath = path.dirname(fileURLToPath(import.meta.url));
68
return path.resolve(path.join(thisFilePath, "..", ".."));

scripts/agent-evals/src/runner/setup.ts

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,3 @@ export async function buildFirebaseCli() {
1313
console.log(`Building Firebase CLI at ${firebaseCliRoot}`);
1414
await execPromise("./scripts/clean-install.sh", { cwd: firebaseCliRoot });
1515
}
16-
17-
export async function clearUserMcpServers() {
18-
console.log(`Clearing existing MCP servers...`);
19-
try {
20-
await execPromise("gemini extensions uninstall firebase");
21-
} catch (_: any) {
22-
/* This can fail if there's nothing installed, so ignore that */
23-
}
24-
try {
25-
await execPromise("gemini mcp remove firebase");
26-
} catch (_: any) {
27-
/* This can fail if there's nothing installed, so ignore that */
28-
}
29-
}

0 commit comments

Comments
 (0)