@@ -33,6 +33,7 @@ export const evalPlannerAgent = async (params: {
3333 initCommand,
3434 fileStates,
3535 } = params
36+ const plannerStartTime = Date . now ( )
3637 const result = await withTestRepo (
3738 { repoUrl, commitSha, initCommand, checkoutPrevious : true } ,
3839 async ( cwd ) => {
@@ -49,6 +50,7 @@ export const evalPlannerAgent = async (params: {
4950 } )
5051 } ,
5152 )
53+ const plannerLatencyMs = Date . now ( ) - plannerStartTime
5254
5355 const { output } = result
5456
@@ -137,6 +139,7 @@ Evaluate how well the implementation plan matches the real commit changes. Consi
137139 overallScore : 0 ,
138140 } ,
139141 agentOutput : outputString ,
142+ plannerLatencyMs,
140143 }
141144 }
142145 const { output : judgeOutput } = judgeResult
@@ -147,7 +150,7 @@ Evaluate how well the implementation plan matches the real commit changes. Consi
147150 overallScore : number
148151 }
149152
150- return { judgingResults, agentOutput : outputString }
153+ return { judgingResults, agentOutput : outputString , plannerLatencyMs }
151154}
152155
153156const judgeAgent : AgentDefinition = {
@@ -245,6 +248,7 @@ async function main() {
245248 cons : string
246249 overallScore : number
247250 }
251+ plannerLatencyMs : number
248252 } >
249253
250254 // Track statistics
@@ -253,6 +257,7 @@ async function main() {
253257 completed : 0 ,
254258 failed : 0 ,
255259 scores : [ ] as number [ ] ,
260+ plannerLatencies : [ ] as number [ ] ,
256261 }
257262
258263 // Loop through each eval task
@@ -274,12 +279,13 @@ async function main() {
274279 fileStates,
275280 } )
276281
277- const { judgingResults, agentOutput } = result
282+ const { judgingResults, agentOutput, plannerLatencyMs } = result
278283 allResults . push ( {
279284 sha,
280285 spec,
281286 agentOutput,
282287 judgingResults,
288+ plannerLatencyMs,
283289 } )
284290
285291 fs . writeFileSync (
@@ -310,10 +316,14 @@ async function main() {
310316 const emptyBar = '░' . repeat ( 10 - Math . floor ( overallScore / 10 ) )
311317 console . log ( `${ scoreBar } ${ emptyBar } ${ overallScore } /100` )
312318
319+ console . log ( '\n⏱️ LATENCY:' )
320+ console . log ( ` ${ ( plannerLatencyMs / 1000 ) . toFixed ( 2 ) } s` )
321+
313322 console . log ( '\n' + '=' . repeat ( 80 ) + '\n' )
314323
315324 stats . completed ++
316325 stats . scores . push ( overallScore )
326+ stats . plannerLatencies . push ( plannerLatencyMs )
317327 } catch ( error ) {
318328 console . log ( `\n${ '=' . repeat ( 80 ) } ` )
319329 console . error ( `✗ Failed eval for commit ${ sha } ` )
@@ -360,6 +370,22 @@ async function main() {
360370 )
361371 }
362372
// Print aggregate planner latency (average, median, min, max) in seconds.
// Skipped entirely when no latency was recorded, so the average never
// divides by zero and the min/max lookups never hit an empty array.
if (stats.plannerLatencies.length > 0) {
  // Sort a copy: Array.prototype.sort reorders in place, and the recorded
  // latencies should keep their per-task order after this summary is printed.
  const sortedLatencies = [...stats.plannerLatencies].sort((a, b) => a - b)
  const latencyCount = sortedLatencies.length

  const avgPlannerLatency =
    sortedLatencies.reduce((sum, ms) => sum + ms, 0) / latencyCount

  // With the values sorted ascending, the extremes are the two ends.
  const minPlannerLatency = sortedLatencies[0]
  const maxPlannerLatency = sortedLatencies[latencyCount - 1]

  // Median: the middle value for an odd count, the mean of the two middle
  // values for an even count.
  const upperMid = Math.floor(latencyCount / 2)
  const medianPlannerLatency =
    latencyCount % 2 === 1
      ? sortedLatencies[upperMid]
      : (sortedLatencies[upperMid - 1] + sortedLatencies[upperMid]) / 2

  // Latencies are stored in milliseconds; report seconds with two decimals.
  console.log('Latency Statistics:')
  console.log(` Average: ${(avgPlannerLatency / 1000).toFixed(2)}s`)
  console.log(` Median: ${(medianPlannerLatency / 1000).toFixed(2)}s`)
  console.log(` Min: ${(minPlannerLatency / 1000).toFixed(2)}s`)
  console.log(` Max: ${(maxPlannerLatency / 1000).toFixed(2)}s\n`)
}
388+
363389 console . log ( '=' . repeat ( 80 ) )
364390}
365391
0 commit comments