@@ -166,6 +166,26 @@ async def test_judge_input_output_binary_content_list_mock(mocker: MockerFixture
     assert image_content in raw_prompt, 'Expected the exact BinaryContent instance to be in the prompt list'


+async def test_judge_binary_output_mock(mocker: MockerFixture, image_content: BinaryContent) -> None:
+    """Test judge_output function when binary content is to be judged."""
+    # Mock the agent run method
+    mock_result = mocker.MagicMock()
+    mock_result.output = GradingOutput(reason='Test passed', pass_=True, score=1.0)
+    mock_run = mocker.patch('pydantic_ai.agent.AbstractAgent.run', return_value=mock_result)
+
+    result = await judge_output(output=image_content, rubric='dummy rubric')
+    assert isinstance(result, GradingOutput)
+    assert result.reason == 'Test passed'
+    assert result.pass_ is True
+    assert result.score == 1.0
+
+    # Verify the agent was called with the correct prompt
+    mock_run.assert_called_once()
+    call_args, *_ = mock_run.call_args
+
+    assert call_args == snapshot((['<Output>', image_content, '</Output>', '<Rubric>', 'dummy rubric', '</Rubric>'],))
+
+
 async def test_judge_input_output_binary_content_mock(mocker: MockerFixture, image_content: BinaryContent):
     """Test judge_input_output function with mocked agent."""
     # Mock the agent run method
@@ -237,10 +257,24 @@ async def test_judge_input_output_expected_mock(mocker: MockerFixture, image_con

     # Verify the agent was called with correct prompt
     call_args = mock_run.call_args[0]
-    assert '<Input>\nHello\n</Input>' in call_args[0]
-    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
-    assert '<Output>\nHello world\n</Output>' in call_args[0]
-    assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
+    assert call_args == snapshot(
+        (
+            """\
+<Input>
+Hello
+</Input>
+<Output>
+Hello world
+</Output>
+<Rubric>
+Output contains input
+</Rubric>
+<ExpectedOutput>
+Hello
+</ExpectedOutput>\
+""",
+        )
+    )

     result = await judge_input_output_expected(image_content, 'Hello world', 'Hello', 'Output contains input')
     assert isinstance(result, GradingOutput)
@@ -249,10 +283,24 @@ async def test_judge_input_output_expected_mock(mocker: MockerFixture, image_con
     assert result.score == 1.0

     call_args = mock_run.call_args[0]
-    assert image_content in call_args[0]
-    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
-    assert '<Output>\nHello world\n</Output>' in call_args[0]
-    assert '<Rubric>\nOutput contains input\n</Rubric>' in call_args[0]
+    assert call_args == snapshot(
+        (
+            [
+                '<Input>',
+                image_content,
+                '</Input>',
+                '<Output>',
+                'Hello world',
+                '</Output>',
+                '<Rubric>',
+                'Output contains input',
+                '</Rubric>',
+                '<ExpectedOutput>',
+                'Hello',
+                '</ExpectedOutput>',
+            ],
+        )
+    )


 @pytest.mark.anyio
@@ -279,10 +327,24 @@ async def test_judge_input_output_expected_with_model_settings_mock(
     assert result.score == 1.0

     call_args, call_kwargs = mock_run.call_args
-    assert '<Input>\nHello settings\n</Input>' in call_args[0]
-    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
-    assert '<Output>\nHello world with settings\n</Output>' in call_args[0]
-    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_args == snapshot(
+        (
+            """\
+<Input>
+Hello settings
+</Input>
+<Output>
+Hello world with settings
+</Output>
+<Rubric>
+Output contains input with settings
+</Rubric>
+<ExpectedOutput>
+Hello
+</ExpectedOutput>\
+""",
+        )
+    )
     assert call_kwargs['model_settings'] == test_model_settings
     # Check if 'model' kwarg is passed, its value will be the default model or None
     assert 'model' in call_kwargs
@@ -301,10 +363,24 @@ async def test_judge_input_output_expected_with_model_settings_mock(
     assert result.score == 1.0

     call_args, call_kwargs = mock_run.call_args
-    assert image_content in call_args[0]
-    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
-    assert '<Output>\nHello world with settings\n</Output>' in call_args[0]
-    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_args == snapshot(
+        (
+            [
+                '<Input>',
+                image_content,
+                '</Input>',
+                '<Output>',
+                'Hello world with settings',
+                '</Output>',
+                '<Rubric>',
+                'Output contains input with settings',
+                '</Rubric>',
+                '<ExpectedOutput>',
+                'Hello',
+                '</ExpectedOutput>',
+            ],
+        )
+    )
     assert call_kwargs['model_settings'] == test_model_settings
     # Check if 'model' kwarg is passed, its value will be the default model or None
     assert 'model' in call_kwargs
@@ -326,26 +402,20 @@ async def test_judge_input_output_expected_with_model_settings_mock(

     assert call_args == snapshot(
         (
-            [
-                '<Input>\n',
-                '123',
-                '</Input>',
-                """\
+            """\
+<Input>
+123
+</Input>
 <Output>
 Hello world with settings
-</Output>\
-""",
-                """\
+</Output>
 <Rubric>
 Output contains input with settings
-</Rubric>\
-""",
-                """\
+</Rubric>
 <ExpectedOutput>
 Hello
 </ExpectedOutput>\
 """,
-            ],
         )
     )

@@ -366,26 +436,20 @@ async def test_judge_input_output_expected_with_model_settings_mock(

     assert call_args == snapshot(
         (
-            [
-                '<Input>\n',
-                '123',
-                '</Input>',
-                """\
+            """\
+<Input>
+123
+</Input>
 <Output>
 Hello world with settings
-</Output>\
-""",
-                """\
+</Output>
 <Rubric>
 Output contains input with settings
-</Rubric>\
-""",
-                """\
+</Rubric>
 <ExpectedOutput>
 Hello
 </ExpectedOutput>\
 """,
-            ],
         )
     )

@@ -455,10 +519,21 @@ async def test_judge_output_expected_with_model_settings_mock(mocker: MockerFixt
     assert result.score == 1.0

     call_args, call_kwargs = mock_run.call_args
-    assert '<Input>' not in call_args[0]
-    assert '<ExpectedOutput>\nHello\n</ExpectedOutput>' in call_args[0]
-    assert '<Output>' in call_args[0]
-    assert '<Rubric>\nOutput contains input with settings\n</Rubric>' in call_args[0]
+    assert call_args == snapshot(
+        (
+            [
+                '<Output>',
+                image_content,
+                '</Output>',
+                '<Rubric>',
+                'Output contains input with settings',
+                '</Rubric>',
+                '<ExpectedOutput>',
+                'Hello',
+                '</ExpectedOutput>',
+            ],
+        )
+    )
     assert call_kwargs['model_settings'] == test_model_settings
     # Check if 'model' kwarg is passed, its value will be the default model or None
     assert 'model' in call_kwargs