@@ -9,12 +9,15 @@ import (
99 openai "github.com/gptscript-ai/chat-completion-client"
1010)
1111
12- const instructions = `When given JSON objects that conform to the following JSONSchema :
12+ const instructions = `"actual" is considered equivalent to "expected" if and only if the following rules are satisfied :
1313
1414%s
1515
16- Determine if "actual" is equal to "expected" based on the comparison constraints described by "criteria".
17- "actual" is considered equal to "expected" if and only if the all of the constraints described by "criteria" are satisfied.
16+ When given JSON objects that conform to the following JSONSchema:
17+
18+ %s
19+
20+ Determine if "actual" is considered equivalent to "expected".
1821
1922After making a determination, respond with a JSON object that conforms to the following JSONSchema:
2023
@@ -28,7 +31,7 @@ After making a determination, respond with a JSON object that conforms to the fo
2831 },
2932 "reasoning": {
3033 "type": "string",
31- "description": "The reasoning used to come to the determination, that points out all instances where the given criteria was violated "
34+ "description": "The reasoning used to come to the determination"
3235 }
3336 },
3437 "required": [
@@ -41,14 +44,13 @@ Your responses are concise and include only the json object described above.
4144`
4245
4346type Judge [T any ] struct {
44- client * openai.Client
45- instructions string
47+ client * openai.Client
48+ comparisonSchema string
4649}
4750
4851type comparison [T any ] struct {
49- Expected T `json:"expected"`
50- Actual T `json:"actual"`
51- Criteria string `json:"criteria"`
52+ Expected T `json:"expected"`
53+ Actual T `json:"actual"`
5254}
5355
5456type ruling struct {
@@ -70,22 +72,21 @@ func New[T any](client *openai.Client) (*Judge[T], error) {
7072 return nil , fmt .Errorf ("failed to generate JSONSchema for %T: %w" , new (T ), err )
7173 }
7274
73- schemaJSON , err := json .MarshalIndent (schema , "" , " " )
75+ marshaled , err := json .MarshalIndent (schema , "" , " " )
7476 if err != nil {
7577 return nil , fmt .Errorf ("failed to marshal JSONSchema for %T: %w" , new (T ), err )
7678 }
7779
7880 return & Judge [T ]{
79- client : client ,
80- instructions : fmt . Sprintf ( instructions , schemaJSON ),
81+ client : client ,
82+ comparisonSchema : string ( marshaled ),
8183 }, nil
8284}
8385
8486func (j * Judge [T ]) Equal (ctx context.Context , expected , actual T , criteria string ) (equal bool , reasoning string , err error ) {
8587 comparisonJSON , err := json .MarshalIndent (& comparison [T ]{
8688 Expected : expected ,
8789 Actual : actual ,
88- Criteria : criteria ,
8990 }, "" , " " )
9091 if err != nil {
9192 return false , "" , fmt .Errorf ("failed to marshal judge testcase JSON: %w" , err )
@@ -101,7 +102,7 @@ func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria strin
101102 Messages : []openai.ChatCompletionMessage {
102103 {
103104 Role : openai .ChatMessageRoleSystem ,
104- Content : j . instructions ,
105+ Content : fmt . Sprintf ( instructions , criteria , j . comparisonSchema ) ,
105106 },
106107 {
107108 Role : openai .ChatMessageRoleUser ,
@@ -111,11 +112,11 @@ func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria strin
111112 }
112113 response , err := j .client .CreateChatCompletion (ctx , request )
113114 if err != nil {
114- return false , "" , fmt .Errorf ("failed to make judge chat completion request: %w" , err )
115+ return false , "" , fmt .Errorf ("failed to create chat completion request: %w" , err )
115116 }
116117
117118 if len (response .Choices ) < 1 {
118- return false , "" , fmt .Errorf ("judge chat completion request returned no choices" )
119+ return false , "" , fmt .Errorf ("chat completion request returned no choices" )
119120 }
120121
121122 var equality ruling
0 commit comments