Skip to content

Commit 9ffc369

Browse files
feat(api): api update
1 parent 194633b commit 9ffc369

File tree

4 files changed

+159
-21
lines changed

4 files changed

+159
-21
lines changed

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 41
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-2d787a1d9fe261bee11bff3e707fcb9c957f759e397032b64241ed9703b98cae.yml
3-
openapi_spec_hash: 0675cf7a85dee80cbb0818d54af3fe33
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-e6175cde4c01ced1b2a1c844b130679941a125fece29d94599b25c3e087cdcaa.yml
3+
openapi_spec_hash: 0efc59469914406143931fed26446694
44
config_hash: e2d1be538fd1fb65bfc566a2a168cc16

src/resources/evaluation.ts

Lines changed: 137 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ export interface EvaluationRetrieveResponse {
141141
/**
142142
* The parameters used for this evaluation
143143
*/
144-
parameters?: unknown;
144+
parameters?: { [key: string]: unknown };
145145

146146
/**
147147
* Results of the evaluation (when completed)
@@ -213,10 +213,7 @@ export namespace EvaluationRetrieveResponse {
213213
}
214214

215215
export interface EvaluationScoreResults {
216-
/**
217-
* Aggregated score statistics
218-
*/
219-
aggregated_scores?: unknown;
216+
aggregated_scores?: EvaluationScoreResults.AggregatedScores;
220217

221218
/**
222219
* number of failed samples generated from model
@@ -244,6 +241,16 @@ export namespace EvaluationRetrieveResponse {
244241
result_file_id?: string;
245242
}
246243

244+
export namespace EvaluationScoreResults {
245+
export interface AggregatedScores {
246+
mean_score?: number;
247+
248+
pass_percentage?: number;
249+
250+
std_score?: number;
251+
}
252+
}
253+
247254
export interface EvaluationCompareResults {
248255
/**
249256
* Number of times model A won
@@ -348,10 +355,7 @@ export namespace EvaluationGetStatusResponse {
348355
}
349356

350357
export interface EvaluationScoreResults {
351-
/**
352-
* Aggregated score statistics
353-
*/
354-
aggregated_scores?: unknown;
358+
aggregated_scores?: EvaluationScoreResults.AggregatedScores;
355359

356360
/**
357361
* number of failed samples generated from model
@@ -379,6 +383,16 @@ export namespace EvaluationGetStatusResponse {
379383
result_file_id?: string;
380384
}
381385

386+
export namespace EvaluationScoreResults {
387+
export interface AggregatedScores {
388+
mean_score?: number;
389+
390+
pass_percentage?: number;
391+
392+
std_score?: number;
393+
}
394+
}
395+
382396
export interface EvaluationCompareResults {
383397
/**
384398
* Number of times model A won
@@ -527,10 +541,120 @@ export interface EvaluationUpdateStatusParams {
527541
*/
528542
error?: string;
529543

530-
/**
531-
* Job results (required when status is 'completed')
532-
*/
533-
results?: unknown;
544+
results?:
545+
| EvaluationUpdateStatusParams.EvaluationClassifyResults
546+
| EvaluationUpdateStatusParams.EvaluationScoreResults
547+
| EvaluationUpdateStatusParams.EvaluationCompareResults;
548+
}
549+
550+
export namespace EvaluationUpdateStatusParams {
551+
export interface EvaluationClassifyResults {
552+
/**
553+
* Number of failed generations.
554+
*/
555+
generation_fail_count?: number | null;
556+
557+
/**
558+
* Number of invalid labels
559+
*/
560+
invalid_label_count?: number | null;
561+
562+
/**
563+
* Number of failed judge generations
564+
*/
565+
judge_fail_count?: number | null;
566+
567+
/**
568+
* JSON string representing label counts
569+
*/
570+
label_counts?: string;
571+
572+
/**
573+
* Percentage of pass labels.
574+
*/
575+
pass_percentage?: number | null;
576+
577+
/**
578+
* Data File ID
579+
*/
580+
result_file_id?: string;
581+
}
582+
583+
export interface EvaluationScoreResults {
584+
aggregated_scores?: EvaluationScoreResults.AggregatedScores;
585+
586+
/**
587+
* Number of failed samples generated from the model
588+
*/
589+
failed_samples?: number;
590+
591+
/**
592+
* Number of failed generations.
593+
*/
594+
generation_fail_count?: number | null;
595+
596+
/**
597+
* Number of invalid scores generated from the model
598+
*/
599+
invalid_score_count?: number;
600+
601+
/**
602+
* Number of failed judge generations
603+
*/
604+
judge_fail_count?: number | null;
605+
606+
/**
607+
* Data File ID
608+
*/
609+
result_file_id?: string;
610+
}
611+
612+
export namespace EvaluationScoreResults {
613+
export interface AggregatedScores {
614+
mean_score?: number;
615+
616+
pass_percentage?: number;
617+
618+
std_score?: number;
619+
}
620+
}
621+
622+
export interface EvaluationCompareResults {
623+
/**
624+
* Number of times model A won
625+
*/
626+
A_wins?: number;
627+
628+
/**
629+
* Number of times model B won
630+
*/
631+
B_wins?: number;
632+
633+
/**
634+
* Number of failed generations.
635+
*/
636+
generation_fail_count?: number | null;
637+
638+
/**
639+
* Number of failed judge generations
640+
*/
641+
judge_fail_count?: number | null;
642+
643+
/**
644+
* Total number of samples compared
645+
*/
646+
num_samples?: number;
647+
648+
/**
649+
* Data File ID
650+
*/
651+
result_file_id?: string;
652+
653+
/**
654+
* Number of ties
655+
*/
656+
Ties?: number;
657+
}
534658
}
535659

536660
export declare namespace Evaluation {

src/resources/evaluations.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ export namespace EvaluationListResponse {
4040
/**
4141
* The parameters used for this evaluation
4242
*/
43-
parameters?: unknown;
43+
parameters?: { [key: string]: unknown };
4444

4545
/**
4646
* Results of the evaluation (when completed)
@@ -112,10 +112,7 @@ export namespace EvaluationListResponse {
112112
}
113113

114114
export interface EvaluationScoreResults {
115-
/**
116-
* Aggregated score statistics
117-
*/
118-
aggregated_scores?: unknown;
115+
aggregated_scores?: EvaluationScoreResults.AggregatedScores;
119116

120117
/**
121118
* number of failed samples generated from model
@@ -143,6 +140,16 @@ export namespace EvaluationListResponse {
143140
result_file_id?: string;
144141
}
145142

143+
export namespace EvaluationScoreResults {
144+
export interface AggregatedScores {
145+
mean_score?: number;
146+
147+
pass_percentage?: number;
148+
149+
std_score?: number;
150+
}
151+
}
152+
146153
export interface EvaluationCompareResults {
147154
/**
148155
* Number of times model A won

tests/api-resources/evaluation.test.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,14 @@ describe('resource evaluation', () => {
8383
const response = await client.evaluation.updateStatus('id', {
8484
status: 'completed',
8585
error: 'error',
86-
results: {},
86+
results: {
87+
generation_fail_count: 0,
88+
invalid_label_count: 0,
89+
judge_fail_count: 0,
90+
label_counts: '{"yes": 10, "no": 0}',
91+
pass_percentage: 10,
92+
result_file_id: 'file-1234-aefd',
93+
},
8794
});
8895
});
8996
});

0 commit comments

Comments
 (0)