feat(api): api update

stainless-app[bot] · stainless-app[bot] · commit 9ffc369129e9 · 2025-09-02T13:17:28.000Z
diff --git a/.stats.yml b/.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 41
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-2d787a1d9fe261bee11bff3e707fcb9c957f759e397032b64241ed9703b98cae.yml
-openapi_spec_hash: 0675cf7a85dee80cbb0818d54af3fe33
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-e6175cde4c01ced1b2a1c844b130679941a125fece29d94599b25c3e087cdcaa.yml
+openapi_spec_hash: 0efc59469914406143931fed26446694
 config_hash: e2d1be538fd1fb65bfc566a2a168cc16
diff --git a/src/resources/evaluation.ts b/src/resources/evaluation.ts
@@ -141,7 +141,7 @@ export interface EvaluationRetrieveResponse {
   /**
    * The parameters used for this evaluation
    */
-  parameters?: unknown;
+  parameters?: { [key: string]: unknown };
 
   /**
    * Results of the evaluation (when completed)
@@ -213,10 +213,7 @@ export namespace EvaluationRetrieveResponse {
   }
 
   export interface EvaluationScoreResults {
-    /**
-     * Aggregated score statistics
-     */
-    aggregated_scores?: unknown;
+    aggregated_scores?: EvaluationScoreResults.AggregatedScores;
 
     /**
      * number of failed samples generated from model
@@ -244,6 +241,16 @@ export namespace EvaluationRetrieveResponse {
     result_file_id?: string;
   }
 
+  export namespace EvaluationScoreResults {
+    export interface AggregatedScores {
+      mean_score?: number;
+
+      pass_percentage?: number;
+
+      std_score?: number;
+    }
+  }
+
   export interface EvaluationCompareResults {
     /**
      * Number of times model A won
@@ -348,10 +355,7 @@ export namespace EvaluationGetStatusResponse {
   }
 
   export interface EvaluationScoreResults {
-    /**
-     * Aggregated score statistics
-     */
-    aggregated_scores?: unknown;
+    aggregated_scores?: EvaluationScoreResults.AggregatedScores;
 
     /**
      * number of failed samples generated from model
@@ -379,6 +383,16 @@ export namespace EvaluationGetStatusResponse {
     result_file_id?: string;
   }
 
+  export namespace EvaluationScoreResults {
+    export interface AggregatedScores {
+      mean_score?: number;
+
+      pass_percentage?: number;
+
+      std_score?: number;
+    }
+  }
+
   export interface EvaluationCompareResults {
     /**
      * Number of times model A won
@@ -527,10 +541,120 @@ export interface EvaluationUpdateStatusParams {
    */
   error?: string;
 
-  /**
-   * Job results (required when status is 'completed')
-   */
-  results?: unknown;
+  results?:
+    | EvaluationUpdateStatusParams.EvaluationClassifyResults
+    | EvaluationUpdateStatusParams.EvaluationScoreResults
+    | EvaluationUpdateStatusParams.EvaluationCompareResults;
+}
+
+export namespace EvaluationUpdateStatusParams {
+  export interface EvaluationClassifyResults {
+    /**
+     * Number of failed generations.
+     */
+    generation_fail_count?: number | null;
+
+    /**
+     * Number of invalid labels
+     */
+    invalid_label_count?: number | null;
+
+    /**
+     * Number of failed judge generations
+     */
+    judge_fail_count?: number | null;
+
+    /**
+     * JSON string representing label counts
+     */
+    label_counts?: string;
+
+    /**
+     * Percentage of pass labels.
+     */
+    pass_percentage?: number | null;
+
+    /**
+     * Data File ID
+     */
+    result_file_id?: string;
+  }
+
+  export interface EvaluationScoreResults {
+    aggregated_scores?: EvaluationScoreResults.AggregatedScores;
+
+    /**
+     * number of failed samples generated from model
+     */
+    failed_samples?: number;
+
+    /**
+     * Number of failed generations.
+     */
+    generation_fail_count?: number | null;
+
+    /**
+     * number of invalid scores generated from model
+     */
+    invalid_score_count?: number;
+
+    /**
+     * Number of failed judge generations
+     */
+    judge_fail_count?: number | null;
+
+    /**
+     * Data File ID
+     */
+    result_file_id?: string;
+  }
+
+  export namespace EvaluationScoreResults {
+    export interface AggregatedScores {
+      mean_score?: number;
+
+      pass_percentage?: number;
+
+      std_score?: number;
+    }
+  }
+
+  export interface EvaluationCompareResults {
+    /**
+     * Number of times model A won
+     */
+    A_wins?: number;
+
+    /**
+     * Number of times model B won
+     */
+    B_wins?: number;
+
+    /**
+     * Number of failed generations.
+     */
+    generation_fail_count?: number | null;
+
+    /**
+     * Number of failed judge generations
+     */
+    judge_fail_count?: number | null;
+
+    /**
+     * Total number of samples compared
+     */
+    num_samples?: number;
+
+    /**
+     * Data File ID
+     */
+    result_file_id?: string;
+
+    /**
+     * Number of ties
+     */
+    Ties?: number;
+  }
 }
 
 export declare namespace Evaluation {
diff --git a/src/resources/evaluations.ts b/src/resources/evaluations.ts
@@ -40,7 +40,7 @@ export namespace EvaluationListResponse {
     /**
      * The parameters used for this evaluation
      */
-    parameters?: unknown;
+    parameters?: { [key: string]: unknown };
 
     /**
      * Results of the evaluation (when completed)
@@ -112,10 +112,7 @@ export namespace EvaluationListResponse {
     }
 
     export interface EvaluationScoreResults {
-      /**
-       * Aggregated score statistics
-       */
-      aggregated_scores?: unknown;
+      aggregated_scores?: EvaluationScoreResults.AggregatedScores;
 
       /**
        * number of failed samples generated from model
@@ -143,6 +140,16 @@ export namespace EvaluationListResponse {
       result_file_id?: string;
     }
 
+    export namespace EvaluationScoreResults {
+      export interface AggregatedScores {
+        mean_score?: number;
+
+        pass_percentage?: number;
+
+        std_score?: number;
+      }
+    }
+
     export interface EvaluationCompareResults {
       /**
        * Number of times model A won
diff --git a/tests/api-resources/evaluation.test.ts b/tests/api-resources/evaluation.test.ts
@@ -83,7 +83,14 @@ describe('resource evaluation', () => {
     const response = await client.evaluation.updateStatus('id', {
       status: 'completed',
       error: 'error',
-      results: {},
+      results: {
+        generation_fail_count: 0,
+        invalid_label_count: 0,
+        judge_fail_count: 0,
+        label_counts: '{"yes": 10, "no": 0}',
+        pass_percentage: 10,
+        result_file_id: 'file-1234-aefd',
+      },
     });
   });
 });