Skip to content

Commit a6046cd

Browse files
authored
refactor(component,ai,gemini): merge usage and usage-metadata fields into single usage field (#1126)
Because

- We have the unified `usage` field for `TASK_CHAT` in different AI components
- The Gemini component had redundant `usage` and `usage-metadata` fields that provided overlapping token usage information
- The `usage` field was a simple object while `usage-metadata` contained detailed structured information

This commit

- Merges the `usage` and `usage-metadata` fields into a single comprehensive `usage` field
- Updates the YAML schema to reference the detailed `usage-metadata` definition for the `usage` field
- Removes the separate `usage-metadata` field from the output schema and Go structs
- Updates all related code to populate and access usage information through the unified `usage` field
- Maintains backward compatibility by preserving all detailed token usage statistics (prompt tokens, cached tokens, candidates tokens, etc.) with kebab-case naming
1 parent 3117046 commit a6046cd

File tree

4 files changed

+22
-30
lines changed

4 files changed

+22
-30
lines changed

pkg/component/ai/gemini/v0/config/tasks.yaml

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,7 +1649,7 @@ TASK_CHAT:
16491649
to fine-tune the LLM's output. Note that OpenAI and Mistral models don't have the `top-k` exposed.
16501650
type: integer
16511651
top-p:
1652-
uiOrder: 10
1652+
uiOrder: 12
16531653
title: Top-P
16541654
description: >-
16551655
A parameter, also known as nucleus sampling, that controls the randomness and creativity of the generated text by selecting a dynamic subset
@@ -1660,44 +1660,44 @@ TASK_CHAT:
16601660
choice to a smaller, more focused set of highly probable words, resulting in more factual and conservative output.
16611661
type: number
16621662
seed:
1663-
uiOrder: 12
1663+
uiOrder: 13
16641664
title: Seed
16651665
description: A random seed used to control the stochasticity of text generation to produce repeatable outputs
16661666
type: integer
16671667
contents:
1668-
uiOrder: 13
1668+
uiOrder: 14
16691669
title: Contents
16701670
description: The input contents to the model. Each item represents a user or model turn composed of parts (text or images).
16711671
type: array
16721672
items:
16731673
$ref: "#/$defs/content"
16741674
tools:
1675-
uiOrder: 14
1675+
uiOrder: 15
16761676
title: Tools
16771677
description: Tools available to the model, e.g., function declarations.
16781678
type: array
16791679
items:
16801680
$ref: "#/$defs/tool"
16811681
tool-config:
1682-
uiOrder: 15
1682+
uiOrder: 16
16831683
$ref: "#/$defs/tool-config"
16841684
safety-settings:
1685-
uiOrder: 16
1685+
uiOrder: 17
16861686
title: Safety Settings
16871687
description: Safety settings for content filtering.
16881688
type: array
16891689
items:
16901690
$ref: "#/$defs/safety-setting"
16911691
system-instruction:
1692-
uiOrder: 17
1692+
uiOrder: 18
16931693
title: System Instruction
16941694
description: A system instruction to guide the model behavior.
16951695
$ref: "#/$defs/content"
16961696
generation-config:
1697-
uiOrder: 18
1697+
uiOrder: 19
16981698
$ref: "#/$defs/generation-config"
16991699
cached-content:
1700-
uiOrder: 19
1700+
uiOrder: 20
17011701
title: Cached Content
17021702
description: "The name of a cached content to use as context. Format: cachedContents/{cachedContent}."
17031703
type: string
@@ -1732,9 +1732,9 @@ TASK_CHAT:
17321732
uiOrder: 2
17331733
title: Usage
17341734
description: >-
1735-
Token usage statistics: prompt tokens, completion tokens, total tokens, etc.
1736-
type: object
1737-
additionalProperties: true
1735+
Token usage statistics: prompt tokens, completion tokens, total tokens, etc. This field is a proxy of the original usage-metadata field in Gemini
1736+
API.
1737+
$ref: "#/$defs/usage-metadata"
17381738
candidates:
17391739
uiOrder: 3
17401740
title: Candidates
@@ -1745,19 +1745,16 @@ TASK_CHAT:
17451745
type: array
17461746
items:
17471747
$ref: "#/$defs/candidate"
1748-
usage-metadata:
1749-
uiOrder: 4
1750-
$ref: "#/$defs/usage-metadata"
17511748
prompt-feedback:
1752-
uiOrder: 5
1749+
uiOrder: 4
17531750
$ref: "#/$defs/prompt-feedback"
17541751
model-version:
1755-
uiOrder: 6
1752+
uiOrder: 5
17561753
title: Model Version
17571754
description: The model version used to generate the response.
17581755
type: string
17591756
response-id:
1760-
uiOrder: 7
1757+
uiOrder: 6
17611758
title: Response ID
17621759
description: Identifier for this response.
17631760
type: string

pkg/component/ai/gemini/v0/io.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ type TaskChatOutput struct {
6565

6666
// Use genai types directly with instill tags
6767
Candidates []*genai.Candidate `instill:"candidates"`
68-
UsageMetadata *genai.GenerateContentResponseUsageMetadata `instill:"usage-metadata"`
6968
PromptFeedback *genai.GenerateContentResponsePromptFeedback `instill:"prompt-feedback"`
7069
ModelVersion *string `instill:"model-version"`
7170
ResponseID *string `instill:"response-id"`

pkg/component/ai/gemini/v0/task_chat.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,15 +265,13 @@ func (e *execution) buildStreamOutput(texts []string, finalResp *genai.GenerateC
265265
Texts: texts,
266266
Usage: map[string]any{},
267267
Candidates: []*genai.Candidate{},
268-
UsageMetadata: nil,
269268
PromptFeedback: nil,
270269
ModelVersion: nil,
271270
ResponseID: nil,
272271
}
273272

274273
if finalResp != nil {
275274
streamOutput.Candidates = finalResp.Candidates
276-
streamOutput.UsageMetadata = finalResp.UsageMetadata
277275
streamOutput.PromptFeedback = finalResp.PromptFeedback
278276
if finalResp.ModelVersion != "" {
279277
mv := finalResp.ModelVersion
@@ -392,14 +390,12 @@ func renderFinal(resp *genai.GenerateContentResponse, texts []string) TaskChatOu
392390
Images: []format.Image{},
393391
Usage: map[string]any{},
394392
Candidates: []*genai.Candidate{},
395-
UsageMetadata: nil,
396393
PromptFeedback: nil,
397394
}
398395
if resp == nil {
399396
return out
400397
}
401398
out.Candidates = resp.Candidates
402-
out.UsageMetadata = resp.UsageMetadata
403399
out.PromptFeedback = resp.PromptFeedback
404400
if resp.ModelVersion != "" {
405401
mv := resp.ModelVersion

pkg/component/ai/gemini/v0/task_chat_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,7 @@ func Test_renderFinal_Minimal(t *testing.T) {
780780
c.Check(*out.ModelVersion, qt.Equals, "v1")
781781
c.Check(out.ResponseID, qt.Not(qt.IsNil))
782782
c.Check(*out.ResponseID, qt.Equals, "resp-123")
783-
c.Check(out.UsageMetadata.TotalTokenCount, qt.Equals, int32(3))
783+
c.Check(out.Usage["total-token-count"], qt.Equals, int32(3))
784784
}
785785

786786
func Test_buildGenerateContentConfig_NoConfig(t *testing.T) {
@@ -1007,8 +1007,8 @@ func Test_buildStreamOutput(t *testing.T) {
10071007

10081008
c.Assert(got.Texts, qt.DeepEquals, texts)
10091009
c.Assert(got.Candidates, qt.HasLen, 2)
1010-
c.Assert(got.UsageMetadata, qt.Not(qt.IsNil))
1011-
c.Check(got.UsageMetadata.TotalTokenCount, qt.Equals, int32(15))
1010+
c.Assert(got.Usage, qt.Not(qt.IsNil))
1011+
c.Check(got.Usage["total-token-count"], qt.Equals, int32(15))
10121012
c.Assert(got.PromptFeedback, qt.Not(qt.IsNil))
10131013
c.Assert(got.ModelVersion, qt.Not(qt.IsNil))
10141014
c.Check(*got.ModelVersion, qt.Equals, "v1")
@@ -1082,7 +1082,7 @@ func Test_buildStreamOutput_InlineDataCleanup(t *testing.T) {
10821082
c.Check(got.Candidates[0].Content.Parts[0].Text, qt.Equals, "Here's an image")
10831083

10841084
// Verify other metadata is preserved
1085-
c.Check(got.UsageMetadata.TotalTokenCount, qt.Equals, int32(15))
1085+
c.Check(got.Usage["total-token-count"], qt.Equals, int32(15))
10861086
c.Assert(got.ModelVersion, qt.Not(qt.IsNil))
10871087
c.Check(*got.ModelVersion, qt.Equals, "v1")
10881088
c.Assert(got.ResponseID, qt.Not(qt.IsNil))
@@ -1254,7 +1254,7 @@ func Test_renderFinal_WithInlineData(t *testing.T) {
12541254
c.Check(got.Texts[0], qt.Equals, "Here's an image")
12551255

12561256
// Verify other metadata is preserved
1257-
c.Check(got.UsageMetadata.TotalTokenCount, qt.Equals, int32(15))
1257+
c.Check(got.Usage["total-token-count"], qt.Equals, int32(15))
12581258
c.Assert(got.ModelVersion, qt.Not(qt.IsNil))
12591259
c.Check(*got.ModelVersion, qt.Equals, "v1")
12601260
c.Assert(got.ResponseID, qt.Not(qt.IsNil))
@@ -1757,8 +1757,8 @@ func TestImageGeneration(t *testing.T) {
17571757
// Check that texts are preserved
17581758
c.Check(result.Texts, qt.DeepEquals, texts)
17591759

1760-
// Check that images are NOT extracted during streaming (deferred to renderFinal)
1761-
c.Check(result.Images, qt.HasLen, 0)
1760+
// Check that images are extracted during streaming
1761+
c.Check(result.Images, qt.HasLen, 1)
17621762
})
17631763

17641764
t.Run("renderFinal with mixed content", func(t *testing.T) {

0 commit comments

Comments
 (0)