Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added examples/live/live_prompt
Binary file not shown.
198 changes: 198 additions & 0 deletions examples/live/live_prompt.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build ignore_vet

package main

import (
"context"
"encoding/binary"
"flag"
"fmt"
"io"
"log"
"os"

"google.golang.org/genai"
)

// Command-line flags. All of them are optional strings; only --prompt has a
// non-empty default.
var (
	// voiceSample is the path of the audio file whose voice should be replicated.
	voiceSample = flag.String("voice-sample", "", "Path to voice sample file")
	// voiceConsent is the path of the recorded consent audio that accompanies the sample.
	voiceConsent = flag.String("voice-consent", "", "Path to voice consent file")
	// voiceSignature is a consent signature that can be supplied instead of consent audio.
	voiceSignature = flag.String("voice-signature", "", "Voice consent signature")
	// modelFlag overrides the model name that is otherwise chosen per backend.
	modelFlag = flag.String("model", "", "Model name")
	// promptFlag is the text sent to the model.
	promptFlag = flag.String("prompt", "Hello Gemini, are you there?", "Text prompt for testing")
)

// main sends one text prompt over a Gemini Live session and saves the audio
// reply to output.wav.
//
// Steps:
//  1. Parse flags. When --voice-sample is set, load the sample and require
//     either non-empty --voice-consent audio or a --voice-signature.
//  2. Create a client, pick the model (--model, otherwise a per-backend
//     default) and connect with audio as the only response modality.
//  3. Read the first server message and log the voice consent signature if
//     that message carries one.
//  4. Send the prompt, collect inline audio data until the turn completes or
//     the stream ends, and write the collected bytes with saveWav.
//
// Every failure terminates the process through log.Fatal.
func main() {
	flag.Parse()
	log.SetFlags(0)

	if *promptFlag == "" {
		log.Fatal("--prompt must be specified")
	}

	var voiceSampleAudio []byte
	var consentAudio []byte

	if *voiceSample != "" {
		var err error
		voiceSampleAudio, err = os.ReadFile(*voiceSample)
		if err != nil {
			log.Fatal("read voice sample error: ", err)
		}
		if *voiceConsent != "" {
			consentAudio, err = os.ReadFile(*voiceConsent)
			if err != nil {
				log.Fatal("read voice consent error: ", err)
			}
		}
		// A voice sample must be backed by some proof of consent.
		if len(consentAudio) == 0 && *voiceSignature == "" {
			log.Fatal("Either --voice-consent or --voice-signature must be provided when --voice-sample is used.")
		}
	}

	ctx := context.Background()
	client, err := genai.NewClient(ctx, nil)
	if err != nil {
		log.Fatal("create client error: ", err)
	}

	// An explicit --model wins; otherwise fall back to a backend-specific default.
	var model string
	if *modelFlag != "" {
		model = *modelFlag
	} else if client.ClientConfig().Backend == genai.BackendVertexAI {
		model = "gemini-2.0-flash-live-preview-04-09"
	} else {
		model = "gemini-live-2.5-flash-preview"
	}

	config := &genai.LiveConnectConfig{}
	config.ResponseModalities = []genai.Modality{genai.ModalityAudio}

	// Only attach a replicated-voice configuration when a sample was loaded.
	if len(voiceSampleAudio) > 0 {
		replicatedConfig := &genai.ReplicatedVoiceConfig{
			MIMEType:         "audio/wav",
			VoiceSampleAudio: voiceSampleAudio,
		}
		if len(consentAudio) > 0 {
			replicatedConfig.ConsentAudio = consentAudio
		}
		if *voiceSignature != "" {
			replicatedConfig.VoiceConsentSignature = &genai.VoiceConsentSignature{
				Signature: *voiceSignature,
			}
		}
		config.SpeechConfig = &genai.SpeechConfig{
			VoiceConfig: &genai.VoiceConfig{
				ReplicatedVoiceConfig: replicatedConfig,
			},
		}
	}

	session, err := client.Live.Connect(ctx, model, config)
	if err != nil {
		log.Fatal("connect to model error: ", err)
	}
	defer session.Close()

	// The first message is expected to be SetupComplete; it may carry a
	// consent signature worth showing to the user.
	setupMsg, err := session.Receive()
	if err != nil {
		log.Fatal("receive setup complete error: ", err)
	}
	if setupMsg.SetupComplete != nil && setupMsg.SetupComplete.VoiceConsentSignature != nil {
		log.Printf("\n=== Voice Consent Signature Received ===\n%s\n========================================\n", setupMsg.SetupComplete.VoiceConsentSignature.Signature)
	}

	fmt.Println("Sending prompt:", *promptFlag)
	err = session.SendRealtimeInput(genai.LiveRealtimeInput{
		Text: *promptFlag,
	})
	if err != nil {
		log.Fatal("send prompt error: ", err)
	}

	var audioData []byte
	for {
		msg, err := session.Receive()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal("receive error: ", err)
		}

		content := msg.ServerContent
		if content == nil {
			continue
		}

		// Collect audio BEFORE looking at TurnComplete: if the message that
		// marks the end of the turn also carries parts, they must not be lost.
		if content.ModelTurn != nil {
			for _, part := range content.ModelTurn.Parts {
				if part.InlineData != nil && part.InlineData.Data != nil {
					audioData = append(audioData, part.InlineData.Data...)
					fmt.Printf("Received audio chunk: %d bytes\n", len(part.InlineData.Data))
				}
			}
		}
		if content.TurnComplete {
			break
		}
	}

	if len(audioData) == 0 {
		fmt.Println("No audio data received.")
		return
	}
	if err := saveWav(audioData, "output.wav"); err != nil {
		log.Fatal("save wav error: ", err)
	}
}

// saveWav writes data to filename as a PCM WAV file: a 44-byte RIFF/WAVE
// header followed by the payload, unchanged.
//
// The header always describes 24 kHz, 16-bit, mono PCM; data is assumed to be
// in that format and is not inspected or converted. An existing file is
// truncated.
//
// It returns an error when the payload does not fit the 32-bit RIFF size
// fields, when the file cannot be created, or when writing or closing the file
// fails. The "Saved audio response" line is printed only on success.
func saveWav(data []byte, filename string) error {
	const (
		sampleRate    = 24000
		bitsPerSample = 16
		channels      = 1
		blockAlign    = channels * bitsPerSample / 8
		byteRate      = sampleRate * blockAlign

		headerSize = 44
		// The RIFF chunk size field holds 36 + len(data) in a uint32.
		maxDataLen = 1<<32 - 1 - 36
	)

	// Refuse payloads whose size would silently wrap in the header.
	if uint64(len(data)) > maxDataLen {
		return fmt.Errorf("audio data too large for a WAV file: %d bytes", len(data))
	}

	// Assemble the whole header in memory so it reaches the file in one write.
	le := binary.LittleEndian
	var header [headerSize]byte

	// RIFF header
	copy(header[0:4], "RIFF")
	le.PutUint32(header[4:8], uint32(36+len(data)))
	copy(header[8:12], "WAVE")

	// fmt chunk
	copy(header[12:16], "fmt ")
	le.PutUint32(header[16:20], 16) // size of the fmt chunk body
	le.PutUint16(header[20:22], 1)  // audio format: PCM
	le.PutUint16(header[22:24], channels)
	le.PutUint32(header[24:28], sampleRate)
	le.PutUint32(header[28:32], byteRate)
	le.PutUint16(header[32:34], blockAlign)
	le.PutUint16(header[34:36], bitsPerSample)

	// data chunk
	copy(header[36:40], "data")
	le.PutUint32(header[40:44], uint32(len(data)))

	f, err := os.Create(filename)
	if err != nil {
		return err
	}

	_, err = f.Write(header[:])
	if err == nil {
		_, err = f.Write(data)
	}
	// Close exactly once and keep its error: a failed close can be the only
	// indication that the data never made it to disk.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		return fmt.Errorf("writing WAV file: %w", err)
	}

	fmt.Println("Saved audio response to", filename)
	return nil
}
Binary file added examples/live/live_streaming_server
Binary file not shown.
73 changes: 71 additions & 2 deletions examples/live/live_streaming_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ import (
)

var addr = flag.String("addr", "localhost:8080", "http service address")
var voiceSample = flag.String("voice-sample", "", "Path to voice sample file")
var voiceConsent = flag.String("voice-consent", "", "Path to voice consent file")
var voiceSignature = flag.String("voice-signature", "", "Voice consent signature")
var modelFlag = flag.String("model", "", "Model name")

var voiceSampleAudio []byte
var consentAudio []byte

var upgrader = websocket.Upgrader{} // use default options

Expand Down Expand Up @@ -66,18 +73,61 @@ func live(w http.ResponseWriter, r *http.Request) {
}

var model string
if client.ClientConfig().Backend == genai.BackendVertexAI {
if *modelFlag != "" {
model = *modelFlag
} else if client.ClientConfig().Backend == genai.BackendVertexAI {
model = "gemini-2.0-flash-live-preview-04-09"
} else {
model = "gemini-live-2.5-flash-preview"
}

config := &genai.LiveConnectConfig{}
config.ResponseModalities = []genai.Modality{genai.ModalityAudio}
if len(voiceSampleAudio) > 0 {
replicatedConfig := &genai.ReplicatedVoiceConfig{
MIMEType: "audio/wav",
VoiceSampleAudio: voiceSampleAudio,
}
if len(consentAudio) > 0 {
replicatedConfig.ConsentAudio = consentAudio
}
if *voiceSignature != "" {
replicatedConfig.VoiceConsentSignature = &genai.VoiceConsentSignature{
Signature: *voiceSignature,
}
}
config.SpeechConfig = &genai.SpeechConfig{
VoiceConfig: &genai.VoiceConfig{
ReplicatedVoiceConfig: replicatedConfig,
},
}
}

// Establish the live WebSocket connection with the specified GenAI model.
session, err := client.Live.Connect(ctx, model, &genai.LiveConnectConfig{})
session, err := client.Live.Connect(ctx, model, config)
if err != nil {
// Log fatal error if connecting to the model fails (e.g., network issues, invalid model name).
log.Fatal("connect to model error: ", err)
}

// Read the first message which should be SetupComplete
setupMsg, err := session.Receive()
if err != nil {
log.Fatal("receive setup complete error: ", err)
}
if setupMsg.SetupComplete != nil && setupMsg.SetupComplete.VoiceConsentSignature != nil {
log.Printf("\n=== Voice Consent Signature Received ===\n%s\n========================================\n", setupMsg.SetupComplete.VoiceConsentSignature.Signature)
}

// Forward SetupComplete to client
setupBytes, err := json.Marshal(setupMsg)
if err != nil {
log.Fatal("marshal setup complete error: ", err)
}
err = c.WriteMessage(websocket.TextMessage, setupBytes)
if err != nil {
log.Println("write setup complete error: ", err)
}
defer session.Close() // Ensure session is closed when the handler exits

// Goroutine to receive messages from the GenAI service and send to the client
Expand Down Expand Up @@ -176,6 +226,24 @@ func proxyVideo(w http.ResponseWriter, r *http.Request) {
func main() {
flag.Parse()
log.SetFlags(0)

if *voiceSample != "" {
var err error
voiceSampleAudio, err = os.ReadFile(*voiceSample)
if err != nil {
log.Fatal("read voice sample error: ", err)
}
if *voiceConsent != "" {
consentAudio, err = os.ReadFile(*voiceConsent)
if err != nil {
log.Fatal("read voice consent error: ", err)
}
}
if len(consentAudio) == 0 && *voiceSignature == "" {
log.Fatal("Either --voice-consent or --voice-signature must be provided when --voice-sample is used.")
}
}

http.HandleFunc("/", homePage)
http.HandleFunc("/live", live)
http.HandleFunc("/proxyVideo", proxyVideo)
Expand All @@ -194,3 +262,4 @@ func main() {
log.Fatal(err)
}
}

Binary file added examples/live/output.wav
Binary file not shown.
1 change: 1 addition & 0 deletions live.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ func (r *Live) Connect(context context.Context, model string, config *LiveConnec
if err != nil {
return nil, fmt.Errorf("failed to write LiveClientSetup: %w", err)
}

return s, nil
}

Expand Down
5 changes: 5 additions & 0 deletions live_converters.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions live_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ func TestLiveConnect(t *testing.T) {
t.Errorf("Connect() error message = %v, wantErrMessage %v", err.Error(), tt.wantErrMessage)
return
}

defer session.Close()
})
}
Expand Down
Loading
Loading