diff --git a/api-reference/sdk/go/api-reference.mdx b/api-reference/sdk/go/api-reference.mdx new file mode 100644 index 0000000..8992a2c --- /dev/null +++ b/api-reference/sdk/go/api-reference.mdx @@ -0,0 +1,1083 @@ +--- +title: "API Reference" +description: "Complete reference for Fish Audio Go SDK" +icon: "book" +--- + + + +# fishaudio + +```go +import "github.com/fishaudio/fish-audio-go" +``` + + +## Constants + + + +```go +const ( + // DefaultBaseURL is the default Fish Audio API base URL. + DefaultBaseURL = "https://api.fish.audio" + + // DefaultTimeout is the default request timeout. + DefaultTimeout = 240 * time.Second + + // Version is the SDK version. + Version = "0.1.0" +) +``` + + +## type [APIError]() + +APIError is raised when the API returns an error response. + +```go +type APIError struct { + StatusCode int + Message string + Body string +} +``` + + +### func \(\*APIError\) [Error]() + +```go +func (e *APIError) Error() string +``` + + + + +### func \(\*APIError\) [IsFishAudioError]() + +```go +func (e *APIError) IsFishAudioError() +``` + + + + +## type [ASRResponse]() + +ASRResponse contains the result of speech\-to\-text transcription. + +```go +type ASRResponse struct { + // Text is the complete transcription of the audio. + Text string `json:"text"` + // Duration is the total audio duration in milliseconds. + Duration float64 `json:"duration"` + // Segments contains timestamped text segments. + Segments []ASRSegment `json:"segments"` +} +``` + + +## type [ASRSegment]() + +ASRSegment represents a timestamped segment of transcribed text. + +```go +type ASRSegment struct { + // Text is the transcribed text for this segment. + Text string `json:"text"` + // Start is the segment start time in seconds. + Start float64 `json:"start"` + // End is the segment end time in seconds. + End float64 `json:"end"` +} +``` + + +## type [ASRService]() + +ASRService provides speech\-to\-text operations. 
+ +```go +type ASRService struct { + // contains filtered or unexported fields +} +``` + + +### func \(\*ASRService\) [Transcribe]() + +```go +func (s *ASRService) Transcribe(ctx context.Context, audio []byte, params *TranscribeParams) (*ASRResponse, error) +``` + +Transcribe converts audio to text. + +Example: + +``` +audio, _ := os.ReadFile("audio.mp3") +result, err := client.ASR.Transcribe(ctx, audio, &fishaudio.TranscribeParams{ + Language: "en", +}) +fmt.Println(result.Text) +``` + + +## type [AccountService]() + +AccountService provides account and billing operations. + +```go +type AccountService struct { + // contains filtered or unexported fields +} +``` + + +### func \(\*AccountService\) [GetCredits]() + +```go +func (s *AccountService) GetCredits(ctx context.Context, params *GetCreditsParams) (*Credits, error) +``` + +GetCredits returns the API credit balance. + +Example: + +``` +credits, err := client.Account.GetCredits(ctx, nil) +fmt.Printf("Available credits: %s\n", credits.Credit) +``` + + +### func \(\*AccountService\) [GetPackage]() + +```go +func (s *AccountService) GetPackage(ctx context.Context) (*Package, error) +``` + +GetPackage returns the user's package information. + +Example: + +``` +pkg, err := client.Account.GetPackage(ctx) +fmt.Printf("Balance: %d/%d\n", pkg.Balance, pkg.Total) +``` + + +## type [AudioFormat]() + +AudioFormat specifies the output audio format. + +```go +type AudioFormat string +``` + + + +```go +const ( + AudioFormatMP3 AudioFormat = "mp3" + AudioFormatWAV AudioFormat = "wav" + AudioFormatPCM AudioFormat = "pcm" + AudioFormatOpus AudioFormat = "opus" +) +``` + + +## type [AudioStream]() + +AudioStream wraps an HTTP response for streaming audio data. 
+ +It provides two ways to consume audio: + +- Iterate chunk\-by\-chunk using Next\(\) and Bytes\(\) +- Collect all chunks at once using Collect\(\) + +Example: + +``` +// Stream chunks +stream, _ := client.TTS.Stream(ctx, params) +defer stream.Close() +for stream.Next() { + chunk := stream.Bytes() + // process chunk +} +if err := stream.Err(); err != nil { + // handle error +} + +// Or collect all at once +stream, _ := client.TTS.Stream(ctx, params) +audio, err := stream.Collect() +``` + +```go +type AudioStream struct { + // contains filtered or unexported fields +} +``` + + +### func \(\*AudioStream\) [Bytes]() + +```go +func (s *AudioStream) Bytes() []byte +``` + +Bytes returns the current chunk of audio data. Only valid after a successful call to Next\(\). + + +### func \(\*AudioStream\) [Close]() + +```go +func (s *AudioStream) Close() error +``` + +Close closes the underlying response body. + + +### func \(\*AudioStream\) [Collect]() + +```go +func (s *AudioStream) Collect() ([]byte, error) +``` + +Collect reads all remaining audio data and returns it as a single byte slice. This consumes the stream and closes it automatically. + + +### func \(\*AudioStream\) [Err]() + +```go +func (s *AudioStream) Err() error +``` + +Err returns any error that occurred during iteration. + + +### func \(\*AudioStream\) [Next]() + +```go +func (s *AudioStream) Next() bool +``` + +Next advances to the next chunk of audio data. It returns false when there are no more chunks or an error occurred. + + +### func \(\*AudioStream\) [Read]() + +```go +func (s *AudioStream) Read(p []byte) (n int, err error) +``` + +Read implements the io.Reader interface. + + +## type [AuthenticationError]() + +AuthenticationError is raised when authentication fails \(401\). + +```go +type AuthenticationError struct { + *APIError +} +``` + + +## type [Author]() + +Author represents voice model author information. 
+ +```go +type Author struct { + ID string `json:"_id"` + Nickname string `json:"nickname"` + Avatar string `json:"avatar"` +} +``` + + +## type [Client]() + +Client is the Fish Audio API client. + +```go +type Client struct { + + // Services + TTS *TTSService + ASR *ASRService + Voices *VoicesService + Account *AccountService + // contains filtered or unexported fields +} +``` + + +### func [NewClient]() + +```go +func NewClient(apiKey string, opts ...ClientOption) *Client +``` + +NewClient creates a new Fish Audio API client. + +If apiKey is empty, it will try to read from the FISH\_API\_KEY environment variable. + + +### func \(\*Client\) [Close]() + +```go +func (c *Client) Close() error +``` + +Close closes the HTTP client's idle connections. + + +## type [ClientOption]() + +ClientOption is a function that configures the Client. + +```go +type ClientOption func(*Client) +``` + + +### func [WithBaseURL]() + +```go +func WithBaseURL(url string) ClientOption +``` + +WithBaseURL sets a custom base URL for the API. + + +### func [WithHTTPClient]() + +```go +func WithHTTPClient(httpClient *http.Client) ClientOption +``` + +WithHTTPClient sets a custom HTTP client. + + +### func [WithTimeout]() + +```go +func WithTimeout(timeout time.Duration) ClientOption +``` + +WithTimeout sets the default timeout for requests. + + +## type [ConvertParams]() + +ConvertParams contains parameters for TTS conversion. + +```go +type ConvertParams struct { + // Text is the text to synthesize into speech (required). + Text string `json:"text"` + // ReferenceID is the voice model ID to use. + ReferenceID string `json:"reference_id,omitempty"` + // References is a list of reference audio for voice cloning. + References []ReferenceAudio `json:"references,omitempty"` + // Format is the audio output format. + Format AudioFormat `json:"format,omitempty"` + // Latency is the generation mode. 
+ Latency LatencyMode `json:"latency,omitempty"` + // Speed is a shorthand for setting prosody speed (0.5-2.0). + Speed float64 `json:"-"` + // Config provides additional TTS configuration. + Config *TTSConfig `json:"-"` +} +``` + + +## type [CreateVoiceParams]() + +CreateVoiceParams contains parameters for creating a voice. + +```go +type CreateVoiceParams struct { + // Title is the voice model name (required). + Title string + // Voices is a list of audio file bytes for training (required). + Voices [][]byte + // Description is the voice description. + Description string + // Texts are transcripts for voice samples. + Texts []string + // Tags are tags for categorization. + Tags []string + // CoverImage is the cover image bytes. + CoverImage []byte + // Visibility is the visibility setting. Default: "private". + Visibility Visibility + // TrainMode is the training mode. Default: "fast". + TrainMode TrainMode + // EnhanceAudioQuality indicates whether to enhance audio quality. Default: true. + EnhanceAudioQuality *bool +} +``` + + +## type [Credits]() + +Credits represents the user's API credit balance. + +```go +type Credits struct { + ID string `json:"_id"` + UserID string `json:"user_id"` + Credit string `json:"credit"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` + HasPhoneSHA256 *bool `json:"has_phone_sha256,omitempty"` + HasFreeCredit *bool `json:"has_free_credit,omitempty"` +} +``` + + +## type [FishAudioError]() + +FishAudioError is the base interface for all Fish Audio SDK errors. + +```go +type FishAudioError interface { + IsFishAudioError() + // contains filtered or unexported methods +} +``` + + +## type [GetCreditsParams]() + +GetCreditsParams contains parameters for getting credits. + +```go +type GetCreditsParams struct { + // CheckFreeCredit indicates whether to check free credit availability. + CheckFreeCredit bool +} +``` + + +## type [LatencyMode]() + +LatencyMode specifies the generation latency mode. 
+ +```go +type LatencyMode string +``` + + + +```go +const ( + LatencyNormal LatencyMode = "normal" + LatencyBalanced LatencyMode = "balanced" +) +``` + + +## type [ListVoicesParams]() + +ListVoicesParams contains parameters for listing voices. + +```go +type ListVoicesParams struct { + // PageSize is the number of results per page. Default: 10. + PageSize int + // PageNumber is the page number (1-indexed). Default: 1. + PageNumber int + // Title filters by title. + Title string + // Tags filters by tags. + Tags []string + // SelfOnly returns only the user's own voices. + SelfOnly bool + // AuthorID filters by author ID. + AuthorID string + // Language filters by language(s). + Language []string + // TitleLanguage filters by title language(s). + TitleLanguage []string + // SortBy is the sort field. Options: "task_count", "created_at". Default: "task_count". + SortBy string +} +``` + + +## type [Model]() + +Model specifies the TTS model to use. + +```go +type Model string +``` + + + +```go +const ( + ModelSpeech15 Model = "speech-1.5" + ModelSpeech16 Model = "speech-1.6" + ModelS1 Model = "s1" +) +``` + + +## type [ModelState]() + +ModelState specifies the state of a voice model. + +```go +type ModelState string +``` + + + +```go +const ( + ModelStateCreated ModelState = "created" + ModelStateTraining ModelState = "training" + ModelStateTrained ModelState = "trained" + ModelStateFailed ModelState = "failed" +) +``` + + +## type [NotFoundError]() + +NotFoundError is raised when a resource is not found \(404\). + +```go +type NotFoundError struct { + *APIError +} +``` + + +## type [Package]() + +Package represents the user's prepaid package information. 
+ +```go +type Package struct { + ID string `json:"_id"` + UserID string `json:"user_id"` + Type string `json:"type"` + Total int `json:"total"` + Balance int `json:"balance"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` + FinishedAt *string `json:"finished_at,omitempty"` +} +``` + + +## type [PaginatedResponse]() + +PaginatedResponse wraps paginated API responses. + +```go +type PaginatedResponse[T any] struct { + Total int `json:"total"` + Items []T `json:"items"` +} +``` + + +## type [PermissionError]() + +PermissionError is raised when permission is denied \(403\). + +```go +type PermissionError struct { + *APIError +} +``` + + +## type [Prosody]() + +Prosody contains speech prosody settings \(speed and volume\). + +```go +type Prosody struct { + // Speed is the speech speed multiplier. Range: 0.5-2.0. Default: 1.0. + Speed float64 `json:"speed,omitempty"` + // Volume is the volume adjustment in decibels. Range: -20.0 to 20.0. Default: 0.0. + Volume float64 `json:"volume,omitempty"` +} +``` + + +## type [RateLimitError]() + +RateLimitError is raised when rate limit is exceeded \(429\). + +```go +type RateLimitError struct { + *APIError +} +``` + + +## type [ReferenceAudio]() + +ReferenceAudio contains reference audio for voice cloning. + +```go +type ReferenceAudio struct { + // Audio is the audio file bytes for the reference sample. + Audio []byte `json:"audio"` + // Text is the transcription of what is spoken in the reference audio. + Text string `json:"text"` +} +``` + + +## type [RequestOptions]() + +RequestOptions allows per\-request overrides of client defaults. + +```go +type RequestOptions struct { + // Timeout overrides the client's default timeout. + Timeout time.Duration + + // AdditionalHeaders are extra headers to include in the request. + AdditionalHeaders map[string]string + + // AdditionalQueryParams are extra query parameters to include. 
+ AdditionalQueryParams map[string]string +} +``` + + +## type [Sample]() + +Sample represents a sample audio for a voice model. + +```go +type Sample struct { + Title string `json:"title"` + Text string `json:"text"` + TaskID string `json:"task_id"` + Audio string `json:"audio"` +} +``` + + +## type [ServerError]() + +ServerError is raised when the server encounters an error \(5xx\). + +```go +type ServerError struct { + *APIError +} +``` + + +## type [StreamParams]() + +StreamParams contains parameters for TTS streaming. + +```go +type StreamParams struct { + // Text is the text to synthesize into speech (required). + Text string `json:"text"` + // ReferenceID is the voice model ID to use. + ReferenceID string `json:"reference_id,omitempty"` + // References is a list of reference audio for voice cloning. + References []ReferenceAudio `json:"references,omitempty"` + // Format is the audio output format. + Format AudioFormat `json:"format,omitempty"` + // Latency is the generation mode. + Latency LatencyMode `json:"latency,omitempty"` + // Speed is a shorthand for setting prosody speed (0.5-2.0). + Speed float64 `json:"-"` + // Config provides additional TTS configuration. + Config *TTSConfig `json:"-"` +} +``` + + +## type [TTSConfig]() + +TTSConfig is reusable configuration for text\-to\-speech requests. + +```go +type TTSConfig struct { + // Format is the audio output format. Options: "mp3", "wav", "pcm", "opus". Default: "mp3". + Format AudioFormat `json:"format,omitempty"` + // SampleRate is the audio sample rate in Hz. + SampleRate int `json:"sample_rate,omitempty"` + // MP3Bitrate is the MP3 bitrate in kbps. Options: 64, 128, 192. Default: 128. + MP3Bitrate int `json:"mp3_bitrate,omitempty"` + // OpusBitrate is the Opus bitrate in kbps. Options: -1000, 24, 32, 48, 64. Default: 32. + OpusBitrate int `json:"opus_bitrate,omitempty"` + // Normalize indicates whether to normalize/clean the input text. Default: true. 
+ Normalize *bool `json:"normalize,omitempty"` + // ChunkLength is the characters per generation chunk. Range: 100-300. Default: 200. + ChunkLength int `json:"chunk_length,omitempty"` + // Latency is the generation mode. Options: "normal", "balanced". Default: "balanced". + Latency LatencyMode `json:"latency,omitempty"` + // ReferenceID is the voice model ID from fish.audio. + ReferenceID string `json:"reference_id,omitempty"` + // References is a list of reference audio samples for instant voice cloning. + References []ReferenceAudio `json:"references,omitempty"` + // Prosody contains speech speed and volume settings. + Prosody *Prosody `json:"prosody,omitempty"` + // TopP is the nucleus sampling parameter. Range: 0.0-1.0. Default: 0.7. + TopP float64 `json:"top_p,omitempty"` + // Temperature is the randomness in generation. Range: 0.0-1.0. Default: 0.7. + Temperature float64 `json:"temperature,omitempty"` +} +``` + + +## type [TTSService]() + +TTSService provides text\-to\-speech operations. + +```go +type TTSService struct { + // contains filtered or unexported fields +} +``` + + +### func \(\*TTSService\) [Convert]() + +```go +func (s *TTSService) Convert(ctx context.Context, params *ConvertParams) ([]byte, error) +``` + +Convert generates speech from text and returns the complete audio. + + +### func \(\*TTSService\) [Stream]() + +```go +func (s *TTSService) Stream(ctx context.Context, params *StreamParams) (*AudioStream, error) +``` + +Stream generates speech from text and returns an audio stream. + + +### func \(\*TTSService\) [StreamWebSocket]() + +```go +func (s *TTSService) StreamWebSocket(ctx context.Context, textChan <-chan string, params *StreamParams, opts *WebSocketOptions) (*WebSocketAudioStream, error) +``` + +StreamWebSocket streams text to speech over WebSocket for real\-time generation. + +The textChan receives text chunks to synthesize. Close the channel to end streaming. Returns a WebSocketAudioStream that can be iterated for audio chunks. 
+ + +## type [TrainMode]() + +TrainMode specifies the training mode for voice models. + +```go +type TrainMode string +``` + + + +```go +const ( + TrainModeFast TrainMode = "fast" +) +``` + + +## type [TranscribeParams]() + +TranscribeParams contains parameters for ASR transcription. + +```go +type TranscribeParams struct { + // Language is the language code (e.g., "en", "zh"). Auto-detected if empty. + Language string + // IncludeTimestamps indicates whether to include timestamp information. Default: true. + IncludeTimestamps *bool +} +``` + + +## type [UpdateVoiceParams]() + +UpdateVoiceParams contains parameters for updating a voice. + +```go +type UpdateVoiceParams struct { + // Title is the new title. + Title string + // Description is the new description. + Description string + // CoverImage is the new cover image bytes. + CoverImage []byte + // Visibility is the new visibility setting. + Visibility Visibility + // Tags are the new tags. + Tags []string +} +``` + + +## type [ValidationError]() + +ValidationError is raised when request validation fails \(422\). + +```go +type ValidationError struct { + *APIError +} +``` + + +## type [Visibility]() + +Visibility specifies the visibility of a voice model. + +```go +type Visibility string +``` + + + +```go +const ( + VisibilityPublic Visibility = "public" + VisibilityUnlist Visibility = "unlist" + VisibilityPrivate Visibility = "private" +) +``` + + +## type [Voice]() + +Voice represents a voice model. 
+ +```go +type Voice struct { + ID string `json:"_id"` + Type string `json:"type"` + Title string `json:"title"` + Description string `json:"description"` + CoverImage string `json:"cover_image"` + TrainMode TrainMode `json:"train_mode"` + State ModelState `json:"state"` + Tags []string `json:"tags"` + Samples []Sample `json:"samples"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + Languages []string `json:"languages"` + Visibility Visibility `json:"visibility"` + LockVisibility bool `json:"lock_visibility"` + LikeCount int `json:"like_count"` + MarkCount int `json:"mark_count"` + SharedCount int `json:"shared_count"` + TaskCount int `json:"task_count"` + Liked bool `json:"liked"` + Marked bool `json:"marked"` + Author Author `json:"author"` +} +``` + + +## type [VoicesService]() + +VoicesService provides voice management operations. + +```go +type VoicesService struct { + // contains filtered or unexported fields +} +``` + + +### func \(\*VoicesService\) [Create]() + +```go +func (s *VoicesService) Create(ctx context.Context, params *CreateVoiceParams) (*Voice, error) +``` + +Create creates/clones a new voice. + + +### func \(\*VoicesService\) [Delete]() + +```go +func (s *VoicesService) Delete(ctx context.Context, voiceID string) error +``` + +Delete deletes a voice. + + +### func \(\*VoicesService\) [Get]() + +```go +func (s *VoicesService) Get(ctx context.Context, voiceID string) (*Voice, error) +``` + +Get returns a voice by ID. + + +### func \(\*VoicesService\) [List]() + +```go +func (s *VoicesService) List(ctx context.Context, params *ListVoicesParams) (*PaginatedResponse[Voice], error) +``` + +List returns available voices/models. + + +### func \(\*VoicesService\) [Update]() + +```go +func (s *VoicesService) Update(ctx context.Context, voiceID string, params *UpdateVoiceParams) error +``` + +Update updates voice metadata. 
+ + +## type [WebSocketAudioStream]() + +WebSocketAudioStream wraps WebSocket audio chunks for iteration. + +```go +type WebSocketAudioStream struct { + // contains filtered or unexported fields +} +``` + + +### func \(\*WebSocketAudioStream\) [Bytes]() + +```go +func (s *WebSocketAudioStream) Bytes() []byte +``` + +Bytes returns the current chunk of audio data. + + +### func \(\*WebSocketAudioStream\) [Close]() + +```go +func (s *WebSocketAudioStream) Close() error +``` + +Close closes the stream. + + +### func \(\*WebSocketAudioStream\) [Collect]() + +```go +func (s *WebSocketAudioStream) Collect() ([]byte, error) +``` + +Collect reads all audio chunks and returns them as a single byte slice. + + +### func \(\*WebSocketAudioStream\) [Err]() + +```go +func (s *WebSocketAudioStream) Err() error +``` + +Err returns any error that occurred during iteration. + + +### func \(\*WebSocketAudioStream\) [Next]() + +```go +func (s *WebSocketAudioStream) Next() bool +``` + +Next advances to the next chunk of audio data. It returns false when there are no more chunks or an error occurred. + + +### func \(\*WebSocketAudioStream\) [Read]() + +```go +func (s *WebSocketAudioStream) Read(p []byte) (n int, err error) +``` + +Read implements the io.Reader interface. + + +## type [WebSocketError]() + +WebSocketError is raised when a WebSocket connection or streaming fails. + +```go +type WebSocketError struct { + Message string +} +``` + + +### func \(\*WebSocketError\) [Error]() + +```go +func (e *WebSocketError) Error() string +``` + + + + +### func \(\*WebSocketError\) [IsFishAudioError]() + +```go +func (e *WebSocketError) IsFishAudioError() +``` + + + + +## type [WebSocketOptions]() + +WebSocketOptions configures WebSocket connections. + +```go +type WebSocketOptions struct { + // PingTimeout is the maximum delay to wait for a pong response. + // Default: 20 seconds. + PingTimeout time.Duration + + // PingInterval is the interval for sending ping messages. + // Default: 20 seconds. 
+ PingInterval time.Duration + + // MaxMessageSize is the maximum message size in bytes. + // Default: 65536 bytes (64 KiB). + MaxMessageSize int64 + + // ReadBufferSize is the size of the read buffer. + ReadBufferSize int + + // WriteBufferSize is the size of the write buffer. + WriteBufferSize int +} +``` + + +### func [DefaultWebSocketOptions]() + +```go +func DefaultWebSocketOptions() *WebSocketOptions +``` + +DefaultWebSocketOptions returns WebSocketOptions with default values. + +Generated by [gomarkdoc](https://github.com/princjef/gomarkdoc) diff --git a/docs.json b/docs.json index 5709015..55edf78 100644 --- a/docs.json +++ b/docs.json @@ -200,6 +200,13 @@ "pages": [ "api-reference/sdk/javascript/api-reference" ] + }, + { + "group": "Go SDK", + "icon": "golang", + "pages": [ + "api-reference/sdk/go/api-reference" + ] } ] }