Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion examples/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,16 @@ curl http://127.0.0.1:8080/v1/audio/speech \

The only required parameter is `input`; otherwise, the generation configuration will be determined by the defaults set on server initialization, and the `response_format` will default to `wav`. The `response_format` field currently supports only the `wav` and `aiff` audio formats.

#### Voices

For models that support voices, a complete JSON list of supported voices can be queried via the voices endpoint, `/v1/audio/voices`:

```bash
curl http://127.0.0.1:8080/v1/audio/voices
```

### Future Work

Future work will include:
* Support for token authentication and permissioning
* Multiple model support
* Streaming audio, for longform audio generation.
120 changes: 81 additions & 39 deletions examples/server/public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -60,41 +60,6 @@
gap: 10px;
}

select {
appearance: base-select;
flex-grow: 1;
box-sizing: border-box;
padding: 10px;
border: 1px solid #d1d5db;
border-radius: 6px;
background: none;
font-family: inherit;
font-size: 0.875rem;
transition:
border-color 0.2s,
box-shadow 0.2s;
}

select:focus {
outline: none;
border-color: #3b82f6;
box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.2);
}

::picker(select) {
appearance: base-select;
flex-grow: 1;
box-sizing: border-box;
padding: 10px;
border: 1px solid #d1d5db;
border-radius: 6px;
font-family: inherit;
font-size: 0.875rem;
transition:
border-color 0.2s,
box-shadow 0.2s;
}

.refresh-btn {
padding-right: 9.5px;
padding-left: 9.5px;
Expand Down Expand Up @@ -136,7 +101,8 @@
}

textarea,
input[type="text"] {
input[type="text"],
select {
box-sizing: border-box;
width: 100%;
padding: 10px;
Expand All @@ -148,7 +114,8 @@
}

textarea:focus,
input[type="text"]:focus {
input[type="text"]:focus,
select:focus {
outline: none;
border-color: #3b82f6;
box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.2);
Expand All @@ -159,6 +126,25 @@
resize: vertical;
}

select {
appearance: base-select;
background: none;
}

::picker(select) {
appearance: base-select;
flex-grow: 1;
box-sizing: border-box;
padding: 10px;
border: 1px solid #d1d5db;
border-radius: 6px;
font-family: inherit;
font-size: 0.875rem;
transition:
border-color 0.2s,
box-shadow 0.2s;
}

.slider-container {
margin-top: 8px;
}
Expand Down Expand Up @@ -369,6 +355,14 @@ <h1>TTS.cpp Server API</h1>
<p class="hint">API key for authentication (does nothing for now)</p>
</div>

<div class="form-group">
<label for="voice-select">Voices</label>
<select id="voice-select">
<option value="" disabled selected>Loading voices...</option>
</select>
<p class="hint">Voice to use for the speech (not all models have voices)</p>
</div>

<div class="form-group">
<label for="temperature">
Temperature: <span id="temperature-value" class="slider-value">1</span>
Expand Down Expand Up @@ -449,6 +443,8 @@ <h1>TTS.cpp Server API</h1>
// Advanced parameters
// Element lookups by id; each constant holds whatever getElementById returns.
const baseUrl = document.getElementById('base-url');
const apiKey = document.getElementById('api-key');
// Voice dropdown; its options are rebuilt by updateVoices().
/** @type {HTMLSelectElement} */
const voiceSelect = document.getElementById('voice-select');
const temperature = document.getElementById('temperature');
const temperatureValue = document.getElementById('temperature-value');
const top_k = document.getElementById('top_k');
Expand All @@ -465,6 +461,47 @@ <h1>TTS.cpp Server API</h1>
return base;
}

// Start: Voices Logic
/**
 * Fetch the voice listing from the server's `/v1/audio/voices` endpoint.
 *
 * The request URL is resolved against getBaseURL(). A bearer `Authorization`
 * header is attached only when the API-key field is non-empty.
 *
 * @returns {Promise<any>} The parsed JSON response body, or `undefined` when
 *     the request fails (the failure is logged and passed to showError()).
 */
async function fetchVoices() {
    const getURL = new URL("/v1/audio/voices", getBaseURL());
    // Omit the header entirely when no key is set: fetch() stringifies header
    // values, so `Authorization: undefined` would be sent as the text "undefined".
    const headers = apiKey.value ? { 'Authorization': `Bearer ${apiKey.value}` } : {};

    try {
        const response = await fetch(getURL, {
            method: 'GET',
            headers,
        });
        // fetch() only rejects on network failure; surface HTTP errors explicitly
        // instead of treating an error payload as voice data.
        if (!response.ok) {
            throw new Error(`Failed to fetch voices: ${response.status} ${response.statusText}`);
        }
        return await response.json();
    } catch (err) {
        console.error('Error fetching voices:', err);
        showError(err.toString());
    }
}
/**
 * Rebuild the voice dropdown for the currently selected model.
 *
 * All existing options are removed first. If no voice data is loaded, or the
 * data has no entry for the selected model, the dropdown is left empty.
 */
function updateVoices() {
    // Drop every existing option, always from the front.
    while (voiceSelect.options.length !== 0) {
        voiceSelect.remove(0);
    }

    // Look up the list for the selected model only when voice data exists.
    const available = voices === undefined ? undefined : voices[modelSelect.value];
    if (available === undefined) {
        return;
    }

    for (const name of available) {
        const entry = document.createElement('option');
        entry.value = name;
        entry.textContent = name;
        voiceSelect.add(entry);
    }
}
// Voice data as returned by fetchVoices(); updateVoices() indexes it by the
// selected model's value. Undefined until a fetch result is assigned.
let voices = undefined;
// Rebuild the voice options whenever the model selection receives an input event.
modelSelect.addEventListener('input', updateVoices);
// End: Voices Logic

// Start: Refresh Logic
async function refreshModels() {
refreshBtn.disabled = true;
Expand All @@ -481,7 +518,7 @@ <h1>TTS.cpp Server API</h1>
});

const models = (await response.json()).data;
for (let i = 0; i <= modelSelect.options.length; i++) {
while (modelSelect.options.length > 0) {
modelSelect.remove(0)
}

Expand All @@ -492,6 +529,10 @@ <h1>TTS.cpp Server API</h1>
option.value = model.id;
modelSelect.add(option);
}

// Fetching voices
voices = await fetchVoices();
updateVoices();
} catch (err) {
console.error('Error fetching models:', err);
showError(err.toString());
Expand Down Expand Up @@ -548,7 +589,8 @@ <h1>TTS.cpp Server API</h1>
temperature: parseFloat(temperature.value),
top_k: parseInt(top_k.value),
repetition_penalty: parseFloat(repetition_penalty.value),
model: modelSelect.value
model: modelSelect.value,
voice: voiceSelect.value,
};

const postURL = new URL(API_URL, getBaseURL())
Expand Down
Loading