From e4a5e7e77f1a56ac3e2054cd0abfa5223714a830 Mon Sep 17 00:00:00 2001
From: Matthew Willett-Jeffries <matthewwillett-jeffries@MacBookPro.lan>
Date: Thu, 1 May 2025 17:23:57 -0400
Subject: [PATCH 1/9] adds simple api endpoint for listing available voices

---
 examples/server/server.cpp | 75 ++++++++++++++++++++++++++++++--------
 include/common.h           | 10 +++++
 include/tts.h              |  1 +
 src/kokoro_model.cpp       |  9 +++++
 src/kokoro_model.h         |  3 +-
 src/tts.cpp                |  9 +++++
 6 files changed, 91 insertions(+), 16 deletions(-)
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 92aae79..206f488 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -54,6 +54,7 @@ enum error_type {
 enum task_type {
     TTS,
     CONDITIONAL_PROMPT,
+    VOICES,
 };
 
 using json = nlohmann::ordered_json;
@@ -96,8 +97,8 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp
     fprintf(stdout, "request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status);
 }
 
-struct simple_text_prompt_task {
-    simple_text_prompt_task(task_type task, std::string prompt): task(task), prompt(prompt) {
+struct simple_server_task {
+    simple_server_task(task_type task, std::string prompt = ""): task(task), prompt(prompt) {
         id = rand();
         time = std::chrono::steady_clock::now();
     }
@@ -124,11 +125,11 @@ struct simple_text_prompt_task {
 struct simple_task_queue {
     std::mutex rw_mutex;
     std::condition_variable condition;
-    std::deque<simple_text_prompt_task*> queue;
+    std::deque<simple_server_task*> queue;
     bool running = true;
 
-    struct simple_text_prompt_task * get_next() {
-        struct simple_text_prompt_task * resp;
+    struct simple_server_task * get_next() {
+        struct simple_server_task * resp;
         std::unique_lock<std::mutex> lock(rw_mutex);
         condition.wait(lock, [&]{ 
             return !queue.empty() || !running; 
@@ -148,7 +149,7 @@ struct simple_task_queue {
         condition.notify_all();
     }
 
-    void push(struct simple_text_prompt_task * task) {
+    void push(struct simple_server_task * task) {
         std::lock_guard<std::mutex> lock(rw_mutex);
         queue.push_back(task);
         condition.notify_one();
@@ -162,7 +163,7 @@ struct simple_response_map {
     std::atomic<bool> running = true;
     std::thread * cleanup_thread;
 
-    std::map<int, simple_text_prompt_task*> completed;
+    std::map<int, simple_server_task*> completed;
 
     void cleanup_routine() {
         std::unique_lock<std::mutex> lock(rw_mutex);
@@ -192,16 +193,16 @@ struct simple_response_map {
         updated.notify_all();
     }
 
-    void push(struct simple_text_prompt_task * task) {
+    void push(struct simple_server_task * task) {
         std::unique_lock<std::mutex> lock(rw_mutex);
         completed[task->id] = task;
         lock.unlock();
         updated.notify_all();
     }
 
-    struct simple_text_prompt_task * get(int id) {
+    struct simple_server_task * get(int id) {
         std::unique_lock<std::mutex> lock(rw_mutex);
-        struct simple_text_prompt_task * resp = nullptr;
+        struct simple_server_task * resp = nullptr;
         try {
             return completed.at(id);
         } catch (const std::out_of_range& e) {
@@ -243,14 +244,14 @@ struct worker {
 
     void loop() {
         while (running) {
-            struct simple_text_prompt_task * task = task_queue->get_next();
+            struct simple_server_task * task = task_queue->get_next();
             if (task) {
                 process_task(task);
             }
         }
     }
 
-    void process_task(struct simple_text_prompt_task * task) {
+    void process_task(struct simple_server_task * task) {
         if (task->timed_out(task_timeout)) {
             return;
         }
@@ -277,6 +278,21 @@ struct worker {
                 task->success = true;
                 response_map->push(task);
                 break;
+            case VOICES:
+                if (!runner->supports_voices) {
+                    task->message = "Voices are not supported for architecture '" + runner->arch_name() + "'.";
+                    response_map->push(task);
+                    break;
+                }
+                for (auto voice : list_voices(runner)) {
+                    if (!task->message.empty()) {
+                        task->message += ",";
+                    }
+                    task->message += voice;
+                }
+                task->success = true;
+                response_map->push(task);
+                break;
         }
     }
 };
@@ -518,6 +534,15 @@ int main(int argc, const char ** argv) {
         res.status = 200;
     };
 
+    auto res_ok_voices = [](httplib::Response & res, const std::vector<std::string> & voices) {
+        json json_voices = json::array();
+        for (auto voice : voices) {
+            json_voices.push_back(voice);
+        }
+        res.set_content(safe_json_to_str(json_voices), MIMETYPE_JSON);
+        res.status = 200;
+    };
+
     svr->set_exception_handler([&res_error](const httplib::Request &, httplib::Response & res, const std::exception_ptr & ep) {
         std::string message;
         try {
@@ -614,7 +639,7 @@ int main(int argc, const char ** argv) {
             res_error(res, formatted_error);
             return;
         }
-        struct simple_text_prompt_task * task = new simple_text_prompt_task(TTS, prompt);
+        struct simple_server_task * task = new simple_server_task(TTS, prompt);
         int id = task->id;
         generation_configuration * conf = new generation_configuration();
         std::memcpy((void*)conf, default_generation_config, sizeof(generation_configuration));
@@ -661,7 +686,7 @@ int main(int argc, const char ** argv) {
 
         task->gen_config = conf;
         tqueue->push(task);
-        struct simple_text_prompt_task * rtask = rmap->get(id);
+        struct simple_server_task * rtask = rmap->get(id);
         if (!rtask->success) {
             json formatted_error = format_error_response(rtask->message, ERROR_TYPE_SERVER);
             res_error(res, formatted_error);
@@ -728,7 +753,7 @@ int main(int argc, const char ** argv) {
 
         int id = task->id;
         tqueue->push(task);
-        struct simple_text_prompt_task * rtask = rmap->get(id);
+        struct simple_server_task * rtask = rmap->get(id);
         if (!rtask->success) {
             json formatted_error = format_error_response(rtask->message, ERROR_TYPE_SERVER);
             res_error(res, formatted_error);
@@ -745,6 +770,25 @@ int main(int argc, const char ** argv) {
         &models_json
     ](const httplib::Request & _, httplib::Response & res) {
         res_ok_json(res, models_json);
+    }
+
+    const auto handle_voices = [&args, &tqueue, &rmap, &res_error, &res_ok_voices](const httplib::Request & req, httplib::Response & res) {
+        struct simple_server_task * task = new simple_server_task(VOICES);
+        int id = task->id;
+        tqueue->push(task);
+        struct simple_server_task * rtask = rmap->get(id);
+        if (!rtask->success) {
+            json formatted_error;
+            if (has_prefix(rtask->message, "Voices are not supported")) {
+                formatted_error = format_error_response(rtask->message, ERROR_TYPE_NOT_SUPPORTED);
+            } else {
+                formatted_error = format_error_response(rtask->message, ERROR_TYPE_SERVER);
+            }
+            res_error(res, formatted_error);
+            return;
+        }
+        std::vector<std::string> voices = split(rtask->message, ",");
+        res_ok_voices(res, voices);
     };
 
     // register API routes
@@ -753,6 +797,7 @@ int main(int argc, const char ** argv) {
     svr->Post("/v1/audio/speech", handle_tts);
     svr->Post("/v1/audio/conditional-prompt", handle_conditional);
     svr->Get("/v1/models", handle_models);
+    svr->Get("/v1/audio/voices", handle_voices);
 
     // Start the server
     svr->new_task_queue = [&args] { 
diff --git a/include/common.h b/include/common.h
index fc0dcdf..4f932fc 100644
--- a/include/common.h
+++ b/include/common.h
@@ -28,6 +28,11 @@ const std::map<std::string, tts_arch> SUPPORTED_ARCHITECTURES = {
 	{ "orpheus", ORPHEUS_ARCH }
 };
 
+const std::map<tts_arch, std::string> ARCHITECTURE_NAMES = {
+	{ PARLER_TTS_ARCH, "parler-tts" },
+	{ KOKORO_ARCH, "kokoro" },
+};
+
 struct generation_configuration {
     generation_configuration(
     	std::string voice = "",
@@ -55,6 +60,11 @@ struct tts_runner {
 	tts_arch arch;
 	struct ggml_context * ctx = nullptr;
 	float sampling_rate = 44100.0f;
+	bool supports_voices = false;
+
+	std::string arch_name() const { // name registered for `arch`; .at() throws std::out_of_range if there is none
+		return ARCHITECTURE_NAMES.at(arch);
+	}
 
 	void init_build(std::vector<uint8_t>* buf_compute_meta);
 	void free_build();
diff --git a/include/tts.h b/include/tts.h
index def032b..30e98dc 100644
--- a/include/tts.h
+++ b/include/tts.h
@@ -16,6 +16,7 @@ struct tts_runner * orpheus_from_file(gguf_context * meta_ctx, ggml_context * we
 struct tts_runner * runner_from_file(const std::string & fname, int n_threads, generation_configuration * config, bool cpu_only = true);
 int generate(tts_runner * runner, std::string sentence, struct tts_response * response, generation_configuration * config);
 void update_conditional_prompt(tts_runner * runner, const std::string file_path, const std::string prompt, bool cpu_only = true);
+std::vector<std::string> list_voices(tts_runner * runner);
 
 struct quantization_params {
     quantization_params(uint32_t n_threads, enum ggml_type quantize_type): n_threads(n_threads), quantize_type(quantize_type) {};
diff --git a/src/kokoro_model.cpp b/src/kokoro_model.cpp
index a4b8dfc..f73dddb 100644
--- a/src/kokoro_model.cpp
+++ b/src/kokoro_model.cpp
@@ -1434,6 +1434,15 @@ int kokoro_runner::generate(std::string prompt, struct tts_response * response,
   	return 0;
 }
 
+// Collects the keys of model->voices (the voice names) into a new vector.
+std::vector<std::string> kokoro_runner::list_voices() {
+	std::vector<std::string> voices;
+	voices.reserve(model->voices.size());
+	// Bind by const reference: iterating by value copied every key/value pair.
+	for (const auto & voice : model->voices) voices.push_back(voice.first);
+	return voices;
+}
+
 
 std::string get_espeak_id_from_kokoro_voice(std::string voice) {
 	return !voice.empty() && KOKORO_LANG_TO_ESPEAK_ID.find(voice[0]) != KOKORO_LANG_TO_ESPEAK_ID.end() ? KOKORO_LANG_TO_ESPEAK_ID[voice[0]] : "gmw/en-US";
diff --git a/src/kokoro_model.h b/src/kokoro_model.h
index 1985c11..cd332e6 100644
--- a/src/kokoro_model.h
+++ b/src/kokoro_model.h
@@ -426,6 +426,7 @@ struct kokoro_context * build_new_kokoro_context(struct kokoro_model * model, in
 struct kokoro_runner : tts_runner {
     kokoro_runner(kokoro_model * model, kokoro_context * context, single_pass_tokenizer * tokenizer, kokoro_duration_runner * drunner, phonemizer * phmzr): model(model), kctx(context), tokenizer(tokenizer), drunner(drunner), phmzr(phmzr) {
     	tts_runner::sampling_rate = 24000.0f;
+    	tts_runner::supports_voices = true;
     };
     ~kokoro_runner() {
         if (ctx) {
@@ -448,8 +449,8 @@ struct kokoro_runner : tts_runner {
     void init_build() {
         tts_runner::init_build(&kctx->buf_compute_meta);
     }
-    
 
+    std::vector<std::string> list_voices();
     std::vector<std::vector<uint32_t>> tokenize_chunks(std::vector<std::string> clauses);
     void assign_weight(std::string name, ggml_tensor * tensor);
     void prepare_post_load();
diff --git a/src/tts.cpp b/src/tts.cpp
index 348144e..0da56e4 100644
--- a/src/tts.cpp
+++ b/src/tts.cpp
@@ -176,6 +176,15 @@ int generate(tts_runner * runner, std::string sentence, struct tts_response * re
     }
 }
 
+std::vector<std::string> list_voices(tts_runner * runner) {
+    switch(runner->arch) {
+        case KOKORO_ARCH:
+            return ((kokoro_runner*)runner)->list_voices();
+        default:
+            TTS_ABORT("%s failed. The architecture '%d' does not support #list_voices supported.", __func__, runner->arch);
+    }   
+}
+
 void update_conditional_prompt(tts_runner * runner, const std::string file_path, const std::string prompt, bool cpu_only) {
     int n_threads = ((parler_tts_runner*)runner)->pctx->n_threads;
     ((parler_tts_runner*)runner)->update_conditional_prompt(file_path, prompt, n_threads, cpu_only);

From 477c31d7d63dcc55d85d895a7edc9db223c7259e Mon Sep 17 00:00:00 2001
From: Matthew Willett-Jeffries <matthewwillett-jeffries@MacBookPro.lan>
Date: Thu, 1 May 2025 17:29:48 -0400
Subject: [PATCH 2/9] update readme

---
 examples/server/README.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/examples/server/README.md b/examples/server/README.md
index 6f30f15..4afdbfd 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -83,9 +83,16 @@ curl http://127.0.0.1:8080/v1/audio/speech  \
 
 The only required parameter is `input` otherwise generation configuration will be determined by the defaults set on server initialization, and the `response_format` will use `wav`. The `response_format` field currently supports only `wav` and `aiff` audio formats.
 
+#### Voices
+
+For models that support voices a complete json list of supported voices can be queried vis the voices endpoint, `/v1/audio/voices`:
+
+```commandline
+curl http://127.0.0.1:8080/v1/audio/voices
+``` 
+
 ### Future Work
 
 Future work will include:
 * Support for token authentication and permissioning
-* Multiple model support
 * Streaming audio, for longform audio generation.

From d890547661708495043c5fb9d004e22f7ea5fe72 Mon Sep 17 00:00:00 2001
From: Matthew Willett-Jeffries <matthewwillett-jeffries@MacBookPro.lan>
Date: Thu, 1 May 2025 18:09:16 -0400
Subject: [PATCH 3/9] cruft

---
 src/tts.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tts.cpp b/src/tts.cpp
index 0da56e4..f5faf28 100644
--- a/src/tts.cpp
+++ b/src/tts.cpp
@@ -182,7 +182,7 @@ std::vector<std::string> list_voices(tts_runner * runner) {
             return ((kokoro_runner*)runner)->list_voices();
         default:
             TTS_ABORT("%s failed. The architecture '%d' does not support #list_voices supported.", __func__, runner->arch);
-    }   
+    }
 }
 
 void update_conditional_prompt(tts_runner * runner, const std::string file_path, const std::string prompt, bool cpu_only) {

From 3167ff6dddf1b5d91249c8e01202beea6ca2290a Mon Sep 17 00:00:00 2001
From: ecyht2 <ecyht2@nottingham.edu.my>
Date: Sun, 22 Jun 2025 16:10:10 +0800
Subject: [PATCH 4/9] fix: Fixed voices API to work with multiple models

---
 examples/server/server.cpp | 76 +++++++++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 22 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 206f488..10c7c0d 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -251,7 +251,7 @@ struct worker {
         }
     }
 
-    void process_task(struct simple_server_task * task) {
+    const void process_task(struct simple_server_task * task) {
         if (task->timed_out(task_timeout)) {
             return;
         }
@@ -279,16 +279,31 @@ struct worker {
                 response_map->push(task);
                 break;
             case VOICES:
-                if (!runner->supports_voices) {
-                    task->message = "Voices are not supported for architecture '" + runner->arch_name() + "'.";
-                    response_map->push(task);
-                    break;
+                // Maybe there is a better way to pass the voices rather than
+                // needing a custom serialized message?
+                // Getting all voices
+                std::unordered_map<std::string, std::string> voice_map = {};
+                for (const auto &[id, runner] : runners) {
+                    if (!runner->supports_voices) {
+                        continue;
+                    }
+                    std::string voices_string = "";
+                    for (auto voice : list_voices(runner)) {
+                        if (!voices_string.empty()) {
+                            voices_string += ",";
+                        }
+                        voices_string += voice;
+                    }
+                    voice_map[id] = voices_string;
                 }
-                for (auto voice : list_voices(runner)) {
+                // Formatting final message
+                for (const auto &[id, voices] : voice_map) {
                     if (!task->message.empty()) {
-                        task->message += ",";
+                        task->message += ";";
                     }
-                    task->message += voice;
+                    task->message += id;
+                    task->message += "/";
+                    task->message += voices;
                 }
                 task->success = true;
                 response_map->push(task);
@@ -462,6 +477,7 @@ int main(int argc, const char ** argv) {
     svr.reset(new httplib::Server());
 #endif
 
+    // Models Variables
     std::unordered_map<std::string, std::string> model_map = {};
     const std::string model_path = args.get_string_param("--model-path");
     if (std::filesystem::is_directory(model_path)) {
@@ -509,6 +525,9 @@ int main(int argc, const char ** argv) {
     }
     const json models_json = {{"object", "list"}, {"data", models}};
 
+    // Voices Variables
+    json voices_json = nullptr;
+
     std::atomic<server_state> state{LOADING};
 
     svr->set_logger(log_server_request);
@@ -534,15 +553,6 @@ int main(int argc, const char ** argv) {
         res.status = 200;
     };
 
-    auto res_ok_voices = [](httplib::Response & res, const std::vector<std::string> & voices) {
-        json json_voices = json::array();
-        for (auto voice : voices) {
-            json_voices.push_back(voice);
-        }
-        res.set_content(safe_json_to_str(json_voices), MIMETYPE_JSON);
-        res.status = 200;
-    };
-
     svr->set_exception_handler([&res_error](const httplib::Request &, httplib::Response & res, const std::exception_ptr & ep) {
         std::string message;
         try {
@@ -736,7 +746,7 @@ int main(int argc, const char ** argv) {
             return;
         }
         std::string prompt = data.at("input").get<std::string>();
-        struct simple_text_prompt_task * task = new simple_text_prompt_task(CONDITIONAL_PROMPT, prompt);
+        struct simple_server_task * task = new simple_server_task(CONDITIONAL_PROMPT, prompt);
 
         if (data.contains("model") && data.at("model").is_string()) {
             const std::string model = data.at("model");
@@ -770,10 +780,27 @@ int main(int argc, const char ** argv) {
         &models_json
     ](const httplib::Request & _, httplib::Response & res) {
         res_ok_json(res, models_json);
-    }
+    };
+
+    const auto handle_voices = [
+        &args,
+        &tqueue,
+        &rmap,
+        &res_error,
+        &res_ok_json,
+        &voices_json,
+        &default_model
+    ](const httplib::Request & req, httplib::Response & res) {
+        // Using Cached Values
+        if (!voices_json.is_null()) {
+            res_ok_json(res, voices_json);
+            return;
+        }
 
-    const auto handle_voices = [&args, &tqueue, &rmap, &res_error, &res_ok_voices](const httplib::Request & req, httplib::Response & res) {
         struct simple_server_task * task = new simple_server_task(VOICES);
+        // Setting the model to default model (as dummy value) so no new runner is created
+        task->model = default_model;
+
         int id = task->id;
         tqueue->push(task);
         struct simple_server_task * rtask = rmap->get(id);
@@ -787,8 +814,13 @@ int main(int argc, const char ** argv) {
             res_error(res, formatted_error);
             return;
         }
-        std::vector<std::string> voices = split(rtask->message, ",");
-        res_ok_voices(res, voices);
+        json parsed = json::object();  // built locally; voices_json is assigned only once this is complete
+        for (const std::string & entry : split(rtask->message, ";")) {
+            const std::vector<std::string> parts = split(entry, "/");  // "<model id>/<voice>,<voice>,..."
+            if (parts.empty() || parts[0].empty()) continue;  // empty message or malformed entry
+            parsed[parts[0]] = parts.size() > 1 ? json(split(parts[1], ",")) : json::array();
+        }
+        res_ok_json(res, voices_json = parsed);
     };
 
     // register API routes

From bb3f8767b771227636d194e827466b78d346c6b3 Mon Sep 17 00:00:00 2001
From: ecyht2 <ecyht2@nottingham.edu.my>
Date: Sun, 22 Jun 2025 17:34:25 +0800
Subject: [PATCH 5/9] feat: Added voice selection in server API page

---
 examples/server/public/index.html | 121 ++++++++++++++++++++----------
 1 file changed, 82 insertions(+), 39 deletions(-)

diff --git a/examples/server/public/index.html b/examples/server/public/index.html
index ffaa29c..d43cdf0 100644
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -60,41 +60,6 @@
             gap: 10px;
         }
 
-        select {
-            appearance: base-select;
-            flex-grow: 1;
-            box-sizing: border-box;
-            padding: 10px;
-            border: 1px solid #d1d5db;
-            border-radius: 6px;
-            background: none;
-            font-family: inherit;
-            font-size: 0.875rem;
-            transition:
-                border-color 0.2s,
-                box-shadow 0.2s;
-        }
-
-        select:focus {
-            outline: none;
-            border-color: #3b82f6;
-            box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.2);
-        }
-
-        ::picker(select) {
-            appearance: base-select;
-            flex-grow: 1;
-            box-sizing: border-box;
-            padding: 10px;
-            border: 1px solid #d1d5db;
-            border-radius: 6px;
-            font-family: inherit;
-            font-size: 0.875rem;
-            transition:
-                border-color 0.2s,
-                box-shadow 0.2s;
-        }
-
         .refresh-btn {
             padding-right: 9.5px;
             padding-left: 9.5px;
@@ -136,7 +101,8 @@
         }
 
         textarea,
-        input[type="text"] {
+        input[type="text"],
+        select {
             box-sizing: border-box;
             width: 100%;
             padding: 10px;
@@ -148,7 +114,8 @@
         }
 
         textarea:focus,
-        input[type="text"]:focus {
+        input[type="text"]:focus,
+        select:focus {
             outline: none;
             border-color: #3b82f6;
             box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.2);
@@ -159,6 +126,25 @@
             resize: vertical;
         }
 
+        select {
+            appearance: base-select;
+            background: none;
+        }
+
+        ::picker(select) {
+            appearance: base-select;
+            flex-grow: 1;
+            box-sizing: border-box;
+            padding: 10px;
+            border: 1px solid #d1d5db;
+            border-radius: 6px;
+            font-family: inherit;
+            font-size: 0.875rem;
+            transition:
+                border-color 0.2s,
+                box-shadow 0.2s;
+        }
+
         .slider-container {
             margin-top: 8px;
         }
@@ -369,6 +355,14 @@ <h1>TTS.cpp Server API</h1>
                             <p class="hint">API key for authentication (does nothing for now)</p>
                         </div>
 
+                        <div class="form-group">
+                            <label for="voice-select">Voices</label>
+                            <select id="voice-select">
+                                <option value="" disabled selected>Loading voices...</option>
+                            </select>
+                            <p class="hint">Voice to use for the speech (not all models have voices)</p>
+                        </div>
+
                         <div class="form-group">
                             <label for="temperature">
                                 Temperature: <span id="temperature-value" class="slider-value">1</span>
@@ -449,6 +443,8 @@ <h1>TTS.cpp Server API</h1>
         // Advanced parameters
         const baseUrl = document.getElementById('base-url');
         const apiKey = document.getElementById('api-key');
+        /** @type{HTMLSelectElement} */
+        const voiceSelect = document.getElementById('voice-select');
         const temperature = document.getElementById('temperature');
         const temperatureValue = document.getElementById('temperature-value');
         const top_k = document.getElementById('top_k');
@@ -465,6 +461,48 @@ <h1>TTS.cpp Server API</h1>
             return base;
         }
 
+        // Start: Voices Logic
+        // Fetches the model-id -> voice-list object from /v1/audio/voices; resolves to undefined on failure.
+        async function fetchVoices() {
+            const getURL = new URL("/v1/audio/voices", getBaseURL());
+            // Only send Authorization when a key was entered; an undefined
+            // header value would be serialized as the string "undefined".
+            const headers = apiKey.value ? { 'Authorization': `Bearer ${apiKey.value}` } : {};
+            try {
+                const response = await fetch(getURL, { method: 'GET', headers });
+                if (!response.ok) {
+                    throw new Error(`Failed to fetch voices (HTTP ${response.status})`);
+                }
+                return await response.json();
+            } catch (err) {
+                console.error('Error fetching voices:', err);
+                showError(err.toString());
+            }
+        }
+        function updateVoices() {
+            while (voiceSelect.options.length > 0) {
+                voiceSelect.remove(0)
+            }
+
+            // There should be atleast one model
+            if (voices === undefined) {
+                return;
+            }
+            const voiceList = voices[modelSelect.value];
+            if (voiceList === undefined) {
+                return;
+            }
+            for (const voice of voiceList) {
+                const option = document.createElement('option');
+                option.textContent = voice;
+                option.value = voice;
+                voiceSelect.add(option);
+            }
+        }
+        let voices = undefined;
+        modelSelect.addEventListener('input', updateVoices);
+        // End: Voices Logic
+
         // Start: Refresh Logic
         async function refreshModels() {
             refreshBtn.disabled = true;
@@ -481,7 +519,7 @@ <h1>TTS.cpp Server API</h1>
                 });
 
                 const models = (await response.json()).data;
-                for (let i = 0; i <= modelSelect.options.length; i++) {
+                while (modelSelect.options.length > 0) {
                     modelSelect.remove(0)
                 }
 
@@ -492,6 +530,10 @@ <h1>TTS.cpp Server API</h1>
                     option.value = model.id;
                     modelSelect.add(option);
                 }
+
+                // Fetching voices
+                voices = await fetchVoices();
+                updateVoices();
             } catch (err) {
                 console.error('Error fetching models:', err);
                 showError(err.toString());
@@ -548,7 +590,8 @@ <h1>TTS.cpp Server API</h1>
                     temperature: parseFloat(temperature.value),
                     top_k: parseInt(top_k.value),
                     repetition_penalty: parseFloat(repetition_penalty.value),
-                    model: modelSelect.value
+                    model: modelSelect.value,
+                    voice: voiceSelect.value,
                 };
 
                 const postURL = new URL(API_URL, getBaseURL())

From 9d2ced8cf1068d67071404a22eb3a5391e5b2e4a Mon Sep 17 00:00:00 2001
From: ecyht2 <ecyht2@nottingham.edu.my>
Date: Sun, 22 Jun 2025 17:41:21 +0800
Subject: [PATCH 6/9] refactor: Removed unnecessary comment

---
 examples/server/public/index.html | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/server/public/index.html b/examples/server/public/index.html
index d43cdf0..068eb9c 100644
--- a/examples/server/public/index.html
+++ b/examples/server/public/index.html
@@ -484,7 +484,6 @@ <h1>TTS.cpp Server API</h1>
                 voiceSelect.remove(0)
             }
 
-            // There should be atleast one model
             if (voices === undefined) {
                 return;
             }

From 9fe559cc28527ce5190f63084668523938c54944 Mon Sep 17 00:00:00 2001
From: ecyht2 <ecyht2@nottingham.edu.my>
Date: Thu, 26 Jun 2025 15:48:57 +0800
Subject: [PATCH 7/9] doc: Fix invalid github README syntax

---
 examples/server/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/server/README.md b/examples/server/README.md
index 4afdbfd..18c072b 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -87,7 +87,7 @@ The only required parameter is `input` otherwise generation configuration will b
 
 For models that support voices a complete json list of supported voices can be queried vis the voices endpoint, `/v1/audio/voices`:
 
-```commandline
+```bash
 curl http://127.0.0.1:8080/v1/audio/voices
 ``` 
 

From 037c5f0d9b4e434562d26208c36982e9e5415652 Mon Sep 17 00:00:00 2001
From: ecyht2 <ecyht2@nottingham.edu.my>
Date: Thu, 26 Jun 2025 20:22:34 +0800
Subject: [PATCH 8/9] feat: Added voice list for Orpheus

---
 src/orpheus_model.cpp | 9 +++++++++
 src/orpheus_model.h   | 1 +
 2 files changed, 10 insertions(+)

diff --git a/src/orpheus_model.cpp b/src/orpheus_model.cpp
index dc0fa8f..8e2bb5f 100644
--- a/src/orpheus_model.cpp
+++ b/src/orpheus_model.cpp
@@ -462,3 +462,12 @@ void orpheus_runner::prepare_post_load() {
     auto gf = build_orpheus_graph(batch);
     octx->prep_schedule(gf);
 }
+
+// Returns a copy of the entries of orpheus_voices. Qualified with orpheus_runner::
+// so that it defines the member declared in orpheus_model.h, not a free function.
+std::vector<std::string> orpheus_runner::list_voices() {
+	std::vector<std::string> voices;
+	voices.reserve(orpheus_voices.size());
+	for (const auto & voice : orpheus_voices) voices.push_back(voice);
+	return voices;
+}
diff --git a/src/orpheus_model.h b/src/orpheus_model.h
index 6edd36b..9f02d76 100644
--- a/src/orpheus_model.h
+++ b/src/orpheus_model.h
@@ -126,6 +126,7 @@ struct orpheus_runner : tts_runner {
         tts_runner::init_build(&octx->buf_compute_meta);
     }
 
+    std::vector<std::string> list_voices();
     struct ggml_cgraph * build_orpheus_graph(orpheus_ubatch & batch);
     void orpheus_kv_cache_init();
     void orpheus_build_kv_store(struct ggml_context * ctx, struct ggml_cgraph * graph, struct ggml_tensor * k_cur, struct ggml_tensor * v_cur, int index, uint32_t n_tokens, int repeat);

From 683b6c536972625e6e88214b024c3772b436feb2 Mon Sep 17 00:00:00 2001
From: ecyht2 <ecyht2@nottingham.edu.my>
Date: Thu, 26 Jun 2025 20:44:42 +0800
Subject: [PATCH 9/9] feat: Updated architecture names to include new models

---
 include/common.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/common.h b/include/common.h
index 4f932fc..c3a1a1c 100644
--- a/include/common.h
+++ b/include/common.h
@@ -28,10 +28,17 @@ const std::map<std::string, tts_arch> SUPPORTED_ARCHITECTURES = {
 	{ "orpheus", ORPHEUS_ARCH }
 };
 
-const std::map<tts_arch, std::string> ARCHITECTURE_NAMES = {
-	{ PARLER_TTS_ARCH, "parler-tts" },
-	{ KOKORO_ARCH, "kokoro" },
-};
+/// Inverts `m`: each (key, value) entry of the input becomes (value, key) in the result.
+/// Used below to derive ARCHITECTURE_NAMES from SUPPORTED_ARCHITECTURES.
+template<typename K, typename V>
+static std::map<V, K> reverse_map(const std::map<K, V>& m) {
+    std::map<V, K> inverted;
+    // Plain assignment, so when several keys share a value the last one visited is kept.
+    for (const auto& [key, value] : m) inverted[value] = key;
+    return inverted;
+}
+
+const std::map<tts_arch, std::string> ARCHITECTURE_NAMES = reverse_map(SUPPORTED_ARCHITECTURES);
 
 struct generation_configuration {
     generation_configuration(