diff --git a/examples/common-whisper.cpp b/examples/common-whisper.cpp
index 6218a882eb5..1d8a0223910 100644
--- a/examples/common-whisper.cpp
+++ b/examples/common-whisper.cpp
@@ -74,6 +74,8 @@ bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std:
     }
     else if (((result = ma_decoder_init_file(fname.c_str(), &decoder_config, &decoder)) != MA_SUCCESS)) {
 #if defined(WHISPER_FFMPEG)
+        fprintf(stderr, "warning: failed to read audio data from file: %s\n", ma_result_description(result));
+
         if (ffmpeg_decode_audio(fname, audio_data) != 0) {
             fprintf(stderr, "error: failed to ffmpeg decode '%s'\n", fname.c_str());
 
@@ -86,11 +88,9 @@ bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std:
             return false;
         }
 #else
-        if ((result = ma_decoder_init_memory(fname.c_str(), fname.size(), &decoder_config, &decoder)) != MA_SUCCESS) {
-            fprintf(stderr, "error: failed to read audio data as wav (%s)\n", ma_result_description(result));
+        fprintf(stderr, "error: failed to read audio data from file (%s)\n", ma_result_description(result));
 
-            return false;
-        }
+        return false;
 #endif
     }
 
diff --git a/examples/common-whisper.h b/examples/common-whisper.h
index 4134362150a..d1e9628d65a 100644
--- a/examples/common-whisper.h
+++ b/examples/common-whisper.h
@@ -5,7 +5,6 @@
 #include
 
 // Read WAV audio file and store the PCM data into pcmf32
-// fname can be a buffer of WAV data instead of a filename
 // The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
 // If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
 bool read_audio_data(
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 901f65f6c35..6a9ef781848 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -306,6 +306,23 @@ std::string generate_temp_filename(const std::string &prefix, const std::string
     return ss.str();
 }
 
+// Write the uploaded audio payload to a temporary file named by generate_temp_filename().
+// Returns the path of that file, or an empty string if the file could not be
+// created or completely written (a partially written file is removed).
+std::string write_audio_to_temp_file(const MultipartFormData &audio_file, const std::string &extension) {
+    const std::string temp_filename = generate_temp_filename("whisper-server", extension);
+    std::ofstream temp_file{temp_filename, std::ios::binary};
+    temp_file << audio_file.content;
+    temp_file.close();
+    // close() flushes buffered data; fail() also reports a stream that never opened
+    if (temp_file.fail()) {
+        fprintf(stderr, "error: failed to write temporary file '%s'\n", temp_filename.c_str());
+        std::remove(temp_filename.c_str());
+        return "";
+    }
+    return temp_filename;
+}
+
 bool convert_to_wav(const std::string & temp_filename, std::string & error_resp) {
     std::ostringstream cmd_stream;
     std::string converted_filename_temp = temp_filename + "_temp.wav";
@@ -820,10 +837,12 @@ int main(int argc, char ** argv) {
         if (sparams.ffmpeg_converter) {
             // if file is not wav, convert to wav
             // write to temporary file
-            const std::string temp_filename = generate_temp_filename("whisper-server", ".wav");
-            std::ofstream temp_file{temp_filename, std::ios::binary};
-            temp_file << audio_file.content;
-            temp_file.close();
+            const std::string temp_filename = write_audio_to_temp_file(audio_file, ".wav");
+            if (temp_filename.empty()) {
+                const std::string error_resp = "{\"error\":\"failed to write audio to temporary file\"}";
+                res.set_content(error_resp, "application/json");
+                return;
+            }
 
             std::string error_resp = "{\"error\":\"Failed to execute ffmpeg command.\"}";
             const bool is_converted = convert_to_wav(temp_filename, error_resp);
@@ -844,13 +863,23 @@ int main(int argc, char ** argv) {
             // remove temp file
             std::remove(temp_filename.c_str());
         } else {
-            if (!::read_audio_data(audio_file.content, pcmf32, pcmf32s, params.diarize))
+            // Save to temporary file
+            const std::string temp_filename = write_audio_to_temp_file(audio_file, ".tmp");
+
+            // decode, then delete the temp file once so neither outcome can leave it behind
+            const bool is_loaded = !temp_filename.empty() &&
+                ::read_audio_data(temp_filename, pcmf32, pcmf32s, params.diarize);
+            if (!temp_filename.empty()) {
+                std::remove(temp_filename.c_str());
+            }
+
+            if (!is_loaded)
             {
                 fprintf(stderr, "error: failed to read audio data\n");
                 const std::string error_resp = "{\"error\":\"failed to read audio data\"}";
                 res.set_content(error_resp, "application/json");
                 return;
             }
         }
 
         printf("Successfully loaded %s\n", filename.c_str());