Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion MeetingAssistant.pro
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ win32 {
LIBS += -lole32 -loleaut32 -lmmdevapi
}

# 添加调试信息
QMAKE_CXXFLAGS_RELEASE += /Zi
QMAKE_LFLAGS_RELEASE += /DEBUG /OPT:REF /OPT:ICF

# You can make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
Expand Down Expand Up @@ -63,4 +67,7 @@ win32 {
$$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.kws.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.kws.dll\") && \
$$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.kws.ort.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.kws.ort.dll\") && \
$$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.lu.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.lu.dll\")
}
}

# 添加 Windows 调试帮助库
LIBS += -ldbghelp
193 changes: 178 additions & 15 deletions src/azurespeechapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,22 @@ using namespace Microsoft::CognitiveServices::Speech::Audio;
// Constructs the Azure Speech wrapper in the uninitialized state.
// initialize() must be called (with a subscription key and region)
// before any recognition can start. A dedicated Logger instance is
// created here and owned for the object's lifetime.
AzureSpeechAPI::AzureSpeechAPI(QObject *parent)
: QObject(parent)
, isInitialized(false)
, logger(std::make_unique<Logger>())
{
// Logs construction ("AzureSpeechAPI initialized").
LOG_INFO("AzureSpeechAPI 初始化");
}

// Destructor: logs teardown ("AzureSpeechAPI destructed") and stops any
// in-flight recognition so the SDK recognizer and audio stream are
// released before the members are destroyed.
AzureSpeechAPI::~AzureSpeechAPI()
{
LOG_INFO("AzureSpeechAPI 析构");
// Safe when nothing is running: stopRecognitionAndTranslation() is a
// no-op unless a recognizer exists.
stopRecognitionAndTranslation();
}

void AzureSpeechAPI::initialize(const QString &subscriptionKey, const QString &region)
{
try {
LOG_INFO(QString("开始初始化 Azure Speech 服务,区域: %1").arg(region));

// 创建语音配置
speechConfig = SpeechConfig::FromSubscription(subscriptionKey.toStdString(), region.toStdString());

Expand All @@ -28,62 +33,121 @@ void AzureSpeechAPI::initialize(const QString &subscriptionKey, const QString &r
speechConfig->SetProperty(PropertyId::SpeechServiceConnection_EndSilenceTimeoutMs, "1000");

isInitialized = true;
LOG_INFO("Azure Speech 服务初始化成功");
emit statusChanged("Azure Speech服务初始化成功");
}
catch (const std::exception& e) {
emit error(QString("初始化失败: %1").arg(e.what()));
QString errorMsg = QString("初始化失败: %1").arg(e.what());
LOG_ERROR(errorMsg);
emit error(errorMsg);
}
}

void AzureSpeechAPI::startRecognitionAndTranslation(const QString &sourceLanguage, const QString &targetLanguage)
{
if (!isInitialized) {
LOG_ERROR("请先初始化Azure Speech服务");
emit error("请先初始化Azure Speech服务");
return;
}

try {
LOG_INFO(QString("开始语音识别和翻译,源语言: %1, 目标语言: %2")
.arg(sourceLanguage)
.arg(targetLanguage));

currentSourceLanguage = sourceLanguage;
currentTargetLanguage = targetLanguage;

// 创建翻译配置
translationConfig = SpeechTranslationConfig::FromSubscription(speechConfig->GetSubscriptionKey(), speechConfig->GetRegion());
if (!translationConfig) {
LOG_ERROR("创建翻译配置失败");
emit error("创建翻译配置失败");
return;
}

translationConfig->SetSpeechRecognitionLanguage(sourceLanguage.toStdString());
translationConfig->AddTargetLanguage(targetLanguage.toStdString());

// 创建音频流
audioStream = PushAudioInputStream::Create();
if (!audioStream) {
LOG_ERROR("创建音频流失败");
emit error("创建音频流失败");
return;
}

// 创建音频配置
auto audioConfig = AudioConfig::FromStreamInput(audioStream);
if (!audioConfig) {
LOG_ERROR("创建音频配置失败");
emit error("创建音频配置失败");
return;
}

// 创建识别器
recognizer = TranslationRecognizer::FromConfig(translationConfig, audioConfig);
if (!recognizer) {
LOG_ERROR("创建识别器失败");
emit error("创建识别器失败");
return;
}

// 设置事件处理
recognizer->Recognized.Connect([this](const TranslationRecognitionEventArgs& e) {
if (e.Result->Reason == ResultReason::TranslatedSpeech) {
emit recognitionResult(QString::fromStdString(e.Result->Text));

// 获取翻译结果
auto translations = e.Result->Translations;
if (translations.find(currentTargetLanguage.toStdString()) != translations.end()) {
QString translatedText = QString::fromStdString(
translations[currentTargetLanguage.toStdString()]);
emit translationResult(translatedText);
try {
if (e.Result->Reason == ResultReason::TranslatedSpeech) {
QString text = QString::fromStdString(e.Result->Text);
LOG_INFO(QString("识别结果: %1").arg(text));
emit recognitionResult(text);

// 获取翻译结果
auto translations = e.Result->Translations;
if (translations.find(currentTargetLanguage.toStdString()) != translations.end()) {
QString translatedText = QString::fromStdString(
translations[currentTargetLanguage.toStdString()]);
LOG_INFO(QString("翻译结果: %1").arg(translatedText));
emit translationResult(translatedText);
} else {
LOG_ERROR(QString("未找到目标语言 %1 的翻译结果").arg(currentTargetLanguage));
}
} else if (e.Result->Reason == ResultReason::NoMatch) {
LOG_INFO("未检测到语音");
} else if (e.Result->Reason == ResultReason::Canceled) {
LOG_ERROR("识别被取消");
}
} catch (const std::exception& ex) {
LOG_ERROR(QString("处理识别结果时发生异常: %1").arg(ex.what()));
emit error(QString("处理识别结果时发生异常: %1").arg(ex.what()));
}
});

recognizer->Canceled.Connect([this](const TranslationRecognitionCanceledEventArgs& e) {
LOG_ERROR(QString("识别取消: %1").arg(QString::fromStdString(e.ErrorDetails)));
emit error(QString("识别取消: %1").arg(QString::fromStdString(e.ErrorDetails)));
});

recognizer->SessionStarted.Connect([this](const SessionEventArgs&) {
LOG_INFO("识别会话开始");
});

recognizer->SessionStopped.Connect([this](const SessionEventArgs&) {
LOG_INFO("识别会话结束");
});

// 开始连续识别
recognizer->StartContinuousRecognitionAsync();
emit statusChanged("开始语音识别和翻译");
try {
recognizer->StartContinuousRecognitionAsync().wait();
LOG_INFO("开始语音识别和翻译");
emit statusChanged("开始语音识别和翻译");
} catch (const std::exception& e) {
LOG_ERROR(QString("启动连续识别失败: %1").arg(e.what()));
emit error(QString("启动连续识别失败: %1").arg(e.what()));
}
}
catch (const std::exception& e) {
LOG_ERROR(QString("启动识别失败: %1").arg(e.what()));
emit error(QString("启动识别失败: %1").arg(e.what()));
}
}
Expand All @@ -92,20 +156,24 @@ void AzureSpeechAPI::stopRecognitionAndTranslation()
{
if (recognizer) {
try {
recognizer->StopContinuousRecognitionAsync();
LOG_INFO("停止语音识别和翻译");
recognizer->StopContinuousRecognitionAsync().wait();
recognizer.reset();
audioStream.reset();
emit statusChanged("停止语音识别和翻译");
}
catch (const std::exception& e) {
emit error(QString("停止识别失败: %1").arg(e.what()));
QString errorMsg = QString("停止识别失败: %1").arg(e.what());
LOG_ERROR(errorMsg);
emit error(errorMsg);
}
}
}

void AzureSpeechAPI::processAudioData(const QByteArray &audioData)
{
if (!audioStream) {
LOG_ERROR("音频流未初始化");
emit error("音频流未初始化");
return;
}
Expand All @@ -114,10 +182,105 @@ void AzureSpeechAPI::processAudioData(const QByteArray &audioData)
// 将QByteArray转换为std::vector<uint8_t>
std::vector<uint8_t> audioBuffer(audioData.begin(), audioData.end());

// 写入音频数据,确保大小不超过uint32_t的最大值
if (audioBuffer.size() > UINT32_MAX) {
LOG_ERROR("音频数据块太大");
emit error("音频数据块太大");
return;
}

// 使用静态计数器来减少日志输出频率
static int logCounter = 0;
if (++logCounter % 10 == 0) { // 每10个数据块记录一次
LOG_INFO(QString("处理音频数据,大小: %1 字节").arg(audioBuffer.size()));
}

// 写入音频数据
audioStream->Write(audioBuffer.data(), audioBuffer.size());
try {
audioStream->Write(audioBuffer.data(), static_cast<uint32_t>(audioBuffer.size()));
if (logCounter % 10 == 0) {
LOG_INFO("音频数据写入成功");
}
} catch (const std::exception& e) {
LOG_ERROR(QString("写入音频数据失败: %1").arg(e.what()));
emit error(QString("写入音频数据失败: %1").arg(e.what()));
}
}
catch (const std::exception& e) {
LOG_ERROR(QString("处理音频数据失败: %1").arg(e.what()));
emit error(QString("处理音频数据失败: %1").arg(e.what()));
}
}

// Smoke-tests the given subscription key / region by pushing 100 ms of
// silence through a one-shot recognition. Emits statusChanged on success
// and error with diagnostics on failure. Blocks the calling thread until
// the service responds.
//
// Fixes: the locals previously named `audioStream` and `recognizer`
// shadowed the class members of the same names (renamed testStream /
// probeRecognizer); the buffer size was computed with double arithmetic
// (16000 * 2 * 0.1) and is now pure integer math.
void AzureSpeechAPI::testConnection(const QString &key, const QString &region)
{
    try {
        LOG_INFO(QString("开始测试连接,区域: %1").arg(region));

        // 1. Build a throwaway config from the candidate credentials.
        auto config = SpeechConfig::FromSubscription(
            key.toStdString(),
            region.toStdString()
        );
        if (!config) {
            QString errorMsg = "Failed to create speech config";
            LOG_ERROR(errorMsg);
            emit error(errorMsg);
            return;
        }
        LOG_INFO("Speech config created successfully");

        // Local probe stream — deliberately NOT the member audioStream.
        auto testStream = PushAudioInputStream::Create();
        if (!testStream) {
            QString errorMsg = "Failed to create audio stream";
            LOG_ERROR(errorMsg);
            emit error(errorMsg);
            return;
        }
        LOG_INFO("Audio stream created successfully");

        auto audioConfig = AudioConfig::FromStreamInput(testStream);
        if (!audioConfig) {
            QString errorMsg = "Failed to create audio config";
            LOG_ERROR(errorMsg);
            emit error(errorMsg);
            return;
        }

        // 100 ms of 16 kHz, 16-bit mono silence: 16000 samples/s * 2
        // bytes/sample / 10 = 3200 bytes (integer math, no narrowing).
        constexpr size_t kSilenceBytes = 16000 * 2 / 10;
        std::vector<uint8_t> silenceData(kSilenceBytes);
        testStream->Write(silenceData.data(), static_cast<uint32_t>(silenceData.size()));
        testStream->Close();
        LOG_INFO("Silence data written to stream");

        auto probeRecognizer = SpeechRecognizer::FromConfig(config, audioConfig);
        if (!probeRecognizer) {
            QString errorMsg = "Failed to create speech recognizer";
            LOG_ERROR(errorMsg);
            emit error(errorMsg);
            return;
        }
        LOG_INFO("Speech recognizer created successfully");

        // Run one blocking recognition against the silence.
        // NOTE(review): pure silence typically yields NoMatch even when the
        // service is reachable; treating only RecognizedSpeech as success
        // may report false failures — confirm the intended criterion.
        auto result = probeRecognizer->RecognizeOnceAsync().get();
        if (result->Reason == ResultReason::RecognizedSpeech) {
            LOG_INFO("Connection test successful");
            emit statusChanged("连接测试成功");
        } else {
            QString errorMsg = QString("Connection test failed: %1").arg(static_cast<int>(result->Reason));
            LOG_ERROR(errorMsg);
            emit error(errorMsg);
        }
    }
    catch (const std::exception &e) {
        QString msg = QString("连接测试异常: %1").arg(e.what());
        LOG_ERROR(msg);
        emit statusChanged(msg);
        emit error(msg);
    }
}
16 changes: 12 additions & 4 deletions src/azurespeechapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
#include <memory>
#include <speechapi_cxx.h>
#include <speechapi_cxx_translation_recognizer.h>
#include "logger.h"

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Translation;
using namespace Microsoft::CognitiveServices::Speech::Audio;

class AzureSpeechAPI : public QObject
{
Expand All @@ -28,21 +33,24 @@ class AzureSpeechAPI : public QObject
// 处理音频数据
void processAudioData(const QByteArray &audioData);

void testConnection(const QString &key, const QString &region);

signals:
void recognitionResult(const QString &text);
void translationResult(const QString &text);
void error(const QString &message);
void statusChanged(const QString &status);

private:
std::shared_ptr<Microsoft::CognitiveServices::Speech::SpeechConfig> speechConfig;
std::shared_ptr<Microsoft::CognitiveServices::Speech::Translation::SpeechTranslationConfig> translationConfig;
std::shared_ptr<Microsoft::CognitiveServices::Speech::Translation::TranslationRecognizer> recognizer;
std::shared_ptr<Microsoft::CognitiveServices::Speech::Audio::PushAudioInputStream> audioStream;
std::shared_ptr<SpeechConfig> speechConfig;
std::shared_ptr<SpeechTranslationConfig> translationConfig;
std::shared_ptr<TranslationRecognizer> recognizer;
std::shared_ptr<PushAudioInputStream> audioStream;

bool isInitialized;
QString currentSourceLanguage;
QString currentTargetLanguage;
std::unique_ptr<Logger> logger;
};

#endif // AZURESPEECHAPI_H
Loading