Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions MeetingAssistant.pro
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ SOURCES += \
src/main.cpp \
src/mainwindow.cpp \
src/audioprocessor.cpp \
src/baiduapi.cpp \
src/azurespeechapi.cpp \
src/logger.cpp \
src/wasapiaudiocapture.cpp

# Header files that are part of the project build.
HEADERS += \
src/mainwindow.h \
src/audioprocessor.h \
src/baiduapi.h \
src/azurespeechapi.h \
src/logger.h \
src/wasapiaudiocapture.h

Expand All @@ -45,4 +45,22 @@ INCLUDEPATH += $$[QT_INSTALL_HEADERS]/QtNetwork
INCLUDEPATH += $$[QT_INSTALL_HEADERS]/QtMultimedia

# Add the project source directory to the include search path
INCLUDEPATH += src
INCLUDEPATH += src

# Azure Speech SDK headers (vendored copy under third_party/): C++ API and C API
INCLUDEPATH += third_party/azure_speech_sdk/include/cxx_api
INCLUDEPATH += third_party/azure_speech_sdk/include/c_api

# Add the SDK library directory and link against the Speech core library
LIBS += -L$$PWD/third_party/azure_speech_sdk/lib \
-lMicrosoft.CognitiveServices.Speech.core

# Copy runtime dependencies: after linking on Windows, copy the Azure Speech SDK
# DLLs from third_party/azure_speech_sdk/bin next to the built executable.
# NOTE(review): the destination is hard-coded to $$OUT_PWD\release, so a debug
# build would not receive the DLLs in its own output directory - confirm whether
# debug builds need the same copy step.
win32 {
# The six copy commands are chained with && so a failed copy stops the chain.
QMAKE_POST_LINK += $$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.core.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.core.dll\") && \
$$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.audio.sys.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.audio.sys.dll\") && \
$$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.codec.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.codec.dll\") && \
$$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.kws.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.kws.dll\") && \
$$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.kws.ort.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.kws.ort.dll\") && \
$$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.lu.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.lu.dll\")
}
123 changes: 123 additions & 0 deletions src/azurespeechapi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include "azurespeechapi.h"
#include <QDebug>

using namespace Microsoft::CognitiveServices::Speech;
using namespace Microsoft::CognitiveServices::Speech::Translation;
using namespace Microsoft::CognitiveServices::Speech::Audio;

AzureSpeechAPI::AzureSpeechAPI(QObject *parent)
: QObject(parent)
, isInitialized(false)
{
}

/// Shuts down any recognition session that is still active before the object
/// goes away.
AzureSpeechAPI::~AzureSpeechAPI()
{
    this->stopRecognitionAndTranslation();
}

void AzureSpeechAPI::initialize(const QString &subscriptionKey, const QString &region)
{
try {
// 创建语音配置
speechConfig = SpeechConfig::FromSubscription(subscriptionKey.toStdString(), region.toStdString());

// 设置语音识别和翻译的默认参数
speechConfig->SetSpeechRecognitionLanguage("zh-CN");
speechConfig->SetProperty(PropertyId::SpeechServiceConnection_InitialSilenceTimeoutMs, "5000");
speechConfig->SetProperty(PropertyId::SpeechServiceConnection_EndSilenceTimeoutMs, "1000");

isInitialized = true;
emit statusChanged("Azure Speech服务初始化成功");
}
catch (const std::exception& e) {
emit error(QString("初始化失败: %1").arg(e.what()));
}
}

void AzureSpeechAPI::startRecognitionAndTranslation(const QString &sourceLanguage, const QString &targetLanguage)
{
if (!isInitialized) {
emit error("请先初始化Azure Speech服务");
return;
}

try {
currentSourceLanguage = sourceLanguage;
currentTargetLanguage = targetLanguage;

// 创建翻译配置
translationConfig = SpeechTranslationConfig::FromSubscription(speechConfig->GetSubscriptionKey(), speechConfig->GetRegion());
translationConfig->SetSpeechRecognitionLanguage(sourceLanguage.toStdString());
translationConfig->AddTargetLanguage(targetLanguage.toStdString());

// 创建音频流
audioStream = PushAudioInputStream::Create();

// 创建音频配置
auto audioConfig = AudioConfig::FromStreamInput(audioStream);

// 创建识别器
recognizer = TranslationRecognizer::FromConfig(translationConfig, audioConfig);

// 设置事件处理
recognizer->Recognized.Connect([this](const TranslationRecognitionEventArgs& e) {
if (e.Result->Reason == ResultReason::TranslatedSpeech) {
emit recognitionResult(QString::fromStdString(e.Result->Text));

// 获取翻译结果
auto translations = e.Result->Translations;
if (translations.find(currentTargetLanguage.toStdString()) != translations.end()) {
QString translatedText = QString::fromStdString(
translations[currentTargetLanguage.toStdString()]);
emit translationResult(translatedText);
}
}
});

recognizer->Canceled.Connect([this](const TranslationRecognitionCanceledEventArgs& e) {
emit error(QString("识别取消: %1").arg(QString::fromStdString(e.ErrorDetails)));
});

// 开始连续识别
recognizer->StartContinuousRecognitionAsync();
emit statusChanged("开始语音识别和翻译");
}
catch (const std::exception& e) {
emit error(QString("启动识别失败: %1").arg(e.what()));
}
}

/// Stops the active recognition session, if any, and releases the recognizer
/// and the push audio stream.
///
/// Does nothing when no session is active. Emits statusChanged() after a clean
/// stop and error() if the SDK reports a failure; the session resources are
/// released in both cases.
void AzureSpeechAPI::stopRecognitionAndTranslation()
{
    if (!recognizer) {
        return;
    }

    try {
        // get() waits for the stop to finish and rethrows any failure; without
        // it the returned future is discarded and the catch below never fires.
        recognizer->StopContinuousRecognitionAsync().get();
        recognizer.reset();
        audioStream.reset();
        emit statusChanged("停止语音识别和翻译");
    }
    catch (const std::exception& e) {
        // Release the session even when stopping failed, so a later start does
        // not inherit a recognizer in an unknown state.
        recognizer.reset();
        audioStream.reset();
        emit error(QString("停止识别失败: %1").arg(e.what()));
    }
}

void AzureSpeechAPI::processAudioData(const QByteArray &audioData)
{
if (!audioStream) {
emit error("音频流未初始化");
return;
}

try {
// 将QByteArray转换为std::vector<uint8_t>
std::vector<uint8_t> audioBuffer(audioData.begin(), audioData.end());

// 写入音频数据
audioStream->Write(audioBuffer.data(), audioBuffer.size());
}
catch (const std::exception& e) {
emit error(QString("处理音频数据失败: %1").arg(e.what()));
}
}
48 changes: 48 additions & 0 deletions src/azurespeechapi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef AZURESPEECHAPI_H
#define AZURESPEECHAPI_H

#include <QObject>
#include <QString>
#include <QByteArray>
#include <memory>
#include <speechapi_cxx.h>
#include <speechapi_cxx_translation_recognizer.h>

class AzureSpeechAPI : public QObject
{
Q_OBJECT

public:
explicit AzureSpeechAPI(QObject *parent = nullptr);
~AzureSpeechAPI();

// 初始化Azure Speech服务
void initialize(const QString &subscriptionKey, const QString &region);

// 开始语音识别和翻译
void startRecognitionAndTranslation(const QString &sourceLanguage, const QString &targetLanguage);

// 停止语音识别和翻译
void stopRecognitionAndTranslation();

// 处理音频数据
void processAudioData(const QByteArray &audioData);

signals:
void recognitionResult(const QString &text);
void translationResult(const QString &text);
void error(const QString &message);
void statusChanged(const QString &status);

private:
std::shared_ptr<Microsoft::CognitiveServices::Speech::SpeechConfig> speechConfig;
std::shared_ptr<Microsoft::CognitiveServices::Speech::Translation::SpeechTranslationConfig> translationConfig;
std::shared_ptr<Microsoft::CognitiveServices::Speech::Translation::TranslationRecognizer> recognizer;
std::shared_ptr<Microsoft::CognitiveServices::Speech::Audio::PushAudioInputStream> audioStream;

bool isInitialized;
QString currentSourceLanguage;
QString currentTargetLanguage;
};

#endif // AZURESPEECHAPI_H
Loading