diff --git a/MeetingAssistant.pro b/MeetingAssistant.pro index ab3e1ae..0c7139f 100644 --- a/MeetingAssistant.pro +++ b/MeetingAssistant.pro @@ -17,14 +17,14 @@ SOURCES += \ src/main.cpp \ src/mainwindow.cpp \ src/audioprocessor.cpp \ - src/baiduapi.cpp \ + src/azurespeechapi.cpp \ src/logger.cpp \ src/wasapiaudiocapture.cpp HEADERS += \ src/mainwindow.h \ src/audioprocessor.h \ - src/baiduapi.h \ + src/azurespeechapi.h \ src/logger.h \ src/wasapiaudiocapture.h @@ -45,4 +45,22 @@ INCLUDEPATH += $$[QT_INSTALL_HEADERS]/QtNetwork INCLUDEPATH += $$[QT_INSTALL_HEADERS]/QtMultimedia # 添加项目源文件目录 -INCLUDEPATH += src \ No newline at end of file +INCLUDEPATH += src + +# Azure Speech SDK (本地路径) +INCLUDEPATH += third_party/azure_speech_sdk/include/cxx_api +INCLUDEPATH += third_party/azure_speech_sdk/include/c_api + +# 添加库路径 +LIBS += -L$$PWD/third_party/azure_speech_sdk/lib \ + -lMicrosoft.CognitiveServices.Speech.core + +# 复制运行时依赖 +win32 { + QMAKE_POST_LINK += $$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.core.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.core.dll\") && \ + $$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.audio.sys.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.audio.sys.dll\") && \ + $$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.codec.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.codec.dll\") && \ + $$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.kws.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.kws.dll\") && \ + $$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.kws.ort.dll\" 
\"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.kws.ort.dll\") && \ + $$quote(cmd /c copy /Y \"$$PWD\\third_party\\azure_speech_sdk\\bin\\Microsoft.CognitiveServices.Speech.extension.lu.dll\" \"$$OUT_PWD\\release\\Microsoft.CognitiveServices.Speech.extension.lu.dll\") +} \ No newline at end of file diff --git a/src/azurespeechapi.cpp b/src/azurespeechapi.cpp new file mode 100644 index 0000000..bb137a0 --- /dev/null +++ b/src/azurespeechapi.cpp @@ -0,0 +1,123 @@ +#include "azurespeechapi.h" +#include <speechapi_cxx.h> + +using namespace Microsoft::CognitiveServices::Speech; +using namespace Microsoft::CognitiveServices::Speech::Translation; +using namespace Microsoft::CognitiveServices::Speech::Audio; + +AzureSpeechAPI::AzureSpeechAPI(QObject *parent) + : QObject(parent) + , isInitialized(false) +{ +} + +AzureSpeechAPI::~AzureSpeechAPI() +{ + stopRecognitionAndTranslation(); +} + +void AzureSpeechAPI::initialize(const QString &subscriptionKey, const QString &region) +{ + try { + // 创建语音配置 + speechConfig = SpeechConfig::FromSubscription(subscriptionKey.toStdString(), region.toStdString()); + + // 设置语音识别和翻译的默认参数 + speechConfig->SetSpeechRecognitionLanguage("zh-CN"); + speechConfig->SetProperty(PropertyId::SpeechServiceConnection_InitialSilenceTimeoutMs, "5000"); + speechConfig->SetProperty(PropertyId::SpeechServiceConnection_EndSilenceTimeoutMs, "1000"); + + isInitialized = true; + emit statusChanged("Azure Speech服务初始化成功"); + } + catch (const std::exception& e) { + emit error(QString("初始化失败: %1").arg(e.what())); + } +} + +void AzureSpeechAPI::startRecognitionAndTranslation(const QString &sourceLanguage, const QString &targetLanguage) +{ + if (!isInitialized) { + emit error("请先初始化Azure Speech服务"); + return; + } + + try { + currentSourceLanguage = sourceLanguage; + currentTargetLanguage = targetLanguage; + + // 创建翻译配置 + translationConfig = SpeechTranslationConfig::FromSubscription(speechConfig->GetSubscriptionKey(), speechConfig->GetRegion()); + 
translationConfig->SetSpeechRecognitionLanguage(sourceLanguage.toStdString()); + translationConfig->AddTargetLanguage(targetLanguage.toStdString()); + + // 创建音频流 + audioStream = PushAudioInputStream::Create(); + + // 创建音频配置 + auto audioConfig = AudioConfig::FromStreamInput(audioStream); + + // 创建识别器 + recognizer = TranslationRecognizer::FromConfig(translationConfig, audioConfig); + + // 设置事件处理 + recognizer->Recognized.Connect([this](const TranslationRecognitionEventArgs& e) { + if (e.Result->Reason == ResultReason::TranslatedSpeech) { + emit recognitionResult(QString::fromStdString(e.Result->Text)); + + // 获取翻译结果 + auto translations = e.Result->Translations; + if (translations.find(currentTargetLanguage.toStdString()) != translations.end()) { + QString translatedText = QString::fromStdString( + translations[currentTargetLanguage.toStdString()]); + emit translationResult(translatedText); + } + } + }); + + recognizer->Canceled.Connect([this](const TranslationRecognitionCanceledEventArgs& e) { + emit error(QString("识别取消: %1").arg(QString::fromStdString(e.ErrorDetails))); + }); + + // 开始连续识别 + recognizer->StartContinuousRecognitionAsync(); + emit statusChanged("开始语音识别和翻译"); + } + catch (const std::exception& e) { + emit error(QString("启动识别失败: %1").arg(e.what())); + } +} + +void AzureSpeechAPI::stopRecognitionAndTranslation() +{ + if (recognizer) { + try { + recognizer->StopContinuousRecognitionAsync(); + recognizer.reset(); + audioStream.reset(); + emit statusChanged("停止语音识别和翻译"); + } + catch (const std::exception& e) { + emit error(QString("停止识别失败: %1").arg(e.what())); + } + } +} + +void AzureSpeechAPI::processAudioData(const QByteArray &audioData) +{ + if (!audioStream) { + emit error("音频流未初始化"); + return; + } + + try { + // 将QByteArray转换为std::vector<uint8_t> + std::vector<uint8_t> audioBuffer(audioData.begin(), audioData.end()); + + // 写入音频数据 + audioStream->Write(audioBuffer.data(), audioBuffer.size()); + } + catch (const std::exception& e) { + emit error(QString("处理音频数据失败: 
%1").arg(e.what())); + } +} \ No newline at end of file diff --git a/src/azurespeechapi.h b/src/azurespeechapi.h new file mode 100644 index 0000000..4776465 --- /dev/null +++ b/src/azurespeechapi.h @@ -0,0 +1,48 @@ +#ifndef AZURESPEECHAPI_H +#define AZURESPEECHAPI_H + +#include +#include +#include +#include +#include +#include + +class AzureSpeechAPI : public QObject +{ + Q_OBJECT + +public: + explicit AzureSpeechAPI(QObject *parent = nullptr); + ~AzureSpeechAPI(); + + // 初始化Azure Speech服务 + void initialize(const QString &subscriptionKey, const QString ®ion); + + // 开始语音识别和翻译 + void startRecognitionAndTranslation(const QString &sourceLanguage, const QString &targetLanguage); + + // 停止语音识别和翻译 + void stopRecognitionAndTranslation(); + + // 处理音频数据 + void processAudioData(const QByteArray &audioData); + +signals: + void recognitionResult(const QString &text); + void translationResult(const QString &text); + void error(const QString &message); + void statusChanged(const QString &status); + +private: + std::shared_ptr speechConfig; + std::shared_ptr translationConfig; + std::shared_ptr recognizer; + std::shared_ptr audioStream; + + bool isInitialized; + QString currentSourceLanguage; + QString currentTargetLanguage; +}; + +#endif // AZURESPEECHAPI_H \ No newline at end of file diff --git a/src/baiduapi.cpp b/src/baiduapi.cpp deleted file mode 100644 index e8390d7..0000000 --- a/src/baiduapi.cpp +++ /dev/null @@ -1,206 +0,0 @@ -#include "baiduapi.h" -#include -#include -#include -#include -#include -#include -#include -#include - -BaiduAPI::BaiduAPI(QObject *parent) : QObject(parent) -{ - networkManager = new QNetworkAccessManager(this); -} - -BaiduAPI::~BaiduAPI() -{ - delete networkManager; -} - -void BaiduAPI::testConnection(const QString &appId, const QString &apiKey) -{ - currentAppId = appId; - currentApiKey = apiKey; - - // 生成随机数 - QString salt = QString::number(QRandomGenerator::global()->bounded(1000000)); - // 生成签名 - QString signStr = appId + "test" + salt + 
apiKey; - QByteArray signBytes = QCryptographicHash::hash(signStr.toUtf8(), QCryptographicHash::Md5); - QString sign = signBytes.toHex(); - - QUrl url("https://fanyi-api.baidu.com/api/trans/vip/translate"); - QUrlQuery query; - query.addQueryItem("q", "test"); - query.addQueryItem("from", "auto"); - query.addQueryItem("to", "zh"); - query.addQueryItem("appid", appId); - query.addQueryItem("salt", salt); - query.addQueryItem("sign", sign); - url.setQuery(query); - - QNetworkRequest request(url); - request.setHeader(QNetworkRequest::ContentTypeHeader, "application/x-www-form-urlencoded"); - request.setHeader(QNetworkRequest::UserAgentHeader, "MeetingAssistant/1.0"); - - QNetworkReply *reply = networkManager->get(request); - - connect(reply, &QNetworkReply::finished, [=]() { - if (reply->error() == QNetworkReply::NoError) { - QByteArray response = reply->readAll(); - QJsonDocument doc = QJsonDocument::fromJson(response); - QJsonObject obj = doc.object(); - - if (obj.contains("trans_result")) { - emit testResult(true, "API连接测试成功"); - } else if (obj.contains("error_code")) { - QString errorMsg = QString("API连接测试失败:%1 - %2") - .arg(obj["error_code"].toString()) - .arg(obj["error_msg"].toString()); - emit testResult(false, errorMsg); - } else { - emit testResult(false, "API连接测试失败:未知错误"); - } - } else { - emit testResult(false, "网络错误: " + reply->errorString()); - } - reply->deleteLater(); - }); -} - -void BaiduAPI::getAccessToken(const QString &appId, const QString &apiKey) -{ - QUrl url("https://aip.baidubce.com/oauth/2.0/token"); - QUrlQuery query; - query.addQueryItem("grant_type", "client_credentials"); - query.addQueryItem("client_id", appId); - query.addQueryItem("client_secret", apiKey); - url.setQuery(query); - - QNetworkRequest request(url); - request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json"); - - QNetworkReply *reply = networkManager->post(request, QByteArray()); - - connect(reply, &QNetworkReply::finished, [=]() { - if (reply->error() == 
QNetworkReply::NoError) { - QByteArray response = reply->readAll(); - QJsonDocument doc = QJsonDocument::fromJson(response); - QJsonObject obj = doc.object(); - - if (obj.contains("access_token")) { - accessToken = obj["access_token"].toString(); - emit testResult(true, "API连接测试成功"); - } else { - emit testResult(false, "获取访问令牌失败: " + obj["error_description"].toString()); - } - } else { - emit testResult(false, "网络错误: " + reply->errorString()); - } - reply->deleteLater(); - }); -} - -void BaiduAPI::recognizeSpeech(const QByteArray &audioData) -{ - if (accessToken.isEmpty()) { - emit error("未获取访问令牌,请先测试API连接"); - return; - } - - QUrl url("https://vop.baidu.com/server_api"); - QNetworkRequest request(url); - request.setHeader(QNetworkRequest::ContentTypeHeader, "application/json"); - - QJsonObject json; - json["format"] = "pcm"; - json["rate"] = 16000; - json["channel"] = 1; - json["token"] = accessToken; - json["cuid"] = "MeetingAssistant"; - json["len"] = audioData.size(); - json["speech"] = QString(audioData.toBase64()); - - QNetworkReply *reply = networkManager->post(request, QJsonDocument(json).toJson()); - - connect(reply, &QNetworkReply::finished, [=]() { - if (reply->error() == QNetworkReply::NoError) { - QByteArray response = reply->readAll(); - QJsonDocument doc = QJsonDocument::fromJson(response); - QJsonObject obj = doc.object(); - - if (obj.contains("result")) { - QJsonArray results = obj["result"].toArray(); - if (!results.isEmpty()) { - emit recognitionResult(results[0].toString()); - } - } else { - emit error("语音识别失败: " + obj["err_msg"].toString()); - } - } else { - handleNetworkError(reply); - } - reply->deleteLater(); - }); -} - -void BaiduAPI::translateText(const QString &text, const QString &from, const QString &to) -{ - if (currentAppId.isEmpty() || currentApiKey.isEmpty()) { - emit error("未设置APP ID或API Key"); - return; - } - - // 生成随机数 - QString salt = QString::number(QRandomGenerator::global()->bounded(1000000)); - // 生成签名 - QString signStr = 
currentAppId + text + salt + currentApiKey; - QByteArray signBytes = QCryptographicHash::hash(signStr.toUtf8(), QCryptographicHash::Md5); - QString sign = signBytes.toHex(); - - QUrl url("https://fanyi-api.baidu.com/api/trans/vip/translate"); - QUrlQuery query; - query.addQueryItem("q", text); - query.addQueryItem("from", from); - query.addQueryItem("to", to); - query.addQueryItem("appid", currentAppId); - query.addQueryItem("salt", salt); - query.addQueryItem("sign", sign); - url.setQuery(query); - - QNetworkRequest request(url); - request.setHeader(QNetworkRequest::ContentTypeHeader, "application/x-www-form-urlencoded"); - request.setHeader(QNetworkRequest::UserAgentHeader, "MeetingAssistant/1.0"); - - QNetworkReply *reply = networkManager->get(request); - - connect(reply, &QNetworkReply::finished, [=]() { - if (reply->error() == QNetworkReply::NoError) { - QByteArray response = reply->readAll(); - QJsonDocument doc = QJsonDocument::fromJson(response); - QJsonObject obj = doc.object(); - - if (obj.contains("trans_result")) { - QJsonArray results = obj["trans_result"].toArray(); - if (!results.isEmpty()) { - QJsonObject result = results[0].toObject(); - emit translationResult(result["dst"].toString()); - } - } else if (obj.contains("error_code")) { - QString errorMsg = QString("翻译失败:%1 - %2") - .arg(obj["error_code"].toString()) - .arg(obj["error_msg"].toString()); - emit error(errorMsg); - } - } else { - handleNetworkError(reply); - } - reply->deleteLater(); - }); -} - -void BaiduAPI::handleNetworkError(QNetworkReply *reply) -{ - emit error("网络错误: " + reply->errorString()); -} \ No newline at end of file diff --git a/src/baiduapi.h b/src/baiduapi.h deleted file mode 100644 index 2f0c0c4..0000000 --- a/src/baiduapi.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef BAIDUAPI_H -#define BAIDUAPI_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -class BaiduAPI : public QObject -{ - Q_OBJECT - -public: - 
explicit BaiduAPI(QObject *parent = nullptr); - ~BaiduAPI(); - - // 测试API连接 - void testConnection(const QString &appId, const QString &apiKey); - - // 语音识别 - void recognizeSpeech(const QByteArray &audioData); - - // 翻译文本 - void translateText(const QString &text, const QString &from = "auto", const QString &to = "zh"); - -signals: - void testResult(bool success, const QString &message); - void recognitionResult(const QString &text); - void translationResult(const QString &text); - void error(const QString &message); - -private: - QNetworkAccessManager *networkManager; - QString accessToken; - QString currentAppId; - QString currentApiKey; - - void getAccessToken(const QString &appId, const QString &apiKey); - QString generateSign(const QString &appId, const QString &apiKey); - void handleNetworkError(QNetworkReply *reply); -}; - -#endif // BAIDUAPI_H \ No newline at end of file diff --git a/src/mainwindow.cpp b/src/mainwindow.cpp index b90034e..ae829cf 100644 --- a/src/mainwindow.cpp +++ b/src/mainwindow.cpp @@ -1,253 +1,124 @@ #include "mainwindow.h" -#include -#include +#include "./ui_mainwindow.h" #include -#include -#include -#include -#include -#include -#include -#include -#include "logger.h" MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent) - , isRecording(false) + , ui(new Ui::MainWindow) + , audioProcessor(new AudioProcessor(this)) + , azureSpeechApi(new AzureSpeechAPI(this)) { - setupUI(); - loadConfig(); - updateButtonState(); - - audioProcessor = new AudioProcessor(this); - baiduApi = new BaiduAPI(this); + ui->setupUi(this); + // 连接信号和槽 connect(audioProcessor, &AudioProcessor::audioDataReceived, this, &MainWindow::onAudioDataReceived); - connect(baiduApi, &BaiduAPI::recognitionResult, + + connect(azureSpeechApi, &AzureSpeechAPI::recognitionResult, this, &MainWindow::onRecognitionResult); - connect(baiduApi, &BaiduAPI::translationResult, + connect(azureSpeechApi, &AzureSpeechAPI::translationResult, this, &MainWindow::onTranslationResult); - 
connect(baiduApi, &BaiduAPI::error, - this, &MainWindow::onApiError); - connect(baiduApi, &BaiduAPI::testResult, - this, &MainWindow::onApiTestResult); + connect(azureSpeechApi, &AzureSpeechAPI::error, + this, &MainWindow::onError); + connect(azureSpeechApi, &AzureSpeechAPI::statusChanged, + this, &MainWindow::onStatusChanged); + + // 连接按钮信号 + connect(ui->startButton, &QPushButton::clicked, + this, &MainWindow::onStartButtonClicked); + connect(ui->stopButton, &QPushButton::clicked, + this, &MainWindow::onStopButtonClicked); + connect(ui->testConnectionButton, &QPushButton::clicked, + this, &MainWindow::onTestConnectionButtonClicked); + + // 初始化UI状态 + ui->stopButton->setEnabled(false); } MainWindow::~MainWindow() { + delete ui; delete audioProcessor; - delete baiduApi; + delete azureSpeechApi; } -void MainWindow::setupUI() +void MainWindow::onStartButtonClicked() { - resize(800, 600); - setWindowTitle("Meeting Assistant"); - - QWidget *centralWidget = new QWidget(this); - setCentralWidget(centralWidget); - - QVBoxLayout *mainLayout = new QVBoxLayout(centralWidget); - mainLayout->setSpacing(10); - mainLayout->setContentsMargins(10, 10, 10, 10); - - // 配置区域 - QHBoxLayout *configLayout = new QHBoxLayout(); - configLayout->setSpacing(10); - - QLabel *appIdLabel = new QLabel("APP ID:", this); - appIdInput = new QLineEdit(this); - appIdInput->setMinimumWidth(150); - - QLabel *apiKeyLabel = new QLabel("API Key:", this); - apiKeyInput = new QLineEdit(this); - apiKeyInput->setMinimumWidth(150); - apiKeyInput->setEchoMode(QLineEdit::Password); - - saveConfigButton = new QPushButton("保存配置", this); - testApiButton = new QPushButton("测试API", this); - - configLayout->addWidget(appIdLabel); - configLayout->addWidget(appIdInput); - configLayout->addWidget(apiKeyLabel); - configLayout->addWidget(apiKeyInput); - configLayout->addWidget(saveConfigButton); - configLayout->addWidget(testApiButton); - configLayout->addStretch(); - - mainLayout->addLayout(configLayout); - - // 字幕显示区域 - 
subtitleDisplay = new QTextEdit(this); - subtitleDisplay->setMinimumHeight(400); - subtitleDisplay->setReadOnly(true); - subtitleDisplay->setFont(QFont("Arial", 16)); - mainLayout->addWidget(subtitleDisplay); + currentAppId = ui->appIdInput->text(); + currentApiKey = ui->appIdInput->text(); + currentRegion = ui->regionInput->text(); - // 控制按钮区域 - QHBoxLayout *controlLayout = new QHBoxLayout(); - controlLayout->setSpacing(10); - - startStopButton = new QPushButton("开始", this); - startStopButton->setMinimumWidth(80); - clearButton = new QPushButton("清除", this); - clearButton->setMinimumWidth(80); - - controlLayout->addWidget(startStopButton); - controlLayout->addWidget(clearButton); - controlLayout->addStretch(); - - mainLayout->addLayout(controlLayout); - - // 状态栏 - statusBar = new QStatusBar(this); - setStatusBar(statusBar); - - // 连接信号和槽 - connect(startStopButton, &QPushButton::clicked, this, &MainWindow::onStartStopClicked); - connect(clearButton, &QPushButton::clicked, this, &MainWindow::onClearClicked); - connect(saveConfigButton, &QPushButton::clicked, this, &MainWindow::onSaveConfigClicked); - connect(testApiButton, &QPushButton::clicked, this, &MainWindow::onTestApiClicked); -} - -void MainWindow::onStartStopClicked() -{ - if (!isRecording) { - if (audioProcessor->startRecording()) { - isRecording = true; - startStopButton->setText("停止"); - statusBar->showMessage("正在录音..."); - Logger::instance().log("开始录音"); - } else { - QMessageBox::critical(this, "错误", "无法启动录音设备"); - Logger::instance().log("无法启动录音设备", "ERROR"); - } - } else { - audioProcessor->stopRecording(); - isRecording = false; - startStopButton->setText("开始"); - statusBar->showMessage("就绪"); - Logger::instance().log("停止录音"); + if (currentAppId.isEmpty() || currentApiKey.isEmpty() || currentRegion.isEmpty()) { + QMessageBox::warning(this, "错误", "请填写完整的Azure Speech服务配置信息"); + return; } -} - -void MainWindow::onClearClicked() -{ - subtitleDisplay->clear(); -} - -void MainWindow::onSaveConfigClicked() -{ 
- saveConfig(); - QMessageBox::information(this, "成功", "配置已保存"); - Logger::instance().log("配置已保存"); -} - -void MainWindow::onTestApiClicked() -{ - QString appId = appIdInput->text().trimmed(); - QString apiKey = apiKeyInput->text().trimmed(); - if (appId.isEmpty() || apiKey.isEmpty()) { - QMessageBox::warning(this, "警告", "请输入APP ID和API Key"); - Logger::instance().log("API测试失败:未输入APP ID或API Key", "WARNING"); + // 初始化Azure Speech服务 + azureSpeechApi->initialize(currentApiKey, currentRegion); + + // 开始语音识别和翻译 + azureSpeechApi->startRecognitionAndTranslation("zh-CN", "en-US"); + + // 开始音频处理 + audioProcessor->startRecording(); + + // 更新UI状态 + ui->startButton->setEnabled(false); + ui->stopButton->setEnabled(true); + ui->testConnectionButton->setEnabled(false); +} + +void MainWindow::onStopButtonClicked() +{ + // 停止音频处理 + audioProcessor->stopRecording(); + + // 停止语音识别和翻译 + azureSpeechApi->stopRecognitionAndTranslation(); + + // 更新UI状态 + ui->startButton->setEnabled(true); + ui->stopButton->setEnabled(false); + ui->testConnectionButton->setEnabled(true); +} + +void MainWindow::onTestConnectionButtonClicked() +{ + currentAppId = ui->appIdInput->text(); + currentApiKey = ui->appIdInput->text(); + currentRegion = ui->regionInput->text(); + + if (currentAppId.isEmpty() || currentApiKey.isEmpty() || currentRegion.isEmpty()) { + QMessageBox::warning(this, "错误", "请填写完整的Azure Speech服务配置信息"); return; } - statusBar->showMessage("正在测试API连接..."); - Logger::instance().log("开始测试API连接"); - baiduApi->testConnection(appId, apiKey); + // 初始化Azure Speech服务 + azureSpeechApi->initialize(currentApiKey, currentRegion); } void MainWindow::onAudioDataReceived(const QByteArray &data) { - baiduApi->recognizeSpeech(data); + azureSpeechApi->processAudioData(data); } void MainWindow::onRecognitionResult(const QString &text) { - appendSubtitle(text); - baiduApi->translateText(text); + ui->recognitionText->append(text); } void MainWindow::onTranslationResult(const QString &text) { - appendSubtitle(text); + 
ui->translationText->append(text); } -void MainWindow::onApiError(const QString &message) +void MainWindow::onError(const QString &message) { - statusBar->showMessage(message); QMessageBox::warning(this, "错误", message); - Logger::instance().log("API错误:" + message, "ERROR"); -} - -void MainWindow::onApiTestResult(bool success, const QString &message) -{ - if (success) { - statusBar->showMessage("API连接测试成功"); - QMessageBox::information(this, "成功", message); - Logger::instance().log("API连接测试成功:" + message); - } else { - statusBar->showMessage("API连接测试失败"); - QMessageBox::warning(this, "错误", message); - Logger::instance().log("API连接测试失败:" + message, "ERROR"); - } -} - -void MainWindow::loadConfig() -{ - QFile file(getConfigPath()); - if (file.open(QIODevice::ReadOnly)) { - QJsonDocument doc = QJsonDocument::fromJson(file.readAll()); - QJsonObject obj = doc.object(); - - if (obj.contains("appId")) { - appIdInput->setText(obj["appId"].toString()); - } - if (obj.contains("apiKey")) { - apiKeyInput->setText(obj["apiKey"].toString()); - } - - file.close(); - Logger::instance().log("成功加载配置文件"); - } else { - Logger::instance().log("无法打开配置文件", "WARNING"); - } -} - -void MainWindow::saveConfig() -{ - QJsonObject obj; - obj["appId"] = appIdInput->text(); - obj["apiKey"] = apiKeyInput->text(); - - QFile file(getConfigPath()); - if (file.open(QIODevice::WriteOnly)) { - file.write(QJsonDocument(obj).toJson()); - file.close(); - Logger::instance().log("成功保存配置文件"); - } else { - QMessageBox::warning(this, "错误", "无法保存配置文件"); - Logger::instance().log("无法保存配置文件", "ERROR"); - } -} - -void MainWindow::updateButtonState() -{ - bool hasConfig = !appIdInput->text().isEmpty() && !apiKeyInput->text().isEmpty(); - startStopButton->setEnabled(hasConfig); -} - -void MainWindow::appendSubtitle(const QString &text) -{ - subtitleDisplay->append(text); + ui->statusBar->showMessage(message); } -QString MainWindow::getConfigPath() const +void MainWindow::onStatusChanged(const QString &status) { - // 
获取应用程序可执行文件所在目录 - QString exePath = QCoreApplication::applicationDirPath(); - return exePath + "/MeetingAssistant.json"; + ui->statusBar->showMessage(status); } \ No newline at end of file diff --git a/src/mainwindow.h b/src/mainwindow.h index 93dd6ea..74db725 100644 --- a/src/mainwindow.h +++ b/src/mainwindow.h @@ -2,24 +2,16 @@ #define MAINWINDOW_H #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include "audioprocessor.h" -#include "baiduapi.h" +#include "azurespeechapi.h" -namespace Ui { -class MainWindow; -} +QT_BEGIN_NAMESPACE +namespace Ui { class MainWindow; } +QT_END_NAMESPACE class MainWindow : public QMainWindow { @@ -30,35 +22,22 @@ class MainWindow : public QMainWindow ~MainWindow(); private slots: - void onStartStopClicked(); - void onClearClicked(); - void onSaveConfigClicked(); - void onTestApiClicked(); + void onStartButtonClicked(); + void onStopButtonClicked(); + void onTestConnectionButtonClicked(); void onAudioDataReceived(const QByteArray &data); void onRecognitionResult(const QString &text); void onTranslationResult(const QString &text); - void onApiError(const QString &message); - void onApiTestResult(bool success, const QString &message); + void onError(const QString &message); + void onStatusChanged(const QString &status); private: - void setupUI(); - void loadConfig(); - void saveConfig(); - void updateButtonState(); - void appendSubtitle(const QString &text); - QString getConfigPath() const; - - QLineEdit *appIdInput; - QLineEdit *apiKeyInput; - QPushButton *saveConfigButton; - QPushButton *testApiButton; - QPushButton *startStopButton; - QPushButton *clearButton; - QTextEdit *subtitleDisplay; - QStatusBar *statusBar; + Ui::MainWindow *ui; AudioProcessor *audioProcessor; - BaiduAPI *baiduApi; - bool isRecording; + AzureSpeechAPI *azureSpeechApi; + QString currentAppId; + QString currentApiKey; + QString currentRegion; 
}; #endif // MAINWINDOW_H \ No newline at end of file diff --git a/src/mainwindow.ui b/src/mainwindow.ui index 10b4f80..e16763c 100644 --- a/src/mainwindow.ui +++ b/src/mainwindow.ui @@ -15,129 +15,72 @@ - - 10 - - - 10 - - - 10 - - - 10 - - - 10 - - - - 10 - + - + - APP ID: + 订阅密钥: - - - 150 - - + - + - API Key: + 区域: - - - 150 - - + - + - 保存配置 + 测试连接 - - - - 测试API - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - 0 - 400 - - - - - Arial - 16 - - - - true + + + Qt::Vertical + + + true + + + + + true + + - - - 10 - + - - - 80 - + 开始 - - - 80 - + - 清除 + 停止 - + Qt::Horizontal diff --git a/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.core.dll b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.core.dll new file mode 100644 index 0000000..47b8200 Binary files /dev/null and b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.core.dll differ diff --git a/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.audio.sys.dll b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.audio.sys.dll new file mode 100644 index 0000000..a598fba Binary files /dev/null and b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.audio.sys.dll differ diff --git a/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.codec.dll b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.codec.dll new file mode 100644 index 0000000..c4407e9 Binary files /dev/null and b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.codec.dll differ diff --git a/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.kws.dll b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.kws.dll new file mode 100644 index 0000000..4f64ae0 Binary files /dev/null and b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.kws.dll 
differ diff --git a/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.kws.ort.dll b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.kws.ort.dll new file mode 100644 index 0000000..2de3280 Binary files /dev/null and b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.kws.ort.dll differ diff --git a/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.lu.dll b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.lu.dll new file mode 100644 index 0000000..6c0e633 Binary files /dev/null and b/third_party/azure_speech_sdk/bin/Microsoft.CognitiveServices.Speech.extension.lu.dll differ diff --git a/third_party/azure_speech_sdk/include/c_api/CMakeLists.txt b/third_party/azure_speech_sdk/include/c_api/CMakeLists.txt new file mode 100644 index 0000000..725b6bc --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/CMakeLists.txt @@ -0,0 +1,8 @@ +cmake_minimum_required(VERSION 3.19) + +project(c_headers) + +set(SRC_DIR "${PROJECT_SOURCE_DIR}") +add_library(${PROJECT_NAME} INTERFACE ${SPEECH_C_API_HEADERS}) +target_include_directories(${PROJECT_NAME} INTERFACE ${PROJECT_SOURCE_DIR}) +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER api) diff --git a/third_party/azure_speech_sdk/include/c_api/azac_api_c_common.h b/third_party/azure_speech_sdk/include/c_api/azac_api_c_common.h new file mode 100644 index 0000000..036915b --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/azac_api_c_common.h @@ -0,0 +1,79 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. 
+// + +#pragma once + +#include +#include + +#ifdef __cplusplus +#define AZAC_EXTERN_C extern "C" +#else +#define AZAC_EXTERN_C +#endif + +#ifdef _WIN32 +#define AZAC_DLL_EXPORT __declspec(dllexport) +#define AZAC_DLL_IMPORT __declspec(dllimport) +#define AZAC_API_NOTHROW __declspec(nothrow) +#define AZAC_API_RESULTTYPE AZACHR +#define AZAC_API_CALLTYPE __stdcall +#define AZAC_API_VCALLTYPE __cdecl +#else +#define AZAC_DLL_EXPORT __attribute__ ((__visibility__("default"))) +#define AZAC_DLL_IMPORT +#define AZAC_API_NOTHROW __attribute__((nothrow)) +#define AZAC_API_RESULTTYPE AZACHR +#define AZAC_API_CALLTYPE +#define AZAC_API_VCALLTYPE __attribute__((cdecl)) +#endif + +#ifdef AZAC_CONFIG_EXPORTAPIS +#define AZAC_API_EXPORT AZAC_DLL_EXPORT +#endif +#ifdef AZAC_CONFIG_IMPORTAPIS +#define AZAC_API_EXPORT AZAC_DLL_IMPORT +#endif +#ifdef AZAC_CONFIG_STATIC_LINK_APIS +#define AZAC_API_EXPORT +#endif +#ifndef AZAC_API_EXPORT +#define AZAC_API_EXPORT AZAC_DLL_IMPORT +#endif + +#define AZAC_API AZAC_EXTERN_C AZAC_API_EXPORT AZAC_API_RESULTTYPE AZAC_API_NOTHROW AZAC_API_CALLTYPE +#define AZAC_API_(type) AZAC_EXTERN_C AZAC_API_EXPORT type AZAC_API_NOTHROW AZAC_API_CALLTYPE +#define AZAC_API__(type) AZAC_EXTERN_C AZAC_API_EXPORT AZAC_API_NOTHROW type AZAC_API_CALLTYPE +#define AZAC_APIV AZAC_EXTERN_C AZAC_API_EXPORT AZAC_API_NOTHROW AZAC_API_RESULTTYPE AZAC_API_VCALLTYPE +#define AZAC_APIV_(type) AZAC_EXTERN_C AZAC_API_EXPORT AZAC_API_NOTHROW type AZAC_API_VCALLTYPE +#define AZAC_API_PRIVATE AZAC_EXTERN_C AZAC_API_RESULTTYPE AZAC_API_NOTHROW AZAC_API_CALLTYPE +#define AZAC_API_PRIVATE_(type) AZAC_EXTERN_C type AZAC_API_NOTHROW AZAC_API_CALLTYPE + +struct _azac_empty {}; +typedef struct _azac_empty* _azachandle; +typedef _azachandle AZAC_HANDLE; + +#define AZAC_HANDLE_INVALID ((AZAC_HANDLE)-1) +#define AZAC_HANDLE_RESERVED1 ((AZAC_HANDLE)+1) + +#ifndef AZAC_SUPPRESS_DIAGNOSTICS_INCLUDE_FROM_COMMON +#define AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DIAGNOSTICS +#include +#undef 
AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DIAGNOSTICS +#endif + +#ifndef AZAC_SUPPRESS_ERROR_INCLUDE_FROM_COMMON +#define AZAC_SUPPRESS_COMMON_INCLUDE_FROM_ERROR +#include +#undef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_ERROR +#endif + +#ifndef AZAC_SUPPRESS_DEBUG_INCLUDE_FROM_COMMON +#define AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DEBUG +#include +#undef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DEBUG +#endif + +#define AZACPROPERTYBAGHANDLE AZAC_HANDLE diff --git a/third_party/azure_speech_sdk/include/c_api/azac_api_c_diagnostics.h b/third_party/azure_speech_sdk/include/c_api/azac_api_c_diagnostics.h new file mode 100644 index 0000000..2941993 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/azac_api_c_diagnostics.h @@ -0,0 +1,78 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. +// + +#pragma once + +#ifndef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DIAGNOSTICS +#define AZAC_SUPPRESS_DIAGNOSTICS_INCLUDE_FROM_COMMON +#include +#undef AZAC_SUPPRESS_DIAGNOSTICS_INCLUDE_FROM_COMMON +#endif + +#include +#include + +// +// APIs to manage logging to file +// +AZAC_API diagnostics_log_start_logging(AZAC_HANDLE hpropbag, void* reserved); +AZAC_API diagnostics_log_apply_properties(AZAC_HANDLE hpropbag, void* reserved); +AZAC_API diagnostics_log_stop_logging(); + +// +// APIs to manage logging events +// +typedef void(*DIAGNOSTICS_CALLBACK_FUNC)(const char *logLine); +AZAC_API diagnostics_logmessage_set_callback(DIAGNOSTICS_CALLBACK_FUNC callback); +AZAC_API diagnostics_logmessage_set_filters(const char* filters); + +// +// APIs to managed eventSource events +// +typedef void(*DIAGNOSTICS_EVENTSOURCE_CALLBACK_FUNC)(const char *logLine, const int level); +AZAC_API diagnostics_eventsource_logmessage_set_callback(DIAGNOSTICS_EVENTSOURCE_CALLBACK_FUNC callback); +AZAC_API diagnostics_eventsource_logmessage_set_filters(const char* filters); + +// +// APIs to manage logging to memory +// +AZAC_API_(void) 
diagnostics_log_memory_start_logging(); +AZAC_API_(void) diagnostics_log_memory_stop_logging(); +AZAC_API_(void) diagnostics_log_memory_set_filters(const char* filters); + +// The binding layers use these to implement a dump to vector of strings or an output stream +AZAC_API_(size_t) diagnostics_log_memory_get_line_num_oldest(); +AZAC_API_(size_t) diagnostics_log_memory_get_line_num_newest(); +AZAC_API__(const char*) diagnostics_log_memory_get_line(size_t lineNum); + +// Dump to file, std out or std err with optional prefix string +AZAC_API diagnostics_log_memory_dump_to_stderr(); // This calls diagnostics_log_memory_dump(nullptr, nullptr, false, true) +AZAC_API diagnostics_log_memory_dump(const char* filename, const char* linePrefix, bool emitToStdOut, bool emitToStdErr); +AZAC_API diagnostics_log_memory_dump_on_exit(const char* filename, const char* linePrefix, bool emitToStdOut, bool emitToStdErr); + +// +// APIs to manage logging to the console +// +AZAC_API_(void) diagnostics_log_console_start_logging(bool logToStderr); +AZAC_API_(void) diagnostics_log_console_stop_logging(); +AZAC_API_(void) diagnostics_log_console_set_filters(const char* filters); + +// +// APIs to log a string +// +AZAC_API_(void) diagnostics_log_format_message(char* buffer, size_t bufferSize, int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* pszFormat, va_list argptr); +AZAC_API_(void) diagnostics_log_trace_string(int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* psz); +AZAC_API_(void) diagnostics_log_trace_message(int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* pszFormat, ...); +AZAC_API_(void) diagnostics_log_trace_message2(int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* pszFormat, va_list argptr); + +AZAC_API_(void) diagnostics_set_log_level(const char * logger, const char * level); +AZAC_API_(bool) 
diagnostics_is_log_level_enabled(int level); + +// +// Memory tracking API's +// +AZAC_API_(size_t) diagnostics_get_handle_count(); +AZAC_API__(const char*) diagnostics_get_handle_info(); +AZAC_API diagnostics_free_string(const char* value); diff --git a/third_party/azure_speech_sdk/include/c_api/azac_api_c_error.h b/third_party/azure_speech_sdk/include/c_api/azac_api_c_error.h new file mode 100644 index 0000000..c805c67 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/azac_api_c_error.h @@ -0,0 +1,22 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. +// + +#pragma once + +#ifndef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_ERROR +#define AZAC_SUPPRESS_ERROR_INCLUDE_FROM_COMMON +#include +#undef AZAC_SUPPRESS_ERROR_INCLUDE_FROM_COMMON +#endif + +typedef const char * const_char_ptr; + +AZAC_API_(const_char_ptr) error_get_message(AZAC_HANDLE errorHandle); + +AZAC_API_(const_char_ptr) error_get_call_stack(AZAC_HANDLE errorHandle); + +AZAC_API error_get_error_code(AZAC_HANDLE errorHandle); + +AZAC_API error_release(AZAC_HANDLE errorHandle); diff --git a/third_party/azure_speech_sdk/include/c_api/azac_api_c_pal.h b/third_party/azure_speech_sdk/include/c_api/azac_api_c_pal.h new file mode 100644 index 0000000..6a33988 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/azac_api_c_pal.h @@ -0,0 +1,11 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. 
+// + +#pragma once + +#include "azac_api_c_common.h" + +AZAC_API_(size_t) pal_wstring_to_string(char * dst, const wchar_t * src, size_t dstSize); +AZAC_API_(size_t) pal_string_to_wstring(wchar_t * dst, const char * src, size_t dstSize); \ No newline at end of file diff --git a/third_party/azure_speech_sdk/include/c_api/azac_debug.h b/third_party/azure_speech_sdk/include/c_api/azac_debug.h new file mode 100644 index 0000000..c7ab02d --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/azac_debug.h @@ -0,0 +1,843 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. +// + +#pragma once +#include +#include + +#ifndef _MSC_VER +// macros in this header generate a bunch of +// "ISO C++11 requires at least one argument for the "..." in a variadic macro" errors. +// system_header pragma is the only mechanism that helps to suppress them. +// https://stackoverflow.com/questions/35587137/how-to-suppress-gcc-variadic-macro-argument-warning-for-zero-arguments-for-a-par +// TODO: try to make macros standard-compliant. +#pragma GCC system_header +#endif + +#ifndef __cplusplus +#define static_assert _Static_assert +#endif + +#define UNUSED(x) (void)(x) + +//------------------------------------------------------- +// Re-enabled ability to compile out all macros... +// However, currently still need to keep all macros until +// final review of all macros is complete. 
+//------------------------------------------------------- + +#define AZAC_CONFIG_TRACE_INCLUDE_DBG_WITH_ALL 1 + +#ifdef AZAC_CONFIG_TRACE_INCLUDE_DBG_WITH_ALL +#if defined(AZAC_CONFIG_TRACE_ALL) && !defined(AZAC_CONFIG_DBG_TRACE_ALL) && (!defined(DEBUG) || !defined(_DEBUG)) +#define AZAC_CONFIG_DBG_TRACE_ALL 1 +#endif +#endif + +//------------------------------------------------------- +// AZAC_ and AZAC_DBG_ macro configuration +//------------------------------------------------------- + +#ifdef AZAC_CONFIG_DBG_TRACE_ALL +#define AZAC_CONFIG_DBG_TRACE_VERBOSE 1 +#define AZAC_CONFIG_DBG_TRACE_INFO 1 +#define AZAC_CONFIG_DBG_TRACE_WARNING 1 +#define AZAC_CONFIG_DBG_TRACE_ERROR 1 +#define AZAC_CONFIG_DBG_TRACE_FUNCTION 1 +#define AZAC_CONFIG_DBG_TRACE_SCOPE 1 +#define AZAC_CONFIG_DBG_TRACE_ASSERT 1 +#define AZAC_CONFIG_DBG_TRACE_VERIFY 1 +#ifndef AZAC_CONFIG_TRACE_ALL +#define AZAC_CONFIG_TRACE_ALL 1 +#endif +#endif + +#ifdef AZAC_CONFIG_TRACE_ALL +#define AZAC_CONFIG_TRACE_VERBOSE 1 +#define AZAC_CONFIG_TRACE_INFO 1 +#define AZAC_CONFIG_TRACE_WARNING 1 +#define AZAC_CONFIG_TRACE_ERROR 1 +#define AZAC_CONFIG_TRACE_FUNCTION 1 +#define AZAC_CONFIG_TRACE_SCOPE 1 +#define AZAC_CONFIG_TRACE_THROW_ON_FAIL 1 +#define AZAC_CONFIG_TRACE_REPORT_ON_FAIL 1 +#define AZAC_CONFIG_TRACE_RETURN_ON_FAIL 1 +#define AZAC_CONFIG_TRACE_EXITFN_ON_FAIL 1 +#endif + +//----------------------------------------------------------- +// AZAC_TRACE macro common implementations +//----------------------------------------------------------- + +#define __AZAC_TRACE_LEVEL_INFO 0x08 // Trace_Info +#define __AZAC_TRACE_LEVEL_WARNING 0x04 // Trace_Warning +#define __AZAC_TRACE_LEVEL_ERROR 0x02 // Trace_Error +#define __AZAC_TRACE_LEVEL_VERBOSE 0x10 // Trace_Verbose + +#ifndef __AZAC_DO_TRACE_IMPL +#ifdef __cplusplus +#include +#include +#include +#include +inline void __azac_do_trace_message(int level, const char* pszTitle, const char* fileName, const int lineNumber, const char* pszFormat, ...) 
throw() +{ + UNUSED(level); + + bool logToConsole = false; +#if defined(DEBUG) || defined(_DEBUG) + logToConsole = true; +#endif + + if (!logToConsole) + { + return; + } + + try + { + va_list argptr; + va_start(argptr, pszFormat); + + std::string format; + while (*pszFormat == '\n' || *pszFormat == '\r') + { + if (*pszFormat == '\r') + { + pszTitle = nullptr; + } + + format += *pszFormat++; + } + + if (pszTitle != nullptr) + { + format += pszTitle; + } + + std::string fileNameOnly(fileName); + std::replace(fileNameOnly.begin(), fileNameOnly.end(), '\\', '/'); + + std::string fileNameLineNumber = " " + fileNameOnly.substr(fileNameOnly.find_last_of('/', std::string::npos) + 1) + ":" + std::to_string(lineNumber) + " "; + + format += fileNameLineNumber; + + format += pszFormat; + + if (format.length() < 1 || format[format.length() - 1] != '\n') + { + format += "\n"; + } + + vfprintf(stderr, format.c_str(), argptr); + + va_end(argptr); + } + catch(...) + { + } +} +#define __AZAC_DO_TRACE_IMPL __azac_do_trace_message +#else // __cplusplus +#define __AZAC_DO_TRACE_IMPL +#endif // __cplusplus +#endif + +#define __AZAC_DOTRACE(level, title, fileName, lineNumber, ...) \ + do { \ + __AZAC_DO_TRACE_IMPL(level, title, fileName, lineNumber, ##__VA_ARGS__); \ + } while (0) + +#define __AZAC_TRACE_INFO(title, fileName, lineNumber, msg, ...) __AZAC_DOTRACE(__AZAC_TRACE_LEVEL_INFO, title, fileName, lineNumber, msg, ##__VA_ARGS__) +#define __AZAC_TRACE_INFO_IF(cond, title, fileName, lineNumber, msg, ...) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + __AZAC_TRACE_INFO(title, fileName, lineNumber, msg, ##__VA_ARGS__); \ + } } while (0) + +#define __AZAC_TRACE_WARNING(title, fileName, lineNumber, msg, ...) __AZAC_DOTRACE(__AZAC_TRACE_LEVEL_WARNING, title, fileName, lineNumber, msg, ##__VA_ARGS__) +#define __AZAC_TRACE_WARNING_IF(cond, title, fileName, lineNumber, msg, ...) 
\ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + __AZAC_TRACE_WARNING(title, fileName, lineNumber, msg, ##__VA_ARGS__); \ + } } while (0) + +#define __AZAC_TRACE_ERROR(title, fileName, lineNumber, msg, ...) __AZAC_DOTRACE(__AZAC_TRACE_LEVEL_ERROR, title, fileName, lineNumber, msg, ##__VA_ARGS__) +#define __AZAC_TRACE_ERROR_IF(cond, title, fileName, lineNumber, msg, ...) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + __AZAC_TRACE_ERROR(title, fileName, lineNumber, msg, ##__VA_ARGS__); \ + } } while (0) + +#define __AZAC_TRACE_VERBOSE(title, fileName, lineNumber, msg, ...) __AZAC_DOTRACE(__AZAC_TRACE_LEVEL_VERBOSE, title, fileName, lineNumber, msg, ##__VA_ARGS__) +#define __AZAC_TRACE_VERBOSE_IF(cond, title, fileName, lineNumber, msg, ...) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + __AZAC_TRACE_VERBOSE(title, fileName, lineNumber, msg, ##__VA_ARGS__); \ + } } while (0) + +#define ___AZAC_EXPR_AS_STRING(_String) "" #_String +#define __AZAC_EXPR_AS_STRING(_String) ___AZAC_EXPR_AS_STRING(_String) + +#define __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x) __AZAC_TRACE_ERROR(title, fileName, lineNumber, __AZAC_EXPR_AS_STRING(hr) " = 0x%0" PRIxPTR, x) + +#define __AZAC_REPORT_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } } while (0) +#define __AZAC_REPORT_ON_FAIL_IFNOT(title, fileName, lineNumber, hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } } } while (0) + +#define __AZAC_T_RETURN_HR(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } \ + return x; \ + } while (0) +#define __AZAC_T_RETURN_HR_IF(title, fileName, lineNumber, hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ 
+ __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } \ + return x; \ + } } while (0) +#define __AZAC_T_RETURN_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + return x; \ + } } while (0) +#define __AZAC_T_RETURN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + return x; \ + } } } while (0) +#define __AZAC_RETURN_HR(hr) return hr +#define __AZAC_RETURN_HR_IF(hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + return hr; \ + } } while (0) +#define __AZAC_RETURN_ON_FAIL(hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + return x; \ + } } while (0) +#define __AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + return x; \ + } } } while (0) + +#define __AZAC_T_EXITFN_HR(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } \ + goto AZAC_EXITFN_CLEANUP; \ + } while (0) +#define __AZAC_T_EXITFN_HR_IF(title, fileName, lineNumber, hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + } \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) +#define __AZAC_T_EXITFN_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) +#define __AZAC_T_EXITFN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + goto AZAC_EXITFN_CLEANUP; \ + } } } while (0) + +#define 
__AZAC_EXITFN_HR(hr) \ + do { \ + AZACHR x = hr; \ + goto AZAC_EXITFN_CLEANUP; \ + } while (0) +#define __AZAC_EXITFN_HR_IF(hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) +#define __AZAC_EXITFN_ON_FAIL(hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) +#define __AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + goto AZAC_EXITFN_CLEANUP; \ + } } } while (0) + +#define __AZAC_TRACE_ASSERT(title, fileName, lineNumber, expr) __AZAC_TRACE_ERROR(title, fileName, lineNumber, __AZAC_EXPR_AS_STRING(expr) " = false") +#define __AZAC_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ...) __AZAC_TRACE_ERROR(title, fileName, lineNumber, __AZAC_EXPR_AS_STRING(expr) " = false; " __VA_ARGS__) + +#define __AZAC_DBG_ASSERT(title, fileName, lineNumber, expr) \ + do { \ + int fCond = !!(expr); \ + if (!fCond) { \ + __AZAC_TRACE_ASSERT(title, fileName, lineNumber, expr); \ + abort(); \ + } } while (0) +#define __AZAC_DBG_ASSERT_WITH_MESSAGE(title, fileName, lineNumber, expr, ...) \ + do { \ + int fCond = !!(expr); \ + if (!fCond) { \ + __AZAC_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ##__VA_ARGS__); \ + abort(); \ + } } while (0) + +#define __AZAC_DBG_VERIFY(title, fileName, lineNumber, expr) \ + do { \ + int fCond = !!(expr); \ + if (!fCond) { \ + __AZAC_TRACE_ASSERT(title, fileName, lineNumber, expr); \ + abort(); \ + } } while (0) +#define __AZAC_DBG_VERIFY_WITH_MESSAGE(title, fileName, lineNumber, expr, ...) 
\ + do { \ + int fCond = !!(expr); \ + if (!fCond) { \ + __AZAC_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ##__VA_ARGS__); \ + abort(); \ + } } while (0) + +#ifdef __cplusplus + +#include +#define __AZAC_TRACE_SCOPE(t1, fileName, lineNumber, t2, x, y) \ + __AZAC_TRACE_INFO(t1, fileName, lineNumber, "%s", x); \ + auto evaluateYInScopeInMacros##lineNumber = y; \ + auto leavingScopePrinterInMacros##lineNumber = [&evaluateYInScopeInMacros##lineNumber](int*) -> void { \ + __AZAC_TRACE_INFO(t2, fileName, lineNumber, "%s", evaluateYInScopeInMacros##lineNumber); \ + }; \ + std::unique_ptr onExit##lineNumber((int*)1, leavingScopePrinterInMacros##lineNumber) + +#ifndef __AZAC_THROW_HR_IMPL +#define __AZAC_THROW_HR_IMPL(hr) __azac_rethrow(hr) +#endif +#ifndef __AZAC_THROW_HR +#define __AZAC_THROW_HR(hr) __AZAC_THROW_HR_IMPL(hr) +#endif + +#ifndef __AZAC_LOG_HR_IMPL +#define __AZAC_LOG_HR_IMPL(hr) __azac_log_only(hr) +#endif +#ifndef __AZAC_LOG_HR +#define __AZAC_LOG_HR(hr) __AZAC_LOG_HR_IMPL(hr) +#endif + +#define __AZAC_T_LOG_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_LOG_HR(x); \ + } } while (0) +#define __AZAC_T_THROW_ON_FAIL(title, fileName, lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_THROW_HR(x); \ + } } while (0) +#define __AZAC_T_THROW_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_THROW_HR(x); \ + } } } while (0) +#define __AZAC_T_THROW_HR_IF(title, fileName, lineNumber, hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_THROW_HR(x); \ + } } while (0) +#define __AZAC_T_THROW_HR(title, fileName, 
lineNumber, hr) \ + do { \ + AZACHR x = hr; \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x); \ + __AZAC_THROW_HR(x); \ + } while (0) + + +#define __AZAC_LOG_ON_FAIL(hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_LOG_HR(x); \ + } } while (0) +#define __AZAC_THROW_ON_FAIL(hr) \ + do { \ + AZACHR x = hr; \ + if (AZAC_FAILED(x)) { \ + __AZAC_THROW_HR(x); \ + } } while (0) +#define __AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) \ + do { \ + AZACHR x = hr; \ + if (x != hrNot) { \ + if (AZAC_FAILED(x)) { \ + __AZAC_THROW_HR(x); \ + } } } while (0) +#define __AZAC_THROW_HR_IF(hr, cond) \ + do { \ + int fCond = !!(cond); \ + if (fCond) { \ + AZACHR x = hr; \ + __AZAC_THROW_HR(x); \ + } } while (0) + +#endif // __cplusplus + + + +//------------------------------------------------------- +// AZAC_ macro definitions +//------------------------------------------------------- + +#ifdef AZAC_CONFIG_TRACE_VERBOSE +#define AZAC_TRACE_VERBOSE(msg, ...) __AZAC_TRACE_VERBOSE("AZAC_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_TRACE_VERBOSE_IF(cond, msg, ...) __AZAC_TRACE_VERBOSE_IF(cond, "AZAC_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_TRACE_VERBOSE(...) +#define AZAC_TRACE_VERBOSE_IF(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_VERBOSE +#define AZAC_DBG_TRACE_VERBOSE(msg, ...) __AZAC_TRACE_VERBOSE("AZAC_DBG_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_DBG_TRACE_VERBOSE_IF(cond, msg, ...) __AZAC_TRACE_VERBOSE_IF(cond, "AZAC_DBG_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_DBG_TRACE_VERBOSE(...) +#define AZAC_DBG_TRACE_VERBOSE_IF(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_INFO +#define AZAC_TRACE_INFO(msg, ...) __AZAC_TRACE_INFO("AZAC_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_TRACE_INFO_IF(cond, msg, ...) 
__AZAC_TRACE_INFO_IF(cond, "AZAC_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_TRACE_INFO(...) +#define AZAC_TRACE_INFO_IF(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_INFO +#define AZAC_DBG_TRACE_INFO(msg, ...) __AZAC_TRACE_INFO("AZAC_DBG_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_DBG_TRACE_INFO_IF(cond, msg, ...) __AZAC_TRACE_INFO_IF(cond, "AZAC_DBG_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_DBG_TRACE_INFO(...) +#define AZAC_DBG_TRACE_INFO_IF(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_WARNING +#define AZAC_TRACE_WARNING(msg, ...) __AZAC_TRACE_WARNING("AZAC_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_TRACE_WARNING_IF(cond, msg, ...) __AZAC_TRACE_WARNING_IF(cond, "AZAC_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_TRACE_WARNING(...) +#define AZAC_TRACE_WARNING_IF(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_WARNING +#define AZAC_DBG_TRACE_WARNING(msg, ...) __AZAC_TRACE_WARNING("AZAC_DBG_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_DBG_TRACE_WARNING_IF(cond, msg, ...) __AZAC_TRACE_WARNING_IF(cond, "AZAC_DBG_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_DBG_TRACE_WARNING(...) +#define AZAC_DBG_TRACE_WARNING_IF(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_ERROR +#define AZAC_TRACE_ERROR(msg, ...) __AZAC_TRACE_ERROR("AZAC_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_TRACE_ERROR_IF(cond, msg, ...) __AZAC_TRACE_ERROR_IF(cond, "AZAC_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_TRACE_ERROR(...) +#define AZAC_TRACE_ERROR_IF(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_ERROR +#define AZAC_DBG_TRACE_ERROR(msg, ...) __AZAC_TRACE_ERROR("AZAC_DBG_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define AZAC_DBG_TRACE_ERROR_IF(cond, msg, ...) 
__AZAC_TRACE_ERROR_IF(cond, "AZAC_DBG_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define AZAC_DBG_TRACE_ERROR(...) +#define AZAC_DBG_TRACE_ERROR_IF(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_FUNCTION +#define AZAC_TRACE_FUNCTION(...) __AZAC_TRACE_VERBOSE("AZAC_TRACE_FUNCTION: ", __FILE__, __LINE__, __FUNCTION__) +#else +#define AZAC_TRACE_FUNCTION(...) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_FUNCTION +#define AZAC_DBG_TRACE_FUNCTION(...) __AZAC_TRACE_VERBOSE("AZAC_DBG_TRACE_FUNCTION: ", __FILE__, __LINE__, __FUNCTION__) +#else +#define AZAC_DBG_TRACE_FUNCTION(...) +#endif + +#ifdef AZAC_CONFIG_TRACE_REPORT_ON_FAIL +#define AZAC_REPORT_ON_FAIL(hr) __AZAC_REPORT_ON_FAIL("AZAC_REPORT_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_REPORT_ON_FAIL_IFNOT(hr, hrNot) __AZAC_REPORT_ON_FAIL_IFNOT("AZAC_REPORT_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define AZAC_REPORT_ON_FAIL(hr) UNUSED(hr) +#define AZAC_REPORT_ON_FAIL_IFNOT(hr, hrNot) UNUSED(hr); UNUSED(hrNot) +#endif + +#ifdef AZAC_CONFIG_TRACE_RETURN_ON_FAIL +#define AZAC_RETURN_HR(hr) __AZAC_T_RETURN_HR("AZAC_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_RETURN_HR_IF(hr, cond) __AZAC_T_RETURN_HR_IF("AZAC_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr, cond) +#define AZAC_RETURN_ON_FAIL(hr) __AZAC_T_RETURN_ON_FAIL("AZAC_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_T_RETURN_ON_FAIL_IF_NOT("AZAC_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define AZAC_RETURN_HR(hr) __AZAC_RETURN_HR(hr) +#define AZAC_RETURN_HR_IF(hr, cond) __AZAC_RETURN_HR_IF(hr, cond) +#define AZAC_RETURN_ON_FAIL(hr) __AZAC_RETURN_ON_FAIL(hr) +#define AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) +#endif + +#define AZAC_IFTRUE_RETURN_HR(cond, hr) AZAC_RETURN_HR_IF(hr, cond) +#define AZAC_IFFALSE_RETURN_HR(cond, hr) AZAC_RETURN_HR_IF(hr, !(cond)) +#define AZAC_IFFAILED_RETURN_HR(hr) AZAC_RETURN_ON_FAIL(hr) +#define 
AZAC_IFFAILED_RETURN_HR_IFNOT(hr, hrNot) AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) + +#ifdef AZAC_CONFIG_TRACE_EXITFN_ON_FAIL +#define AZAC_EXITFN_HR(hr) __AZAC_T_EXITFN_HR("AZAC_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_EXITFN_HR_IF(hr, cond) __AZAC_T_EXITFN_HR_IF("AZAC_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr, cond) +#define AZAC_EXITFN_ON_FAIL(hr) __AZAC_T_EXITFN_ON_FAIL("AZAC_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_EXITFN_ON_FAIL_IF_NOT("AZAC_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define AZAC_EXITFN_HR(hr) __AZAC_EXITFN_HR(hr) +#define AZAC_EXITFN_HR_IF(hr, cond) __AZAC_EXITFN_HR_IF(hr, cond) +#define AZAC_EXITFN_ON_FAIL(hr) __AZAC_EXITFN_ON_FAIL(hr) +#define AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) +#endif + +#define AZAC_IFTRUE_EXITFN_WHR(cond, hr) AZAC_EXITFN_HR_IF(hr, cond) +#define AZAC_IFFALSE_EXITFN_WHR(cond, hr) AZAC_EXITFN_HR_IF(hr, !(cond)) +#define AZAC_IFFAILED_EXITFN_WHR(hr) AZAC_EXITFN_ON_FAIL(hr) +#define AZAC_IFFAILED_EXITFN_WHR_IFNOT(hr, hrNot) AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) + +#define AZAC_IFTRUE_EXITFN_CLEANUP(cond, expr) \ + do { \ + int fCondT = !!(cond); \ + if (fCondT) { \ + expr; \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) + +#define AZAC_IFFALSE_EXITFN_CLEANUP(cond, expr) \ + do { \ + int fCondF = !!(cond); \ + if (!fCondF) { \ + expr; \ + goto AZAC_EXITFN_CLEANUP; \ + } } while (0) + +#if defined(AZAC_CONFIG_DBG_TRACE_ASSERT) && (defined(DEBUG) || defined(_DEBUG)) +#define AZAC_DBG_ASSERT(expr) __AZAC_DBG_ASSERT("AZAC_ASSERT: ", __FILE__, __LINE__, expr) +#define AZAC_DBG_ASSERT_WITH_MESSAGE(expr, ...) __AZAC_DBG_ASSERT_WITH_MESSAGE("AZAC_ASSERT: ", __FILE__, __LINE__, expr, ##__VA_ARGS__) +#else +#define AZAC_DBG_ASSERT(expr) +#define AZAC_DBG_ASSERT_WITH_MESSAGE(expr, ...) 
+#endif + +#if defined(AZAC_CONFIG_DBG_TRACE_VERIFY) && (defined(DEBUG) || defined(_DEBUG)) +#define AZAC_DBG_VERIFY(expr) __AZAC_DBG_VERIFY("AZAC_VERIFY: ", __FILE__, __LINE__, expr) +#define AZAC_DBG_VERIFY_WITH_MESSAGE(expr, ...) __AZAC_DBG_VERIFY_WITH_MESSAGE("AZAC_VERIFY: ", __FILE__, __LINE__, expr, ##__VA_ARGS__) +#else +#define AZAC_DBG_VERIFY(expr) (expr) +#define AZAC_DBG_VERIFY_WITH_MESSAGE(expr, ...) (expr) +#endif + +#define AZAC_IFTRUE(cond, expr) \ + do { \ + int fCondT = !!(cond); \ + if (fCondT) { \ + expr; \ + } } while (0) + +#define AZAC_IFFALSE(cond, expr) \ + do { \ + int fCondF = !!(cond); \ + if (!fCondF) { \ + expr; \ + } } while (0) + +// handle circular dependency +#ifndef AZAC_SUPPRESS_COMMON_INCLUDE_FROM_DEBUG +#define AZAC_SUPPRESS_DEBUG_INCLUDE_FROM_COMMON +#include +#undef AZAC_SUPPRESS_DEBUG_INCLUDE_FROM_COMMON +#endif + +#ifdef __cplusplus + +#ifdef AZAC_CONFIG_TRACE_SCOPE +#define AZAC_TRACE_SCOPE(x, y) __AZAC_TRACE_SCOPE("AZAC_TRACE_SCOPE_ENTER: ", __FILE__, __LINE__, "AZAC_TRACE_SCOPE_EXIT: ", x, y) +#else +#define AZAC_TRACE_SCOPE(x, y) +#endif + +#ifdef AZAC_CONFIG_DBG_TRACE_SCOPE +#define AZAC_DBG_TRACE_SCOPE(x, y) __AZAC_TRACE_SCOPE("AZAC_DBG_TRACE_SCOPE_ENTER: ", __FILE__, __LINE__, "AZAC_DBG_TRACE_SCOPE_EXIT: ", x, y) +#else +#define AZAC_DBG_TRACE_SCOPE(x, y) +#endif + +#ifdef AZAC_CONFIG_TRACE_THROW_ON_FAIL +#define AZAC_THROW_ON_FAIL(hr) __AZAC_T_THROW_ON_FAIL("AZAC_THROW_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_T_THROW_ON_FAIL_IF_NOT("AZAC_THROW_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#define AZAC_LOG_ON_FAIL(hr) __AZAC_T_LOG_ON_FAIL("AZAC_LOG_ON_FAIL: ", __FILE__, __LINE__, hr) +#define AZAC_THROW_HR_IF(hr, cond) __AZAC_T_THROW_HR_IF("AZAC_THROW_HR_IF: ", __FILE__, __LINE__, hr, cond) +#define AZAC_THROW_HR(hr) __AZAC_T_THROW_HR("AZAC_THROW_HR: ", __FILE__, __LINE__, hr) +#else +#define AZAC_THROW_ON_FAIL(hr) __AZAC_THROW_ON_FAIL(hr) +#define 
AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) __AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) +#define AZAC_LOG_ON_FAIL(hr) __AZAC_LOG_ON_FAIL(hr) +#define AZAC_THROW_HR_IF(hr, cond) __AZAC_THROW_HR_IF(hr, cond) +#define AZAC_THROW_HR(hr) __AZAC_THROW_HR(hr) +#endif + +#define AZAC_IFTRUE_THROW_HR(cond, hr) AZAC_THROW_HR_IF(hr, cond) +#define AZAC_IFFALSE_THROW_HR(cond, hr) AZAC_THROW_HR_IF(hr, !(cond)) +#define AZAC_IFFAILED_THROW_HR(hr) AZAC_THROW_ON_FAIL(hr) +#define AZAC_IFFAILED_THROW_HR_IFNOT(hr, hrNot) AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) + +#include +#include +#include +#include + +inline void __azac_handle_native_ex(AZACHR hr, bool throwException) +{ + AZAC_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + auto handle = reinterpret_cast(hr); + auto error = error_get_error_code(handle); + if (error == AZAC_ERR_NONE) + { + if (throwException) + { + throw hr; + } + else + { + // do nothing. This is already logged by the macros that call this function + return; + } + } + + std::string errorMsg; + try + { + auto callstack = error_get_call_stack(handle); + auto what = error_get_message(handle); + + if (what) + { + errorMsg += what; + } + else + { + errorMsg += "Exception with error code: "; + errorMsg += std::to_string(error); + } + + if (callstack) + { + errorMsg += callstack; + } + } + catch (...) 
+ { + error_release(handle); + throw hr; + } + + error_release(handle); + if (throwException) + { + throw std::runtime_error(errorMsg); + } + else + { + AZAC_TRACE_ERROR("Error details: %s", errorMsg.c_str()); + } +} + +inline void __azac_log_only(AZACHR hr) +{ + __azac_handle_native_ex(hr, false); +} + +inline void __azac_rethrow(AZACHR hr) +{ + __azac_handle_native_ex(hr, true); +} + +#else // __cplusplus + +#define AZAC_TRACE_SCOPE(x, y) static_assert(false) +#define AZAC_DBG_TRACE_SCOPE(x, y) static_assert(false) +#define AZAC_LOG_ON_FAIL(hr) static_assert(false) +#define AZAC_THROW_ON_FAIL(hr) static_assert(false) +#define AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) static_assert(false) +#define AZAC_THROW_HR_IF(hr, cond) static_assert(false) +#define AZAC_THROW_HR(hr) static_assert(false) +#define AZAC_IFTRUE_THROW_HR(cond, hr) static_assert(false) +#define AZAC_IFFALSE_THROW_HR(cond, hr) static_assert(false) +#define AZAC_IFFAILED_THROW_HR(hr) static_assert(false) +#define AZAC_IFFAILED_THROW_HR_IFNOT(hr, hrNot) static_assert(false) + +#endif // __cplusplus + +//--------------------------------------------------------------------------- + +#ifdef __AZAC_DEBUG_H_EXAMPLES_IN_MAIN + +void main() +{ + int x = 4; + printf("%s = %d\n", __AZAC_EXPR_AS_STRING(x + 3), x + 3); + + AZAC_TRACE_INFO("hello there"); + AZAC_TRACE_ERROR("hello there"); + AZAC_TRACE_WARNING("hello there"); + AZAC_TRACE_VERBOSE("hello there"); + + AZAC_TRACE_INFO("hello there %d", 5); + AZAC_TRACE_ERROR("hello there %d", 5); + AZAC_TRACE_WARNING("hello there %d", 5); + AZAC_TRACE_VERBOSE("hello there %d", 5); + + AZAC_TRACE_INFO_IF(false, "hello there false"); + AZAC_TRACE_ERROR_IF(false, "hello there false"); + AZAC_TRACE_WARNING_IF(false, "hello there false"); + AZAC_TRACE_VERBOSE_IF(false, "hello there false"); + + AZAC_TRACE_INFO_IF(false, "hello there false %d", 5); + AZAC_TRACE_ERROR_IF(false, "hello there false %d", 5); + AZAC_TRACE_WARNING_IF(false, "hello there false %d", 5); + 
AZAC_TRACE_VERBOSE_IF(false, "hello there false %d", 5); + + AZAC_TRACE_INFO_IF(true, "hello there true"); + AZAC_TRACE_ERROR_IF(true, "hello there true"); + AZAC_TRACE_WARNING_IF(true, "hello there true"); + AZAC_TRACE_VERBOSE_IF(true, "hello there true"); + + AZAC_TRACE_INFO_IF(true, "hello there true %d", 5); + AZAC_TRACE_ERROR_IF(true, "hello there true %d", 5); + AZAC_TRACE_WARNING_IF(true, "hello there true %d", 5); + AZAC_TRACE_VERBOSE_IF(true, "hello there true %d", 5); + + AZAC_DBG_TRACE_INFO("hello there"); + AZAC_DBG_TRACE_ERROR("hello there"); + AZAC_DBG_TRACE_WARNING("hello there"); + AZAC_DBG_TRACE_VERBOSE("hello there"); + + AZAC_DBG_TRACE_INFO("hello there %d", 5); + AZAC_DBG_TRACE_ERROR("hello there %d", 5); + AZAC_DBG_TRACE_WARNING("hello there %d", 5); + AZAC_DBG_TRACE_VERBOSE("hello there %d", 5); + + AZAC_DBG_TRACE_INFO_IF(false, "hello there false"); + AZAC_DBG_TRACE_ERROR_IF(false, "hello there false"); + AZAC_DBG_TRACE_WARNING_IF(false, "hello there false"); + AZAC_DBG_TRACE_VERBOSE_IF(false, "hello there false"); + + AZAC_DBG_TRACE_INFO_IF(false, "hello there false %d", 5); + AZAC_DBG_TRACE_ERROR_IF(false, "hello there false %d", 5); + AZAC_DBG_TRACE_WARNING_IF(false, "hello there false %d", 5); + AZAC_DBG_TRACE_VERBOSE_IF(false, "hello there false %d", 5); + + AZAC_DBG_TRACE_INFO_IF(true, "hello there true"); + AZAC_DBG_TRACE_ERROR_IF(true, "hello there true"); + AZAC_DBG_TRACE_WARNING_IF(true, "hello there true"); + AZAC_DBG_TRACE_VERBOSE_IF(true, "hello there true"); + + AZAC_DBG_TRACE_INFO_IF(true, "hello there true %d", 5); + AZAC_DBG_TRACE_ERROR_IF(true, "hello there true %d", 5); + AZAC_DBG_TRACE_WARNING_IF(true, "hello there true %d", 5); + AZAC_DBG_TRACE_VERBOSE_IF(true, "hello there true %d", 5); + + AZAC_TRACE_SCOPE("A", "B"); + + AZAC_TRACE_FUNCTION(); + AZAC_DBG_TRACE_FUNCTION(); + + AZAC_DBG_ASSERT(false); + AZAC_DBG_ASSERT(true); + + AZAC_DBG_ASSERT_WITH_MESSAGE(false, "HEY!"); + AZAC_DBG_ASSERT_WITH_MESSAGE(true, "HEY!!"); + 
+ AZAC_DBG_VERIFY(false); + AZAC_DBG_VERIFY(true); + + AZAC_DBG_VERIFY_WITH_MESSAGE(false, "HEY!"); + AZAC_DBG_VERIFY_WITH_MESSAGE(true, "HEY!!"); + + AZACHR hr1 { 0x80001111 }; + AZACHR hr2 { 0x00001111 }; + + AZAC_TRACE_VERBOSE("Testing out AZAC_REPORT_ON_FAIL, should see two failures..."); + AZAC_REPORT_ON_FAIL(hr1); + AZAC_REPORT_ON_FAIL_IFNOT(hr1, 0x80001000); + AZAC_TRACE_VERBOSE("Testing out AZAC_REPORT_ON_FAIL, should see two failures... Done!"); + + AZAC_TRACE_VERBOSE("Testing out AZAC_REPORT_ON_FAIL, should see zero failures..."); + AZAC_REPORT_ON_FAIL(hr2); + AZAC_REPORT_ON_FAIL_IFNOT(hr1, 0x80001111); + AZAC_REPORT_ON_FAIL_IFNOT(hr2, 0x80001111); + AZAC_REPORT_ON_FAIL_IFNOT(hr2, 0x80001000); + AZAC_TRACE_VERBOSE("Testing out AZAC_REPORT_ON_FAIL, should see zero failures... Done!"); +} + +#endif diff --git a/third_party/azure_speech_sdk/include/c_api/azac_error.h b/third_party/azure_speech_sdk/include/c_api/azac_error.h new file mode 100644 index 0000000..4e8ac19 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/azac_error.h @@ -0,0 +1,458 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. + +#pragma once + +#include + +/// +/// Type definition for Azure AI Core result codes. +/// +typedef uintptr_t AZACHR; + +/// +/// Default result code indicating no error. +/// +#define AZAC_ERR_NONE 0 + +/// +/// Declare and initialize result code variable. +/// +#define AZAC_INIT_HR(hr) AZACHR hr = AZAC_ERR_NONE; \ + (void)(hr) + +/// +/// Check if result code indicates success. +/// +#define AZAC_SUCCEEDED(x) ((x) == AZAC_ERR_NONE) + +/// +/// Check if result code indicates error. +/// +#define AZAC_FAILED(x) (!AZAC_SUCCEEDED(x)) + +/// +/// Base macros for all error codes. +/// +#define __AZAC_ERRCODE_FAILED(x) (x) + +/// +/// The function is not implemented. 
+/// +#define AZAC_ERR_NOT_IMPL __AZAC_ERRCODE_FAILED(0xfff) + +/// +/// The object has not been properly initialized. +/// +#define AZAC_ERR_UNINITIALIZED __AZAC_ERRCODE_FAILED(0x001) + +/// +/// The object has already been initialized. +/// +#define AZAC_ERR_ALREADY_INITIALIZED __AZAC_ERRCODE_FAILED(0x002) + +/// +/// An unhandled exception was detected. +/// +#define AZAC_ERR_UNHANDLED_EXCEPTION __AZAC_ERRCODE_FAILED(0x003) + +/// +/// The object or property was not found. +/// +#define AZAC_ERR_NOT_FOUND __AZAC_ERRCODE_FAILED(0x004) + +/// +/// One or more arguments are not valid. +/// +#define AZAC_ERR_INVALID_ARG __AZAC_ERRCODE_FAILED(0x005) + +/// +/// The specified timeout value has elapsed. +/// +#define AZAC_ERR_TIMEOUT __AZAC_ERRCODE_FAILED(0x006) + +/// +/// The asynchronous operation is already in progress. +/// +#define AZAC_ERR_ALREADY_IN_PROGRESS __AZAC_ERRCODE_FAILED(0x007) + +/// +/// The attempt to open the file failed. +/// +#define AZAC_ERR_FILE_OPEN_FAILED __AZAC_ERRCODE_FAILED(0x008) + +/// +/// The end of the file was reached unexpectedly. +/// +#define AZAC_ERR_UNEXPECTED_EOF __AZAC_ERRCODE_FAILED(0x009) + +/// +/// Invalid audio header encountered. +/// +#define AZAC_ERR_INVALID_HEADER __AZAC_ERRCODE_FAILED(0x00a) + +/// +/// The requested operation cannot be performed while audio is pumping +/// +#define AZAC_ERR_AUDIO_IS_PUMPING __AZAC_ERRCODE_FAILED(0x00b) + +/// +/// Unsupported audio format. +/// +#define AZAC_ERR_UNSUPPORTED_FORMAT __AZAC_ERRCODE_FAILED(0x00c) + +/// +/// Operation aborted. +/// +#define AZAC_ERR_ABORT __AZAC_ERRCODE_FAILED(0x00d) + +/// +/// Microphone is not available. +/// +#define AZAC_ERR_MIC_NOT_AVAILABLE __AZAC_ERRCODE_FAILED(0x00e) + +/// +/// An invalid state was encountered. +/// +#define AZAC_ERR_INVALID_STATE __AZAC_ERRCODE_FAILED(0x00f) + +/// +/// Attempting to create a UUID failed. 
+/// +#define AZAC_ERR_UUID_CREATE_FAILED __AZAC_ERRCODE_FAILED(0x010) + +/// +/// An unexpected session state transition was encountered when setting the session audio format. +/// +/// +/// Valid transitions are: +/// * WaitForPumpSetFormatStart --> ProcessingAudio (at the beginning of stream) +/// * StoppingPump --> WaitForAdapterCompletedSetFormatStop (at the end of stream) +/// * ProcessingAudio --> WaitForAdapterCompletedSetFormatStop (when the stream runs out of data) +/// All other state transitions are invalid. +/// +#define AZAC_ERR_SETFORMAT_UNEXPECTED_STATE_TRANSITION __AZAC_ERRCODE_FAILED(0x011) + +/// +/// An unexpected session state was encountered in while processing audio. +/// +/// +/// Valid states to encounter are: +/// * ProcessingAudio: We're allowed to process audio while in this state. +/// * StoppingPump: We're allowed to be called to process audio, but we'll ignore the data passed in while we're attempting to stop the pump. +/// All other states are invalid while processing audio. +/// +#define AZAC_ERR_PROCESS_AUDIO_INVALID_STATE __AZAC_ERRCODE_FAILED(0x012) + +/// +/// An unexpected state transition was encountered while attempting to start recognizing. +/// +/// +/// A valid transition is: +/// * Idle --> WaitForPumpSetFormatStart +/// All other state transitions are invalid when attempting to start recognizing +/// +#define AZAC_ERR_START_RECOGNIZING_INVALID_STATE_TRANSITION __AZAC_ERRCODE_FAILED(0x013) + +/// +/// An unexpected error was encountered when trying to create an internal object. +/// +#define AZAC_ERR_UNEXPECTED_CREATE_OBJECT_FAILURE __AZAC_ERRCODE_FAILED(0x014) + +/// +/// An error in the audio-capturing system. +/// +#define AZAC_ERR_MIC_ERROR __AZAC_ERRCODE_FAILED(0x015) + +/// +/// The requested operation cannot be performed; there is no audio input. +/// +#define AZAC_ERR_NO_AUDIO_INPUT __AZAC_ERRCODE_FAILED(0x016) + +/// +/// An unexpected error was encountered when trying to access the USP site. 
+/// +#define AZAC_ERR_UNEXPECTED_USP_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x017) + +/// +/// An unexpected error was encountered when trying to access the LU site. +/// +#define AZAC_ERR_UNEXPECTED_LU_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x018) + +/// +/// The buffer is too small. +/// +#define AZAC_ERR_BUFFER_TOO_SMALL __AZAC_ERRCODE_FAILED(0x019) + +/// +/// A method failed to allocate memory. +/// +#define AZAC_ERR_OUT_OF_MEMORY __AZAC_ERRCODE_FAILED(0x01A) + +/// +/// An unexpected runtime error occurred. +/// +#define AZAC_ERR_RUNTIME_ERROR __AZAC_ERRCODE_FAILED(0x01B) + +/// +/// The url specified is invalid. +/// +#define AZAC_ERR_INVALID_URL __AZAC_ERRCODE_FAILED(0x01C) + +/// +/// The region specified is invalid or missing. +/// +#define AZAC_ERR_INVALID_REGION __AZAC_ERRCODE_FAILED(0x01D) + +/// +/// Switch between single shot and continuous recognition is not supported. +/// +#define AZAC_ERR_SWITCH_MODE_NOT_ALLOWED __AZAC_ERRCODE_FAILED(0x01E) + +/// +/// Changing connection status is not supported in the current recognition state. +/// +#define AZAC_ERR_CHANGE_CONNECTION_STATUS_NOT_ALLOWED __AZAC_ERRCODE_FAILED(0x01F) + +/// +/// Explicit connection management is not supported by the specified recognizer. +/// +#define AZAC_ERR_EXPLICIT_CONNECTION_NOT_SUPPORTED_BY_RECOGNIZER __AZAC_ERRCODE_FAILED(0x020) + +/// +/// The handle is invalid. +/// +#define AZAC_ERR_INVALID_HANDLE __AZAC_ERRCODE_FAILED(0x021) + +/// +/// The recognizer is invalid. +/// +#define AZAC_ERR_INVALID_RECOGNIZER __AZAC_ERRCODE_FAILED(0x022) + +/// +/// The value is out of range. +/// Added in version 1.3.0. +/// +#define AZAC_ERR_OUT_OF_RANGE __AZAC_ERRCODE_FAILED(0x023) + +/// +/// Extension library not found. +/// Added in version 1.3.0. +/// +#define AZAC_ERR_EXTENSION_LIBRARY_NOT_FOUND __AZAC_ERRCODE_FAILED(0x024) + +/// +/// An unexpected error was encountered when trying to access the TTS engine site. +/// Added in version 1.4.0. 
+/// +#define AZAC_ERR_UNEXPECTED_TTS_ENGINE_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x025) + +/// +/// An unexpected error was encountered when trying to access the audio output stream. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_UNEXPECTED_AUDIO_OUTPUT_FAILURE __AZAC_ERRCODE_FAILED(0x026) + +/// +/// Gstreamer internal error. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_GSTREAMER_INTERNAL_ERROR __AZAC_ERRCODE_FAILED(0x027) + +/// +/// Compressed container format not supported. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_CONTAINER_FORMAT_NOT_SUPPORTED_ERROR __AZAC_ERRCODE_FAILED(0x028) + +/// +/// Codec extension or gstreamer not found. +/// Added in version 1.4.0. +/// +#define AZAC_ERR_GSTREAMER_NOT_FOUND_ERROR __AZAC_ERRCODE_FAILED(0x029) + +/// +/// The language specified is missing. +/// Added in version 1.5.0. +/// +#define AZAC_ERR_INVALID_LANGUAGE __AZAC_ERRCODE_FAILED(0x02A) + +/// +/// The API is not applicable. +/// Added in version 1.5.0. +/// +#define AZAC_ERR_UNSUPPORTED_API_ERROR __AZAC_ERRCODE_FAILED(0x02B) + +/// +/// The ring buffer is unavailable. +/// Added in version 1.8.0. +/// +#define AZAC_ERR_RINGBUFFER_DATA_UNAVAILABLE __AZAC_ERRCODE_FAILED(0x02C) + +/// +/// An unexpected error was encountered when trying to access the Conversation site. +/// Added in version 1.5.0. +/// +#define AZAC_ERR_UNEXPECTED_CONVERSATION_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x030) + +/// +/// An unexpected error was encountered when trying to access the Conversation site. +/// Added in version 1.8.0. +/// +#define AZAC_ERR_UNEXPECTED_CONVERSATION_TRANSLATOR_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x031) + +/// +/// An asynchronous operation was canceled before it was executed. +/// Added in version 1.8.0. +/// +#define AZAC_ERR_CANCELED __AZAC_ERRCODE_FAILED(0x032) + +/// +/// Codec for compression could not be initialized. +/// Added in version 1.10.0. 
+/// +#define AZAC_ERR_COMPRESS_AUDIO_CODEC_INITIFAILED __AZAC_ERRCODE_FAILED(0x033) + +/// +/// Data not available. +/// Added in version 1.10.0. +/// +#define AZAC_ERR_DATA_NOT_AVAILABLE __AZAC_ERRCODE_FAILED(0x034) + +/// +/// Invalid result reason. +/// Added in version 1.12.0 +/// +#define AZAC_ERR_INVALID_RESULT_REASON __AZAC_ERRCODE_FAILED(0x035) + +/// +/// An unexpected error was encountered when trying to access the RNN-T site. +/// +#define AZAC_ERR_UNEXPECTED_RNNT_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x036) + +/// +/// Sending of a network message failed. +/// +#define AZAC_ERR_NETWORK_SEND_FAILED __AZAC_ERRCODE_FAILED(0x037) + +/// +/// Audio extension library not found. +/// Added in version 1.16.0. +/// +#define AZAC_ERR_AUDIO_SYS_LIBRARY_NOT_FOUND __AZAC_ERRCODE_FAILED(0x038) + +/// +/// An error in the audio-rendering system. +/// Added in version 1.20.0 +/// +#define AZAC_ERR_LOUDSPEAKER_ERROR __AZAC_ERRCODE_FAILED(0x039) + +/// +/// An unexpected error was encountered when trying to access the Vision site. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_VISION_SITE_FAILURE __AZAC_ERRCODE_FAILED(0x050) + +/// +/// Stream number provided was invalid in the current context. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_INVALID_STREAM __AZAC_ERRCODE_FAILED(0x060) + +/// +/// Offset required is invalid in the current context. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_INVALID_OFFSET __AZAC_ERRCODE_FAILED(0x061) + +/// +/// No more data is available in source. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_NO_MORE_DATA __AZAC_ERRCODE_FAILED(0x062) + +/// +/// Source has not been started. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_NOT_STARTED __AZAC_ERRCODE_FAILED(0x063) + +/// +/// Source has already been started. +/// Added in version 1.15.0. +/// +#define AZAC_ERR_MEDIA_ALREADY_STARTED __AZAC_ERRCODE_FAILED(0x064) + +/// +/// Media device creation failed. +/// Added in version 1.18.0. 
+/// +#define AZAC_ERR_MEDIA_DEVICE_CREATION_FAILED __AZAC_ERRCODE_FAILED(0x065) + +/// +/// No devices of the selected category are available. +/// Added in version 1.18.0. +/// +#define AZAC_ERR_MEDIA_NO_DEVICE_AVAILABLE __AZAC_ERRCODE_FAILED(0x066) + +/// +/// Enabled Voice Activity Detection while using keyword recognition is not allowed. +/// +#define AZAC_ERR_VAD_CANNOT_BE_USED_WITH_KEYWORD_RECOGNIZER __AZAC_ERRCODE_FAILED(0x067) + +/// +/// The specified RecoEngineAdapter could not be created. +/// +#define AZAC_ERR_COULD_NOT_CREATE_ENGINE_ADAPTER __AZAC_ERRCODE_FAILED(0x070) + +/// +/// The input file has a size of 0 bytes. +/// +#define AZAC_ERR_INPUT_FILE_SIZE_IS_ZERO_BYTES __AZAC_ERRCODE_FAILED(0x072) + +/// +/// Cannot open the input media file for reading. Does it exist? +/// +#define AZAC_ERR_FAILED_TO_OPEN_INPUT_FILE_FOR_READING __AZAC_ERRCODE_FAILED(0x073) + +/// +/// Failed to read from the input media file. +/// +#define AZAC_ERR_FAILED_TO_READ_FROM_INPUT_FILE __AZAC_ERRCODE_FAILED(0x074) + +/// +/// Input media file is too large. +/// +#define AZAC_ERR_INPUT_FILE_TOO_LARGE __AZAC_ERRCODE_FAILED(0x075) + +/// +/// The input URL is unsupported. It should start with `http://`, `https://` or `rtsp://`. +/// +#define AZAC_ERR_UNSUPPORTED_URL_PROTOCOL __AZAC_ERRCODE_FAILED(0x076) + +/// +/// The Nullable value is empty. Check HasValue() before getting the value. +/// +#define AZAC_ERR_EMPTY_NULLABLE __AZAC_ERRCODE_FAILED(0x077) + +/// +/// The given model version string is not in the expected format. The format +/// is specified by the regular expression `^(latest|\d{4}-\d{2}-\d{2})(-preview)?$`. +/// +#define AZAC_ERR_INVALID_MODEL_VERSION_FORMAT __AZAC_ERRCODE_FAILED(0x078) + +/// +/// Malformed network message +/// +#define AZAC_ERR_NETWORK_MALFORMED __AZAC_ERRCODE_FAILED(0x090) + +/// +/// Unexpected message received +/// +#define AZAC_ERR_NETWORK_PROTOCOL_VIOLATION __AZAC_ERRCODE_FAILED(0x091) + +/// +/// MAS extension library not found. 
+/// +#define AZAC_ERR_MAS_LIBRARY_NOT_FOUND __AZAC_ERRCODE_FAILED(0x092) diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c.h new file mode 100644 index 0000000..61a4a5b --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c.h @@ -0,0 +1,51 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c.h: Master include header for public C API declarations +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_config.h new file mode 100644 index 0000000..a8d042e --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_config.h @@ -0,0 +1,27 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_audio_config.h: Public API declarations for audio configuration related C methods and types +// + +#pragma once +#include + + +SPXAPI_(bool) audio_config_is_handle_valid(SPXAUDIOCONFIGHANDLE haudioConfig); +SPXAPI audio_config_create_audio_input_from_default_microphone(SPXAUDIOCONFIGHANDLE* haudioConfig); +SPXAPI audio_config_create_audio_input_from_a_microphone(SPXAUDIOCONFIGHANDLE* haudioConfig, const char* deviceName); +SPXAPI audio_config_create_audio_input_from_wav_file_name(SPXAUDIOCONFIGHANDLE* haudioConfig, const char* fileName); +SPXAPI audio_config_create_audio_input_from_stream(SPXAUDIOCONFIGHANDLE* haudioConfig, SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_config_create_push_audio_input_stream(SPXAUDIOCONFIGHANDLE* haudioConfig, SPXAUDIOSTREAMHANDLE* haudioStream, SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_config_create_pull_audio_input_stream(SPXAUDIOCONFIGHANDLE* haudioConfig, SPXAUDIOSTREAMHANDLE* haudioStream, SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_config_create_audio_output_from_default_speaker(SPXAUDIOCONFIGHANDLE* haudioConfig); +SPXAPI audio_config_create_audio_output_from_a_speaker(SPXAUDIOCONFIGHANDLE* haudioConfig, const char* deviceName); +SPXAPI audio_config_create_audio_output_from_wav_file_name(SPXAUDIOCONFIGHANDLE* haudioConfig, const char* fileName); +SPXAPI audio_config_create_audio_output_from_stream(SPXAUDIOCONFIGHANDLE* haudioConfig, SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_config_set_audio_processing_options(SPXAUDIOCONFIGHANDLE haudioConfig, SPXAUDIOPROCESSINGOPTIONSHANDLE haudioProcessingOptions); +SPXAPI audio_config_get_audio_processing_options(SPXAUDIOCONFIGHANDLE haudioConfig, SPXAUDIOPROCESSINGOPTIONSHANDLE* haudioProcessingOptions); +SPXAPI audio_config_release(SPXAUDIOCONFIGHANDLE haudioConfig); +SPXAPI audio_config_get_property_bag(SPXAUDIOCONFIGHANDLE haudioConfig, SPXPROPERTYBAGHANDLE* hpropbag); + diff --git 
a/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_processing_options.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_processing_options.h new file mode 100644 index 0000000..ece9933 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_processing_options.h @@ -0,0 +1,173 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_audio_processing_options.h: Public API declarations for audio processing options related C methods and types +// + +#pragma once +#include + +/// +/// Types of preset microphone array geometries. +/// See [Microphone Array Recommendations](/azure/cognitive-services/speech-service/speech-devices-sdk-microphone) for more details. +/// +typedef enum +{ + /// + /// Indicates that no geometry specified. Speech SDK will determine the microphone array geometry. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Uninitialized, + /// + /// Indicates a microphone array with one microphone in the center and six microphones evenly spaced + /// in a circle with radius approximately equal to 42.5 mm. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Circular7, + /// + /// Indicates a microphone array with one microphone in the center and three microphones evenly spaced + /// in a circle with radius approximately equal to 42.5 mm. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Circular4, + /// + /// Indicates a microphone array with four linearly placed microphones with 40 mm spacing between them. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Linear4, + /// + /// Indicates a microphone array with two linearly placed microphones with 40 mm spacing between them. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Linear2, + /// + /// Indicates a microphone array with a single microphone. 
+ /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Mono, + /// + /// Indicates a microphone array with custom geometry. + /// + AudioProcessingOptions_PresetMicrophoneArrayGeometry_Custom +} AudioProcessingOptions_PresetMicrophoneArrayGeometry; + +/// +/// Types of microphone arrays. +/// +typedef enum +{ + AudioProcessingOptions_MicrophoneArrayType_Linear, + AudioProcessingOptions_MicrophoneArrayType_Planar +} AudioProcessingOptions_MicrophoneArrayType; + +/// +/// Defines speaker reference channel position in input audio. +/// +typedef enum +{ + /// + /// Indicates that the input audio does not have a speaker reference channel. + /// + AudioProcessingOptions_SpeakerReferenceChannel_None, + /// + /// Indicates that the last channel in the input audio corresponds to the speaker + /// reference for echo cancellation. + /// + AudioProcessingOptions_SpeakerReferenceChannel_LastChannel +} AudioProcessingOptions_SpeakerReferenceChannel; + +#pragma pack(push, 1) + +/// +/// Represents coordinates of a microphone. +/// +typedef struct +{ + /// + /// X-coordinate of the microphone in millimeters. + /// + int X; + /// + /// Y-coordinate of the microphone in millimeters. + /// + int Y; + /// + /// Z-coordinate of the microphone in millimeters. + /// + int Z; +} AudioProcessingOptions_MicrophoneCoordinates; + +/// +/// Represents the geometry of a microphone array. +/// +typedef struct +{ + /// + /// Type of microphone array. + /// + AudioProcessingOptions_MicrophoneArrayType microphoneArrayType; + /// + /// Start angle for beamforming in degrees. + /// + uint16_t beamformingStartAngle; + /// + /// End angle for beamforming in degrees. + /// + uint16_t beamformingEndAngle; + /// + /// Number of microphones in the microphone array. + /// + uint16_t numberOfMicrophones; + /// + /// Coordinates of microphones in the microphone array. 
+ /// + AudioProcessingOptions_MicrophoneCoordinates* microphoneCoordinates; +} AudioProcessingOptions_MicrophoneArrayGeometry; + +#pragma pack(pop) + +/// +/// Disables built-in input audio processing. +/// +const int AUDIO_INPUT_PROCESSING_NONE = 0x00000000; +/// +/// Enables default built-in input audio processing. +/// +const int AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT = 0x00000001; +/// +/// Disables dereverberation in the default audio processing pipeline. +/// +const int AUDIO_INPUT_PROCESSING_DISABLE_DEREVERBERATION = 0x00000002; +/// +/// Disables noise suppression in the default audio processing pipeline. +/// +const int AUDIO_INPUT_PROCESSING_DISABLE_NOISE_SUPPRESSION = 0x00000004; +/// +/// Disables automatic gain control in the default audio processing pipeline. +/// +const int AUDIO_INPUT_PROCESSING_DISABLE_GAIN_CONTROL = 0x00000008; +/// +/// Disables echo cancellation in the default audio processing pipeline. +/// +const int AUDIO_INPUT_PROCESSING_DISABLE_ECHO_CANCELLATION = 0x00000010; +/// +/// Enables voice activity detection in input audio processing. +/// +const int AUDIO_INPUT_PROCESSING_ENABLE_VOICE_ACTIVITY_DETECTION = 0x00000020; +/// +/// Enables the new version (V2) of input audio processing with improved echo cancellation performance. +/// This flag is mutually exclusive with AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT flag. +/// AUDIO_INPUT_PROCESSING_DISABLE_* flags do not affect this pipeline. +/// This feature is currently in preview and only available for Windows x64 and ARM64 platform. 
+/// +const int AUDIO_INPUT_PROCESSING_ENABLE_V2 = 0x00000040; + +SPXAPI_(bool) audio_processing_options_is_handle_valid(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions); +SPXAPI audio_processing_options_create(SPXAUDIOPROCESSINGOPTIONSHANDLE* hoptions, int audioProcessingFlags); +SPXAPI audio_processing_options_create_from_preset_microphone_array_geometry(SPXAUDIOPROCESSINGOPTIONSHANDLE* hoptions, int audioProcessingFlags, AudioProcessingOptions_PresetMicrophoneArrayGeometry microphoneArrayGeometry, AudioProcessingOptions_SpeakerReferenceChannel speakerReferenceChannel); +SPXAPI audio_processing_options_create_from_microphone_array_geometry(SPXAUDIOPROCESSINGOPTIONSHANDLE* hoptions, int audioProcessingFlags, const AudioProcessingOptions_MicrophoneArrayGeometry* microphoneArrayGeometry, AudioProcessingOptions_SpeakerReferenceChannel speakerReferenceChannel); +SPXAPI audio_processing_options_get_audio_processing_flags(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, int* audioProcessingFlags); +SPXAPI audio_processing_options_get_preset_microphone_array_geometry(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, AudioProcessingOptions_PresetMicrophoneArrayGeometry* microphoneArrayGeometry); +SPXAPI audio_processing_options_get_microphone_array_type(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, AudioProcessingOptions_MicrophoneArrayType* microphoneArrayType); +SPXAPI audio_processing_options_get_beamforming_start_angle(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, uint16_t* startAngle); +SPXAPI audio_processing_options_get_beamforming_end_angle(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, uint16_t* endAngle); +SPXAPI audio_processing_options_get_microphone_count(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, uint16_t* microphoneCount); +SPXAPI audio_processing_options_get_microphone_coordinates(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, AudioProcessingOptions_MicrophoneCoordinates* microphoneCoordinates, uint16_t microphoneCount); +SPXAPI 
audio_processing_options_get_speaker_reference_channel(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, AudioProcessingOptions_SpeakerReferenceChannel* speakerReferenceChannel); +SPXAPI audio_processing_options_release(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions); +SPXAPI audio_processing_options_get_property_bag(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_stream.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_stream.h new file mode 100644 index 0000000..d6ff17a --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_stream.h @@ -0,0 +1,68 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_audio_stream.h: Public API declarations for audio stream related C methods and types +// + +#pragma once +#include +#include + +typedef enum +{ + StreamStatus_Unknown = 0, + StreamStatus_NoData = 1, + StreamStatus_PartialData = 2, + StreamStatus_AllData = 3, + StreamStatus_Canceled = 4 +} Stream_Status; + +// audio_stream +SPXAPI_(bool) audio_stream_is_handle_valid(SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_stream_create_push_audio_input_stream(SPXAUDIOSTREAMHANDLE* haudioStream, SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_stream_create_pull_audio_input_stream(SPXAUDIOSTREAMHANDLE* haudioStream, SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_stream_create_pull_audio_output_stream(SPXAUDIOSTREAMHANDLE* haudioStream); +SPXAPI audio_stream_create_push_audio_output_stream(SPXAUDIOSTREAMHANDLE* haudioStream); +SPXAPI audio_stream_release(SPXAUDIOSTREAMHANDLE haudioStream); + +// pull_audio_input_stream +typedef int (*CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK)(void* pvContext, uint8_t* buffer, uint32_t size); +typedef void (*CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK)(void* pvContext); +typedef void 
(*CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK)(void* pvContext, int id, uint8_t* value, uint32_t size); +SPXAPI pull_audio_input_stream_set_callbacks(SPXAUDIOSTREAMHANDLE haudioStream, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback); +SPXAPI pull_audio_input_stream_set_getproperty_callback(SPXAUDIOSTREAMHANDLE haudioStream, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback); + +// push_audio_input_stream +SPXAPI push_audio_input_stream_write(SPXAUDIOSTREAMHANDLE haudioStream, uint8_t* buffer, uint32_t size); +SPXAPI push_audio_input_stream_close(SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI push_audio_input_stream_set_property_by_id(SPXAUDIOSTREAMHANDLE haudioStream, int id, const char* value); +SPXAPI push_audio_input_stream_set_property_by_name(SPXAUDIOSTREAMHANDLE haudioStream, const char* name, const char* value); + +// pull audio output stream +SPXAPI pull_audio_output_stream_read(SPXAUDIOSTREAMHANDLE haudioStream, uint8_t* buffer, uint32_t bufferSize, uint32_t* pfilledSize); + +// push_audio_output_stream +typedef int(*CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK)(void* pvContext, uint8_t* buffer, uint32_t size); +typedef void(*CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK)(void* pvContext); +SPXAPI push_audio_output_stream_set_callbacks(SPXAUDIOSTREAMHANDLE haudioStream, void* pvContext, CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK writeCallback, CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK closeCallback); + +// audio data stream +SPXAPI_(bool) audio_data_stream_is_handle_valid(SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_data_stream_create_from_file(SPXAUDIOSTREAMHANDLE* haudioStream, const char* fileName); +SPXAPI audio_data_stream_create_from_result(SPXAUDIOSTREAMHANDLE* haudioStream, SPXRESULTHANDLE hresult); +SPXAPI audio_data_stream_create_from_keyword_result(SPXAUDIOSTREAMHANDLE* audioStreamHandle, SPXRESULTHANDLE resultHandle); +SPXAPI 
audio_data_stream_get_status(SPXAUDIOSTREAMHANDLE haudioStream, Stream_Status* status); +SPXAPI audio_data_stream_get_reason_canceled(SPXAUDIOSTREAMHANDLE haudioStream, Result_CancellationReason* reason); +SPXAPI audio_data_stream_get_canceled_error_code(SPXAUDIOSTREAMHANDLE haudioStream, Result_CancellationErrorCode* errorCode); +SPXAPI_(bool) audio_data_stream_can_read_data(SPXAUDIOSTREAMHANDLE haudioStream, uint32_t requestedSize); +SPXAPI_(bool) audio_data_stream_can_read_data_from_position(SPXAUDIOSTREAMHANDLE haudioStream, uint32_t requestedSize, uint32_t position); +SPXAPI_(uint32_t) audio_data_stream_get_available_size(SPXAUDIOSTREAMHANDLE haudioStream); +SPXAPI audio_data_stream_read(SPXAUDIOSTREAMHANDLE haudioStream, uint8_t* buffer, uint32_t bufferSize, uint32_t* pfilledSize); +SPXAPI audio_data_stream_read_from_position(SPXAUDIOSTREAMHANDLE haudioStream, uint8_t* buffer, uint32_t bufferSize, uint32_t position, uint32_t* pfilledSize); +SPXAPI audio_data_stream_save_to_wave_file(SPXAUDIOSTREAMHANDLE haudioStream, const char* fileName); +SPXAPI audio_data_stream_get_position(SPXAUDIOSTREAMHANDLE haudioStream, uint32_t* position); +SPXAPI audio_data_stream_set_position(SPXAUDIOSTREAMHANDLE haudioStream, uint32_t position); +SPXAPI audio_data_stream_detach_input(SPXAUDIOSTREAMHANDLE audioStreamHandle); +SPXAPI audio_data_stream_get_property_bag(SPXAUDIOSTREAMHANDLE haudioStream, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI audio_data_stream_release(SPXAUDIOSTREAMHANDLE haudioStream); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_stream_format.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_stream_format.h new file mode 100644 index 0000000..ac1e8e4 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_audio_stream_format.h @@ -0,0 +1,93 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_audio_stream_format.h: Public API declarations for audio stream format related C methods and types +// + +#pragma once +#include + +/// +/// Defines supported audio stream container format. +/// Changed in version 1.4.0. +/// +enum Audio_Stream_Container_Format +{ + /// + /// Stream ContainerFormat definition for OGG OPUS. + /// + StreamFormat_Ogg_Opus = 0x101, + + /// + /// Stream ContainerFormat definition for MP3. + /// + StreamFormat_Mp3 = 0x102, + + /// + /// Stream ContainerFormat definition for FLAC. Added in version 1.7.0. + /// + StreamFormat_Flac = 0x103, + + /// + /// Stream ContainerFormat definition for ALAW. Added in version 1.7.0. + /// + StreamFormat_Alaw = 0x104, + + /// + /// Stream ContainerFormat definition for MULAW. Added in version 1.7.0. + /// + StreamFormat_Mulaw = 0x105, + + /// + /// Stream ContainerFormat definition for AMRNB. Currently not supported. + /// + StreamFormat_Amrnb = 0x106, + + /// + /// Stream ContainerFormat definition for AMRWB. Currently not supported. + /// + StreamFormat_Amrwb = 0x107, + + /// + /// Stream ContainerFormat definition for any other or unknown format. + /// + StreamFormat_Any = 0x108, +}; + +/// +/// Defines supported audio stream wave format in WAV container. +/// +enum Audio_Stream_Wave_Format +{ + /// + /// Stream WaveFormat definition for PCM (pulse-code modulated) data in integer format. + /// + StreamWaveFormat_PCM = 0x0001, + + /// + /// Stream WaveFormat definition for A-law-encoded format. + /// + StreamWaveFormat_ALAW = 0x0006, + + /// + /// Stream WaveFormat definition for Mu-law-encoded format. + /// + StreamWaveFormat_MULAW = 0x0007, + + /// + /// Stream WaveFormat definition for G.722-encoded format. 
+ /// + StreamWaveFormat_G722 = 0x028F +}; + +typedef enum Audio_Stream_Container_Format Audio_Stream_Container_Format; +typedef enum Audio_Stream_Wave_Format Audio_Stream_Wave_Format; + +SPXAPI_(bool) audio_stream_format_is_handle_valid(SPXAUDIOSTREAMFORMATHANDLE hformat); +SPXAPI audio_stream_format_create_from_default_input(SPXAUDIOSTREAMFORMATHANDLE* hformat); +SPXAPI audio_stream_format_create_from_waveformat(SPXAUDIOSTREAMFORMATHANDLE* hformat, uint32_t samplesPerSecond, uint8_t bitsPerSample, uint8_t channels, Audio_Stream_Wave_Format waveFormat); +SPXAPI audio_stream_format_create_from_waveformat_pcm(SPXAUDIOSTREAMFORMATHANDLE* hformat, uint32_t samplesPerSecond, uint8_t bitsPerSample, uint8_t channels); +SPXAPI audio_stream_format_create_from_default_output(SPXAUDIOSTREAMFORMATHANDLE* hformat); +SPXAPI audio_stream_format_create_from_compressed_format(SPXAUDIOSTREAMFORMATHANDLE* hformat, Audio_Stream_Container_Format compressedFormat); +SPXAPI audio_stream_format_release(SPXAUDIOSTREAMFORMATHANDLE hformat); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_auto_detect_source_lang_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_auto_detect_source_lang_config.h new file mode 100644 index 0000000..f62c5eb --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_auto_detect_source_lang_config.h @@ -0,0 +1,15 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +SPXAPI create_auto_detect_source_lang_config_from_open_range(SPXAUTODETECTSOURCELANGCONFIGHANDLE* hAutoDetectSourceLanguageconfig); +SPXAPI create_auto_detect_source_lang_config_from_languages(SPXAUTODETECTSOURCELANGCONFIGHANDLE* hAutoDetectSourceLanguageconfig, const char* languages); +SPXAPI create_auto_detect_source_lang_config_from_source_lang_config(SPXAUTODETECTSOURCELANGCONFIGHANDLE* hAutoDetectSourceLanguageconfig, SPXSOURCELANGCONFIGHANDLE hSourceLanguageConfig); +SPXAPI add_source_lang_config_to_auto_detect_source_lang_config(SPXAUTODETECTSOURCELANGCONFIGHANDLE hAutoDetectSourceLanguageconfig, SPXSOURCELANGCONFIGHANDLE hSourceLanguageConfig); +SPXAPI_(bool) auto_detect_source_lang_config_is_handle_valid(SPXAUTODETECTSOURCELANGCONFIGHANDLE hAutoDetectSourceLanguageconfig); +SPXAPI auto_detect_source_lang_config_release(SPXAUTODETECTSOURCELANGCONFIGHANDLE hAutoDetectSourceLanguageconfig); +SPXAPI auto_detect_source_lang_config_get_property_bag(SPXAUTODETECTSOURCELANGCONFIGHANDLE hAutoDetectSourceLanguageconfig, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_common.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_common.h new file mode 100644 index 0000000..ebfb802 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_common.h @@ -0,0 +1,81 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_common.h: Public API declarations for global C definitions and typedefs +// + +#pragma once + +#include +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) +#include +#include + +#define SPX_EXTERN_C AZAC_EXTERN_C +#ifndef SPXAPI_EXPORT +#define SPXAPI_EXPORT AZAC_API_EXPORT +#endif + +#define SPXAPI_NOTHROW AZAC_API_NOTHROW +#define SPXAPI_RESULTTYPE SPXHR +#define SPXAPI_CALLTYPE AZAC_API_CALLTYPE +#define SPXAPI_VCALLTYPE AZAC_VCALLTYPE + +#define SPXDLL_EXPORT AZAC_DLL_EXPORT + +#define SPXAPI SPX_EXTERN_C SPXAPI_EXPORT SPXAPI_RESULTTYPE SPXAPI_NOTHROW SPXAPI_CALLTYPE +#define SPXAPI_(type) SPX_EXTERN_C SPXAPI_EXPORT type SPXAPI_NOTHROW SPXAPI_CALLTYPE +#define SPXAPI__(type) SPX_EXTERN_C SPXAPI_EXPORT SPXAPI_NOTHROW type SPXAPI_CALLTYPE + +#define SPXAPIV SPX_EXTERN_C SPXAPI_EXPORT SPXAPI_NOTHROW SPXAPI_RESULTTYPE SPXAPI_VCALLTYPE +#define SPXAPIV_(type) SPX_EXTERN_C SPXAPI_EXPORT SPXAPI_NOTHROW type SPXAPI_VCALLTYPE + +#define SPXAPI_PRIVATE SPX_EXTERN_C SPXAPI_RESULTTYPE SPXAPI_NOTHROW SPXAPI_CALLTYPE +#define SPXAPI_PRIVATE_(type) SPX_EXTERN_C type SPXAPI_NOTHROW SPXAPI_CALLTYPE + +#define _spx_empty _azac_empty +#define _spxhandle _azac_handle +#define SPXHANDLE AZAC_HANDLE +#define SPXERRORHANDLE AZAC_HANDLE + +#define SPXPROPERTYBAGHANDLE AZAC_HANDLE +typedef SPXHANDLE SPXASYNCHANDLE; +typedef SPXHANDLE SPXFACTORYHANDLE; +typedef SPXHANDLE SPXRECOHANDLE; +typedef SPXHANDLE SPXSYNTHHANDLE; +typedef SPXHANDLE SPXRESULTHANDLE; +typedef SPXHANDLE SPXEVENTHANDLE; +typedef SPXHANDLE SPXSESSIONHANDLE; +typedef SPXHANDLE SPXTRIGGERHANDLE; +typedef SPXHANDLE SPXLUMODELHANDLE; +typedef SPXHANDLE SPXKEYWORDHANDLE; +typedef SPXHANDLE SPXAUDIOSTREAMFORMATHANDLE; +typedef SPXHANDLE SPXAUDIOSTREAMHANDLE; +typedef SPXHANDLE SPXAUDIOCONFIGHANDLE; +typedef SPXHANDLE SPXSPEECHCONFIGHANDLE; +typedef SPXHANDLE SPXCONNECTIONHANDLE; +typedef SPXHANDLE SPXCONNECTIONMESSAGEHANDLE; +typedef SPXHANDLE SPXACTIVITYHANDLE; 
+typedef SPXHANDLE SPXACTIVITYJSONHANDLE; +typedef SPXHANDLE SPXGRAMMARHANDLE; +typedef SPXHANDLE SPXPHRASEHANDLE; +typedef SPXHANDLE SPXUSERHANDLE; +typedef SPXHANDLE SPXPARTICIPANTHANDLE; +typedef SPXHANDLE SPXAUTODETECTSOURCELANGCONFIGHANDLE; +typedef SPXHANDLE SPXSOURCELANGCONFIGHANDLE; +typedef SPXHANDLE SPXCONVERSATIONHANDLE; +typedef SPXHANDLE SPXMEETINGHANDLE; +typedef SPXHANDLE SPXCONVERSATIONTRANSLATORHANDLE; +typedef SPXHANDLE SPXVOICEPROFILECLIENTHANDLE; +typedef SPXHANDLE SPXVOICEPROFILEHANDLE; +typedef SPXHANDLE SPXSPEAKERIDHANDLE; +typedef SPXHANDLE SPXSIMODELHANDLE; +typedef SPXHANDLE SPXSVMODELHANDLE; +typedef SPXHANDLE SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE; +typedef SPXHANDLE SPXAUDIOPROCESSINGOPTIONSHANDLE; +typedef SPXHANDLE SPXSPEECHRECOMODELHANDLE; +typedef SPXHANDLE SPXREQUESTHANDLE; + +#define SPXHANDLE_INVALID ((SPXHANDLE)-1) +#define SPXHANDLE_RESERVED1 ((SPXHANDLE)+1) diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_connection.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_connection.h new file mode 100644 index 0000000..3bb69c9 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_connection.h @@ -0,0 +1,46 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +SPXAPI connection_from_recognizer(SPXRECOHANDLE recognizerHandle, SPXCONNECTIONHANDLE* connectionHandle); +SPXAPI connection_from_conversation_translator(SPXCONVERSATIONTRANSLATORHANDLE convTransHandle, SPXCONNECTIONHANDLE* connectionHandle); +SPXAPI connection_from_dialog_service_connector(SPXRECOHANDLE convTransHandle, SPXCONNECTIONHANDLE* connectionHandle); +SPXAPI connection_from_speech_synthesizer(SPXSYNTHHANDLE synthesizerHandle, SPXCONNECTIONHANDLE* connectionHandle); + +SPXAPI_(bool) connection_handle_is_valid(SPXCONNECTIONHANDLE handle); +SPXAPI connection_handle_release(SPXCONNECTIONHANDLE handle); +SPXAPI connection_async_handle_release(SPXASYNCHANDLE hasync); + +SPXAPI connection_open(SPXCONNECTIONHANDLE handle, bool forContinuousRecognition); +SPXAPI connection_close(SPXCONNECTIONHANDLE handle); +SPXAPI connection_set_message_property(SPXCONNECTIONHANDLE handle, const char* path, const char* name, const char* value); +SPXAPI connection_send_message(SPXCONNECTIONHANDLE handle, const char* path, const char* payload); +SPXAPI connection_send_message_async(SPXCONNECTIONHANDLE handle, const char* path, const char* payload, SPXASYNCHANDLE* phasync); + +SPXAPI connection_send_message_data(SPXCONNECTIONHANDLE handle, const char* path, uint8_t* data, uint32_t size); +SPXAPI connection_send_message_data_async(SPXCONNECTIONHANDLE handle, const char* path, uint8_t* data, uint32_t size, SPXASYNCHANDLE* phasync); + +SPXAPI connection_send_message_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI connection_get_property_bag(SPXRECOHANDLE hconn, SPXPROPERTYBAGHANDLE* hpropbag); + +typedef void(*CONNECTION_CALLBACK_FUNC)(SPXEVENTHANDLE event, void* context); +SPXAPI connection_connected_set_callback(SPXCONNECTIONHANDLE connection, CONNECTION_CALLBACK_FUNC callback, void* context); +SPXAPI connection_disconnected_set_callback(SPXCONNECTIONHANDLE connection, CONNECTION_CALLBACK_FUNC callback, void* context); +SPXAPI 
connection_message_received_set_callback(SPXCONNECTIONHANDLE connection, CONNECTION_CALLBACK_FUNC callback, void* context); + +SPXAPI_(bool) connection_message_received_event_handle_is_valid(SPXEVENTHANDLE hevent); +SPXAPI connection_message_received_event_handle_release(SPXEVENTHANDLE hevent); + +SPXAPI connection_message_received_event_get_message(SPXEVENTHANDLE hevent, SPXCONNECTIONMESSAGEHANDLE* hcm); + +SPXAPI_(bool) connection_message_handle_is_valid(SPXCONNECTIONMESSAGEHANDLE handle); +SPXAPI connection_message_handle_release(SPXCONNECTIONMESSAGEHANDLE handle); + +SPXAPI connection_message_get_property_bag(SPXCONNECTIONMESSAGEHANDLE hcm, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI connection_message_get_data(SPXCONNECTIONMESSAGEHANDLE hcm, uint8_t* data, uint32_t size); +SPXAPI_(uint32_t) connection_message_get_data_size(SPXCONNECTIONMESSAGEHANDLE hcm); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation.h new file mode 100644 index 0000000..be12f34 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation.h @@ -0,0 +1,28 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_conversation.h: Public API declarations for conversation related C methods and typedefs +// + +#pragma once +#include + +SPXAPI conversation_create_from_config(SPXCONVERSATIONHANDLE* phconv, SPXSPEECHCONFIGHANDLE hspeechconfig, const char* id); +SPXAPI conversation_update_participant_by_user_id(SPXCONVERSATIONHANDLE hconv, bool add, const char* userId); +SPXAPI conversation_update_participant_by_user(SPXCONVERSATIONHANDLE hconv, bool add, SPXUSERHANDLE huser); +SPXAPI conversation_update_participant(SPXCONVERSATIONHANDLE hconv, bool add, SPXPARTICIPANTHANDLE hparticipant); +SPXAPI conversation_get_conversation_id(SPXCONVERSATIONHANDLE hconv, char* id, size_t size); +SPXAPI conversation_end_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_get_property_bag(SPXCONVERSATIONHANDLE hconv, SPXPROPERTYBAGHANDLE* phpropbag); +SPXAPI conversation_release_handle(SPXHANDLE handle); + +SPXAPI conversation_start_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_delete_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_lock_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_unlock_conversation(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_mute_all_participants(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_unmute_all_participants(SPXCONVERSATIONHANDLE hconv); +SPXAPI conversation_mute_participant(SPXCONVERSATIONHANDLE hconv, const char * participantId); +SPXAPI conversation_unmute_participant(SPXCONVERSATIONHANDLE hconv, const char * participantId); + diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation_transcription_result.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation_transcription_result.h new file mode 100644 index 0000000..7bcc7b1 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation_transcription_result.h @@ -0,0 +1,11 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_conversation_transcriber_result.h: Public API declarations for ConversationTranscriberResult related C methods and enumerations +// + +#pragma once +#include + +SPXAPI conversation_transcription_result_get_speaker_id(SPXRESULTHANDLE hresult, char* pszSpeakerId, uint32_t cchSpeakerId); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation_translator.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation_translator.h new file mode 100644 index 0000000..0b1881b --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_conversation_translator.h @@ -0,0 +1,63 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_conversation_translator.h: Public API declarations for conversation translator related C methods and typedefs +// + +#pragma once +#include +#include + +#ifdef __cplusplus +#include +typedef Microsoft::CognitiveServices::Speech::Transcription::ParticipantChangedReason ParticipantChangedReason; +#else +#include +#endif + +typedef void(*PCONV_TRANS_CALLBACK)(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, SPXEVENTHANDLE hEvent, void* pvContext); + +SPXAPI conversation_translator_create_from_config(SPXCONVERSATIONTRANSLATORHANDLE* phandle, SPXAUDIOCONFIGHANDLE haudioinput); +SPXAPI conversation_translator_get_property_bag(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, SPXPROPERTYBAGHANDLE* phpropertyBag); + +SPXAPI conversation_translator_join(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, SPXCONVERSATIONHANDLE hconv, const char* psznickname); +SPXAPI conversation_translator_join_with_id(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, const char *pszconversationid, const char* psznickname, const char * pszlang); +SPXAPI conversation_translator_start_transcribing(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator); 
+SPXAPI conversation_translator_stop_transcribing(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator); +SPXAPI conversation_translator_send_text_message(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, const char *pszmessage); +SPXAPI conversation_translator_leave(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator); +SPXAPI conversation_translator_set_authorization_token(SPXCONVERSATIONTRANSLATORHANDLE hconvtranslator, const char* pszAuthToken, const char* pszRegion); + +SPXAPI_(bool) conversation_translator_handle_is_valid(SPXCONVERSATIONTRANSLATORHANDLE handle); +SPXAPI conversation_translator_handle_release(SPXHANDLE handle); + +SPXAPI conversation_translator_session_started_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_session_stopped_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_canceled_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_participants_changed_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_conversation_expiration_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_transcribing_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_transcribed_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); +SPXAPI conversation_translator_text_message_recevied_set_callback(SPXCONVERSATIONTRANSLATORHANDLE hConvTrans, PCONV_TRANS_CALLBACK pCallback, void* pvContext); + +SPXAPI conversation_translator_connection_connected_set_callback(SPXCONNECTIONHANDLE hConnection, 
CONNECTION_CALLBACK_FUNC pCallback, void * pvContext); +SPXAPI conversation_translator_connection_disconnected_set_callback(SPXCONNECTIONHANDLE hConnection, CONNECTION_CALLBACK_FUNC pCallback, void * pvContext); + +SPXAPI_(bool) conversation_translator_event_handle_is_valid(SPXCONVERSATIONTRANSLATORHANDLE handle); +SPXAPI conversation_translator_event_handle_release(SPXHANDLE handle); + +SPXAPI conversation_translator_event_get_expiration_time(SPXEVENTHANDLE hevent, int32_t* pexpirationminutes); +SPXAPI conversation_translator_event_get_participant_changed_reason(SPXEVENTHANDLE hevent, ParticipantChangedReason* preason); +SPXAPI conversation_translator_event_get_participant_changed_at_index(SPXEVENTHANDLE hevent, int index, SPXPARTICIPANTHANDLE* phparticipant); + +SPXAPI conversation_translator_result_get_user_id(SPXRESULTHANDLE hresult, char* pszUserId, uint32_t cchUserId); + +SPXAPI conversation_translator_result_get_original_lang(SPXRESULTHANDLE hresult, char * psz, uint32_t * pcch); + +SPXAPI conversation_translator_participant_get_avatar(SPXEVENTHANDLE hevent, char * psz, uint32_t * pcch); +SPXAPI conversation_translator_participant_get_displayname(SPXEVENTHANDLE hevent, char * psz, uint32_t * pcch); +SPXAPI conversation_translator_participant_get_id(SPXEVENTHANDLE hevent, char * psz, uint32_t * pcch); +SPXAPI conversation_translator_participant_get_is_muted(SPXEVENTHANDLE hevent, bool * pMuted); +SPXAPI conversation_translator_participant_get_is_host(SPXEVENTHANDLE hevent, bool * pIsHost); +SPXAPI conversation_translator_participant_get_is_using_tts(SPXEVENTHANDLE hevent, bool * ptts); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_diagnostics.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_diagnostics.h new file mode 100644 index 0000000..25b1a73 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_diagnostics.h @@ -0,0 +1,8 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_dialog_service_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_dialog_service_config.h new file mode 100644 index 0000000..4fb8b35 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_dialog_service_config.h @@ -0,0 +1,15 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_dialog_service_config.h: Public API declarations for dialog service connector configuration related C methods and types +// +#pragma once + +#include + +SPXAPI bot_framework_config_from_subscription(SPXSPEECHCONFIGHANDLE* ph_config, const char* subscription, const char* region, const char *bot_Id); +SPXAPI bot_framework_config_from_authorization_token(SPXSPEECHCONFIGHANDLE* ph_config, const char* auth_token, const char* region, const char* bot_Id); + +SPXAPI custom_commands_config_from_subscription(SPXSPEECHCONFIGHANDLE* ph_dialog_service_config, const char* app_id, const char *subscription, const char* region); +SPXAPI custom_commands_config_from_authorization_token(SPXSPEECHCONFIGHANDLE* ph_dialog_service_config, const char* app_id, const char *auth_token, const char* region); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_dialog_service_connector.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_dialog_service_connector.h new file mode 100644 index 0000000..94e0fbf --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_dialog_service_connector.h @@ -0,0 +1,92 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_dialog_service_connector.h: Public API declaration for Dialog Service Connector related C methods. +// + +#pragma once +#include + +SPXAPI_(bool) dialog_service_connector_handle_is_valid(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_handle_release(SPXRECOHANDLE h_connector); + +SPXAPI_(bool) dialog_service_connector_async_handle_is_valid(SPXASYNCHANDLE h_async); +SPXAPI dialog_service_connector_async_handle_release(SPXASYNCHANDLE h_async); + +SPXAPI_(bool) dialog_service_connector_async_void_handle_is_valid(SPXASYNCHANDLE h_async); +SPXAPI dialog_service_connector_async_void_handle_release(SPXASYNCHANDLE h_async); + +SPXAPI_(bool) dialog_service_connector_async_string_handle_is_valid(SPXASYNCHANDLE h_async); +SPXAPI dialog_service_connector_async_string_handle_release(SPXASYNCHANDLE h_async); + +SPXAPI_(bool) dialog_service_connector_async_reco_result_handle_is_valid(SPXASYNCHANDLE h_async); +SPXAPI dialog_service_connector_async_reco_result_handle_release(SPXASYNCHANDLE h_async); + +SPXAPI_(bool) dialog_service_connector_activity_received_event_handle_is_valid(SPXEVENTHANDLE h_event); +SPXAPI dialog_service_connector_activity_received_event_release(SPXEVENTHANDLE h_event); + +SPXAPI_(bool) dialog_service_connector_turn_status_received_handle_is_valid(SPXEVENTHANDLE h_event); +SPXAPI dialog_service_connector_turn_status_received_release(SPXEVENTHANDLE h_event); + +SPXAPI dialog_service_connector_get_property_bag(SPXRECOHANDLE h_connector, SPXPROPERTYBAGHANDLE* h_prop_bag); + +SPXAPI dialog_service_connector_connect(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_connect_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_connect_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); + +SPXAPI dialog_service_connector_disconnect(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_disconnect_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); +SPXAPI 
dialog_service_connector_disconnect_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); + +SPXAPI dialog_service_connector_send_activity(SPXRECOHANDLE h_connector, const char* activity, char* interaction_id); +SPXAPI dialog_service_connector_send_activity_async(SPXRECOHANDLE h_connector, const char* activity, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_send_activity_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds, char* interaction_id); + +SPXAPI dialog_service_connector_start_keyword_recognition(SPXRECOHANDLE h_connector, SPXKEYWORDHANDLE h_keyword); +SPXAPI dialog_service_connector_start_keyword_recognition_async(SPXRECOHANDLE h_connector, SPXKEYWORDHANDLE h_keyword, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_start_keyword_recognition_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); + +SPXAPI dialog_service_connector_stop_keyword_recognition(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_stop_keyword_recognition_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_stop_keyword_recognition_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); + +SPXAPI dialog_service_connector_listen_once(SPXRECOHANDLE h_connector, SPXRESULTHANDLE* p_result); +SPXAPI dialog_service_connector_listen_once_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); +SPXAPI dialog_service_connector_listen_once_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds, SPXRESULTHANDLE* p_result); + +SPXAPI dialog_service_connector_start_continuous_listening(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_start_continuous_listening_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); + +SPXAPI dialog_service_connector_stop_listening(SPXRECOHANDLE h_connector); +SPXAPI dialog_service_connector_stop_listening_async(SPXRECOHANDLE h_connector, SPXASYNCHANDLE* p_async); + +typedef void(*PSESSION_CALLBACK_FUNC)(SPXRECOHANDLE h_connector, 
SPXEVENTHANDLE h_event, void* pv_context); + +SPXAPI dialog_service_connector_session_started_set_callback(SPXRECOHANDLE h_connector, PSESSION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_session_stopped_set_callback(SPXRECOHANDLE h_connector, PSESSION_CALLBACK_FUNC p_callback, void *pv_context); + +SPXAPI dialog_service_connector_speech_start_detected_set_callback(SPXRECOHANDLE h_connector, PSESSION_CALLBACK_FUNC p_callback, void* pv_context); +SPXAPI dialog_service_connector_speech_end_detected_set_callback(SPXRECOHANDLE h_connector, PSESSION_CALLBACK_FUNC p_callback, void* pv_context); + +typedef void(*PRECOGNITION_CALLBACK_FUNC)(SPXRECOHANDLE h_connector, SPXEVENTHANDLE h_event, void* pv_context); + +SPXAPI dialog_service_connector_recognized_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_recognizing_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_canceled_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_activity_received_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void *pv_context); +SPXAPI dialog_service_connector_turn_status_received_set_callback(SPXRECOHANDLE h_connector, PRECOGNITION_CALLBACK_FUNC p_callback, void* pv_context); + +SPXAPI dialog_service_connector_activity_received_event_get_activity_size(SPXEVENTHANDLE h_event, size_t* size); +SPXAPI dialog_service_connector_activity_received_event_get_activity(SPXEVENTHANDLE h_event, char* p_activity, size_t size); +SPXAPI_(bool) dialog_service_connector_activity_received_event_has_audio(SPXEVENTHANDLE h_event); +SPXAPI dialog_service_connector_activity_received_event_get_audio(SPXEVENTHANDLE h_event, SPXAUDIOSTREAMHANDLE* p_audio); + +SPXAPI 
dialog_service_connector_turn_status_received_get_interaction_id_size(SPXEVENTHANDLE h_event, size_t* size); +SPXAPI dialog_service_connector_turn_status_received_get_interaction_id(SPXEVENTHANDLE h_event, char* p_interaction_id, size_t size); +SPXAPI dialog_service_connector_turn_status_received_get_conversation_id_size(SPXEVENTHANDLE h_event, size_t* size); +SPXAPI dialog_service_connector_turn_status_received_get_conversation_id(SPXEVENTHANDLE h_event, char* p_interaction_id, size_t size); +SPXAPI dialog_service_connector_turn_status_received_get_status(SPXEVENTHANDLE h_event, int* p_status); + +SPXAPI dialog_service_connector_recognized_size(SPXEVENTHANDLE h_event, uint32_t* size); +SPXAPI dialog_service_connector_recognized_get_result(SPXEVENTHANDLE h_event, uint32_t* size); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_embedded_speech_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_embedded_speech_config.h new file mode 100644 index 0000000..fb45fc1 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_embedded_speech_config.h @@ -0,0 +1,21 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include +#include +#include +#include + +SPXAPI embedded_speech_config_create(SPXSPEECHCONFIGHANDLE* hconfig); +SPXAPI embedded_speech_config_add_path(SPXSPEECHCONFIGHANDLE hconfig, const char* path); +SPXAPI embedded_speech_config_get_num_speech_reco_models(SPXSPEECHCONFIGHANDLE hconfig, uint32_t* numModels); +SPXAPI embedded_speech_config_get_speech_reco_model(SPXSPEECHCONFIGHANDLE hconfig, uint32_t index, SPXSPEECHRECOMODELHANDLE* hmodel); +SPXAPI embedded_speech_config_get_num_speech_translation_models(SPXSPEECHCONFIGHANDLE hconfig, uint32_t* numModels); +SPXAPI embedded_speech_config_get_speech_translation_model(SPXSPEECHCONFIGHANDLE hconfig, uint32_t index, SPXSPEECHRECOMODELHANDLE* hmodel); +SPXAPI embedded_speech_config_set_speech_recognition_model(SPXSPEECHCONFIGHANDLE hconfig, const char* name, const char* license); +SPXAPI embedded_speech_config_set_speech_synthesis_voice(SPXSPEECHCONFIGHANDLE hconfig, const char* name, const char* license); +SPXAPI embedded_speech_config_set_speech_translation_model(SPXSPEECHCONFIGHANDLE hconfig, const char* name, const char* license); +SPXAPI embedded_speech_config_set_keyword_recognition_model(SPXSPEECHCONFIGHANDLE hconfig, const char* name, const char* license); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_error.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_error.h new file mode 100644 index 0000000..0a581e3 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_error.h @@ -0,0 +1,9 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// + +#pragma once +#include +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_ext_audiocompression.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_ext_audiocompression.h new file mode 100644 index 0000000..8af2d78 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_ext_audiocompression.h @@ -0,0 +1,105 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include "speechapi_c_common.h" + +const char CODECCREATEEXPORTNAME[] = "codec_create"; +struct codec_c_interface; +typedef struct codec_c_interface* codec_c_interface_P; +typedef codec_c_interface_P SPXCODECCTYPE; + +/*! \cond INTERNAL */ + +/** + * The SPX_CODEC_CLIENT_GET_PROPERTY represents the function reading a property value + * @param id Property id. + * @param buffer caller provided buffer to receive the value of the property + * @param buffersize buffer size. If buffer is passed as null it will return the required buffer size. + * @param codecContext A pointer to caller data provided through the codec_create call. + * @return A return code or zero if successful. + */ +typedef SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *SPX_CODEC_CLIENT_GET_PROPERTY)(const char* id, char* buffer, uint64_t* buffersize, void* codecContext); + +/** + * The AUDIO_ENCODER_ONENCODEDDATA type represents an application-defined + * status callback function used to provide the encoded data. + * @param pBuffer audio data buffer. + * @param bytesToWrite The length of pBuffer in bytes. + * @param duration_100nanos The duration of the audio sample + * @param pContext A pointer to the application-defined callback context. 
+ */ +typedef void(SPXAPI_CALLTYPE *AUDIO_ENCODER_ONENCODEDDATA)(const uint8_t* pBuffer, size_t bytesToWrite, uint64_t duration_100nanos, void* pContext); + +struct codec_c_interface +{ + /** + * @param codec codec Object returned by the codec_create call to be initialized + * @param inputSamplesPerSecond sample rate for the input audio + * @param inputBitsPerSample bits per sample for the input audio + * @param inputChannels number of channel of the input audio + * @param dataCallback An application defined callback. + * @param pContext A pointer to the application-defined callback context. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *init)( + SPXCODECCTYPE codec, + uint32_t inputSamplesPerSecond, + uint8_t inputBitsPerSample, + uint8_t inputChannels, + AUDIO_ENCODER_ONENCODEDDATA datacallback, + void* pContext); + + /** + * @param codec codec object returned by the codec_create call. + * @param buffer caller provided buffer to receive the value of the property + * @param buffersize buffer size. If buffer is passed as null it will return the required buffer size. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE* get_format_type)(SPXCODECCTYPE codec, char* buffer, uint64_t* buffersize); + + /** + * Encodes raw PCM data. + * @param codec codec object returned by the codec_create call. + * @param pBuffer The PCM data. + * @param bytesToWrite The length pBuffer. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *encode) (SPXCODECCTYPE codec, const uint8_t* pBuffer, size_t bytesToWrite); + + /** + * Flushes the encoder. + * @param codec codec object returned by the codec_create call. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE(SPXAPI_CALLTYPE* flush)(SPXCODECCTYPE codec); + + /** + * Terminate the encoded stream immediately + * @param codec codec object returned by the codec_create call. 
+ * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *endstream)(SPXCODECCTYPE codec); + + /** + * Destroys the encoder. The codec object should not be used anymore after this call. + * @param codec codec object returned by the codec_create call. + * @return A return code or zero if successful. + */ + SPXAPI_RESULTTYPE (SPXAPI_CALLTYPE *destroy) (SPXCODECCTYPE codec); +}; + +/** +* Creates a codec object. This method needs to be exported from the dll +* @param codecid - codec id, can be null or empty if the library implements only one codec. +* @param codecContext - context to be used to call back to the caller +* @param property_read_func - function to read properties +* @return A codec object +*/ + +SPX_EXTERN_C SPXDLL_EXPORT SPXCODECCTYPE codec_create(const char* codecid, void* codecContext, SPX_CODEC_CLIENT_GET_PROPERTY property_read_func); +typedef SPXCODECCTYPE (*PCODEC_CREATE_FUNC)(const char* codecid, void* codecContext, SPX_CODEC_CLIENT_GET_PROPERTY property_read_func); + +/*! \endcond */ diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_factory.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_factory.h new file mode 100644 index 0000000..43e8b4b --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_factory.h @@ -0,0 +1,29 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +SPXAPI recognizer_create_speech_recognizer_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_speech_recognizer_from_auto_detect_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_speech_recognizer_from_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXSOURCELANGCONFIGHANDLE hSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_translation_recognizer_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_translation_recognizer_from_auto_detect_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_intent_recognizer_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_keyword_recognizer_from_audio_config(SPXRECOHANDLE* phreco, SPXAUDIOCONFIGHANDLE haudio); +SPXAPI recognizer_create_source_language_recognizer_from_auto_detect_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI synthesizer_create_speech_synthesizer_from_config(SPXSYNTHHANDLE* phsynth, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioOuput); +SPXAPI synthesizer_create_speech_synthesizer_from_auto_detect_source_lang_config(SPXSYNTHHANDLE* phsynth, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioOutput); +SPXAPI 
dialog_service_connector_create_dialog_service_connector_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +//SPXAPI recognizer_create_conversation_transcriber_from_config(SPXRECOHANDLE* phreco, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_conversation_transcriber_from_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_conversation_transcriber_from_auto_detect_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXAUTODETECTSOURCELANGCONFIGHANDLE hautoDetectSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_create_conversation_transcriber_from_source_lang_config(SPXRECOHANDLE* phreco, SPXSPEECHCONFIGHANDLE hspeechconfig, SPXSOURCELANGCONFIGHANDLE hSourceLangConfig, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_join_conversation(SPXCONVERSATIONHANDLE hconv, SPXRECOHANDLE hreco); +SPXAPI recognizer_leave_conversation(SPXRECOHANDLE hreco); +SPXAPI recognizer_create_meeting_transcriber_from_config(SPXRECOHANDLE* phreco, SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI recognizer_join_meeting(SPXMEETINGHANDLE hmeeting, SPXRECOHANDLE hreco); +SPXAPI recognizer_leave_meeting(SPXRECOHANDLE hreco); +SPXAPI transcriber_get_participants_list(SPXRECOHANDLE hreco, SPXPARTICIPANTHANDLE* participants, int size); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_grammar.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_grammar.h new file mode 100644 index 0000000..2833055 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_grammar.h @@ -0,0 +1,33 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_grammar.h: Public API declarations for Grammar related C methods and typedefs +// + +#pragma once +#include + +typedef enum +{ + // A Recognition Factor will apply to grammars that are referenced as individual words. + PartialPhrase = 1 +} GrammarList_RecognitionFactorScope; + +SPXAPI_(bool) grammar_handle_is_valid(SPXGRAMMARHANDLE hgrammar); +SPXAPI phrase_list_grammar_from_recognizer_by_name(SPXGRAMMARHANDLE* hgrammar, SPXRECOHANDLE hreco, const char* name); +SPXAPI grammar_handle_release(SPXGRAMMARHANDLE hgrammar); + +SPXAPI phrase_list_grammar_add_phrase(SPXGRAMMARHANDLE hgrammar, SPXPHRASEHANDLE hphrase); +SPXAPI phrase_list_grammar_clear(SPXGRAMMARHANDLE hgrammar); + +SPXAPI_(bool) grammar_phrase_handle_is_valid(SPXPHRASEHANDLE hphrase); +SPXAPI grammar_phrase_create_from_text(SPXPHRASEHANDLE* hphrase, const char* phrase); +SPXAPI grammar_phrase_handle_release(SPXPHRASEHANDLE hphrase); + +SPXAPI grammar_create_from_storage_id(SPXGRAMMARHANDLE *hgrammarlist, const char *id); +SPXAPI grammar_list_from_recognizer(SPXGRAMMARHANDLE *hgrammarlist, SPXRECOHANDLE hreco); +SPXAPI grammar_list_add_grammar(SPXGRAMMARHANDLE hgrammarlist, SPXGRAMMARHANDLE hgrammar); +SPXAPI grammar_list_set_recognition_factor(SPXGRAMMARHANDLE hgrammarlist, double factor, GrammarList_RecognitionFactorScope scope); +SPXAPI class_language_model_from_storage_id(SPXGRAMMARHANDLE* hclm, const char *storageid); +SPXAPI class_language_model_assign_class(SPXGRAMMARHANDLE hclm, const char *classname, SPXGRAMMARHANDLE hgrammar); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_hybrid_speech_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_hybrid_speech_config.h new file mode 100644 index 0000000..945c3ee --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_hybrid_speech_config.h @@ -0,0 +1,9 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +SPXAPI hybrid_speech_config_create(SPXSPEECHCONFIGHANDLE* hconfig, SPXSPEECHCONFIGHANDLE hcloudSpeechConfig, SPXSPEECHCONFIGHANDLE hembeddedSpeechConfig); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_recognizer.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_recognizer.h new file mode 100644 index 0000000..241cf99 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_recognizer.h @@ -0,0 +1,16 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_intent_recognizer.h: Public API declarations for IntentRecognizer related C methods and typedefs +// + +#pragma once +#include + +SPXAPI intent_recognizer_add_intent(SPXRECOHANDLE hreco, const char* intentId, SPXTRIGGERHANDLE htrigger); +SPXAPI intent_recognizer_add_intent_with_model_id(SPXRECOHANDLE hreco, SPXTRIGGERHANDLE htrigger, const char* modelId); +SPXAPI intent_recognizer_recognize_text_once(SPXRECOHANDLE hreco, const char* text, SPXRESULTHANDLE* hresult); +SPXAPI intent_recognizer_clear_language_models(SPXRECOHANDLE hreco); +SPXAPI intent_recognizer_import_pattern_matching_model(SPXRECOHANDLE hreco, const char* jsonData); +SPXAPI intent_recognizer_add_conversational_language_understanding_model(SPXRECOHANDLE hreco, const char* languageResourceKey, const char* endpoint, const char* projectName, const char* deploymentName); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_result.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_result.h new file mode 100644 index 0000000..491121b --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_result.h @@ -0,0 +1,11 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_intent_result.h: Public API declarations for IntentResult related C methods and enumerations +// + +#pragma once +#include + +SPXAPI intent_result_get_intent_id(SPXRESULTHANDLE hresult, char* pszIntentId, uint32_t cchIntentId); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_trigger.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_trigger.h new file mode 100644 index 0000000..50a49e5 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_intent_trigger.h @@ -0,0 +1,17 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_intent_trigger.h: Public API declarations for IntentTrigger related C methods and typedefs +// + +#pragma once +#include + + +SPXAPI_(bool) intent_trigger_handle_is_valid(SPXTRIGGERHANDLE htrigger); + +SPXAPI intent_trigger_create_from_phrase(SPXTRIGGERHANDLE* htrigger, const char* phrase); +SPXAPI intent_trigger_create_from_language_understanding_model(SPXTRIGGERHANDLE* htrigger, SPXLUMODELHANDLE hlumodel, const char* intentName); + +SPXAPI intent_trigger_handle_release(SPXTRIGGERHANDLE htrigger); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_json.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_json.h new file mode 100644 index 0000000..afcf98a --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_json.h @@ -0,0 +1,37 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +SPXAPI__(const char*) ai_core_string_create(const char* str, size_t size); +SPXAPI_(void) ai_core_string_free(const char* str); + +SPXAPI_(int) ai_core_json_parser_create(SPXHANDLE* parser, const char* json, size_t jsize); // returns item for root +SPXAPI_(bool) ai_core_json_parser_handle_is_valid(SPXHANDLE parser); +SPXAPI ai_core_json_parser_handle_release(SPXHANDLE parser); + +SPXAPI_(int) ai_core_json_builder_create(SPXHANDLE* builder, const char* json, size_t jsize); // returns item for root +SPXAPI_(bool) ai_core_json_builder_handle_is_valid(SPXHANDLE builder); +SPXAPI ai_core_json_builder_handle_release(SPXHANDLE builder); + +SPXAPI_(int) ai_core_json_item_count(SPXHANDLE parserOrBuilder, int item); +SPXAPI_(int) ai_core_json_item_at(SPXHANDLE parserOrBuilder, int item, int index, const char* find); // returns item found +SPXAPI_(int) ai_core_json_item_next(SPXHANDLE parserOrBuilder, int item); // returns next item +SPXAPI_(int) ai_core_json_item_name(SPXHANDLE parserOrBuilder, int item); // returns item representing name of item specified + +SPXAPI_(int) ai_core_json_value_kind(SPXHANDLE parserOrBuilder, int item); +SPXAPI_(bool) ai_core_json_value_as_bool(SPXHANDLE parserOrBuilder, int item, bool defaultValue); +SPXAPI_(double) ai_core_json_value_as_double(SPXHANDLE parserOrBuilder, int item, double defaultValue); +SPXAPI_(int64_t) ai_core_json_value_as_int(SPXHANDLE parserOrBuilder, int item, int64_t defaultValue); +SPXAPI_(uint64_t) ai_core_json_value_as_uint(SPXHANDLE parserOrBuilder, int item, uint64_t defaultValue); + +SPXAPI__(const char*) ai_core_json_value_as_string_ptr(SPXHANDLE parserOrBuilder, int item, size_t* size); + +SPXAPI__(const char*) ai_core_json_value_as_string_copy(SPXHANDLE parserOrBuilder, int item, const char* defaultValue); +SPXAPI__(const char*) ai_core_json_value_as_json_copy(SPXHANDLE parserOrBuilder, int item); + +SPXAPI_(int) ai_core_json_builder_item_add(SPXHANDLE builder, int item, int 
index, const char* find); +SPXAPI ai_core_json_builder_item_set(SPXHANDLE builder, int item, const char* json, size_t jsize, int kind, const char* str, size_t ssize, bool boolean, int integer, double number); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_keyword_recognition_model.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_keyword_recognition_model.h new file mode 100644 index 0000000..45f1ae3 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_keyword_recognition_model.h @@ -0,0 +1,17 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_keyword_recognition_model.h: Public API declarations for KeywordRecognitionModel related C methods and typedefs +// + +#pragma once +#include + + +SPXAPI_(bool) keyword_recognition_model_handle_is_valid(SPXKEYWORDHANDLE hkeyword); +SPXAPI keyword_recognition_model_handle_release(SPXKEYWORDHANDLE hkeyword); + +SPXAPI keyword_recognition_model_create_from_file(const char* fileName, SPXKEYWORDHANDLE* phkwmodel); +SPXAPI keyword_recognition_model_create_from_config(SPXSPEECHCONFIGHANDLE hconfig, SPXKEYWORDHANDLE* phkwmodel); +SPXAPI keyword_recognition_model_add_user_defined_wake_word(SPXKEYWORDHANDLE hkwmodel, const char* wakeWord); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_language_understanding_model.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_language_understanding_model.h new file mode 100644 index 0000000..c86d5b4 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_language_understanding_model.h @@ -0,0 +1,18 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_language_understanding_model.h: Public API declarations for LanguageUnderstandingModel related C methods and typedefs +// + +#pragma once +#include + +SPXAPI_(bool) language_understanding_model_handle_is_valid(SPXLUMODELHANDLE hlumodel); + +SPXAPI language_understanding_model_create_from_uri(SPXLUMODELHANDLE* hlumodel, const char* uri); +SPXAPI language_understanding_model_create_from_app_id(SPXLUMODELHANDLE* hlumodel, const char* appId); +SPXAPI language_understanding_model_create_from_subscription(SPXLUMODELHANDLE* hlumodel, const char* subscriptionKey, const char* appId, const char* region); + +SPXAPI language_understanding_model__handle_release(SPXLUMODELHANDLE hlumodel); +SPXAPI__(const char *) language_understanding_model_get_model_id(SPXLUMODELHANDLE hlumodel); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_meeting.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_meeting.h new file mode 100644 index 0000000..b39a60f --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_meeting.h @@ -0,0 +1,28 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_meeting.h: Public API declarations for meeting related C methods and typedefs +// + +#pragma once +#include + +SPXAPI meeting_create_from_config(SPXMEETINGHANDLE* phmeeting, SPXSPEECHCONFIGHANDLE hspeechconfig, const char* id); +SPXAPI meeting_update_participant_by_user_id(SPXMEETINGHANDLE hconv, bool add, const char* userId); +SPXAPI meeting_update_participant_by_user(SPXMEETINGHANDLE hconv, bool add, SPXUSERHANDLE huser); +SPXAPI meeting_update_participant(SPXMEETINGHANDLE hconv, bool add, SPXPARTICIPANTHANDLE hparticipant); +SPXAPI meeting_get_meeting_id(SPXMEETINGHANDLE hconv, char* id, size_t size); +SPXAPI meeting_end_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_get_property_bag(SPXMEETINGHANDLE hconv, SPXPROPERTYBAGHANDLE* phpropbag); +SPXAPI meeting_release_handle(SPXHANDLE handle); + +SPXAPI meeting_start_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_delete_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_lock_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_unlock_meeting(SPXMEETINGHANDLE hconv); +SPXAPI meeting_mute_all_participants(SPXMEETINGHANDLE hconv); +SPXAPI meeting_unmute_all_participants(SPXMEETINGHANDLE hconv); +SPXAPI meeting_mute_participant(SPXMEETINGHANDLE hconv, const char * participantId); +SPXAPI meeting_unmute_participant(SPXMEETINGHANDLE hconv, const char * participantId); + diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_meeting_transcription_result.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_meeting_transcription_result.h new file mode 100644 index 0000000..bcfdd35 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_meeting_transcription_result.h @@ -0,0 +1,12 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_meeting_transcriber_result.h: Public API declarations for MeetingTranscriberResult related C methods and enumerations +// + +#pragma once +#include + +SPXAPI meeting_transcription_result_get_user_id(SPXRESULTHANDLE hresult, char* pszUserId, uint32_t cchUserId); +SPXAPI meeting_transcription_result_get_utterance_id(SPXRESULTHANDLE hresult, char* pszUtteranceId, uint32_t cchUtteranceId); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_operations.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_operations.h new file mode 100644 index 0000000..ed556d7 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_operations.h @@ -0,0 +1,12 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_operations.h: Public API declaration for common operation methods in the C API layer. +// + +#pragma once +#include + +SPXAPI speechapi_async_handle_release(SPXASYNCHANDLE h_async); +SPXAPI speechapi_async_wait_for(SPXASYNCHANDLE h_async, uint32_t milliseconds); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_participant.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_participant.h new file mode 100644 index 0000000..1ddb4c8 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_participant.h @@ -0,0 +1,15 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_participant.h: Public API declarations for conversation transcriber participant related C methods and enumerations +// + +#pragma once +#include + +SPXAPI participant_create_handle(SPXPARTICIPANTHANDLE* hparticipant, const char* userId, const char* preferred_language, const char* voice_signature); +SPXAPI participant_release_handle(SPXPARTICIPANTHANDLE hparticipant); +SPXAPI participant_set_preferred_langugage(SPXPARTICIPANTHANDLE hparticipant, const char* preferred_language); +SPXAPI participant_set_voice_signature(SPXPARTICIPANTHANDLE hparticipant, const char* voice_signature); +SPXAPI participant_get_property_bag(SPXPARTICIPANTHANDLE hparticipant, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_pattern_matching_model.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_pattern_matching_model.h new file mode 100644 index 0000000..264f063 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_pattern_matching_model.h @@ -0,0 +1,33 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_pattern_matching_model.h: Public API declarations for PatternMatchingModel related C methods and typedefs +// + +#pragma once +#include + +SPXAPI_(bool) pattern_matching_model_handle_is_valid(SPXLUMODELHANDLE hlumodel); + +SPXAPI pattern_matching_model_create(SPXLUMODELHANDLE* hlumodel, SPXRECOHANDLE hIntentReco, const char* id); +SPXAPI pattern_matching_model_create_from_id(SPXLUMODELHANDLE* hlumodel, const char* id); + +typedef SPXAPI_RESULTTYPE(SPXAPI_CALLTYPE* PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX)(void* context, size_t index, const char** str, size_t* size); + +SPXAPI pattern_matching_model_add_entity( + SPXLUMODELHANDLE hlumodel, + const char* id, + int32_t type, + int32_t mode, + size_t numPhrases, + void* phraseContext, + PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX phraseGetter); + +SPXAPI pattern_matching_model_add_intent( + SPXLUMODELHANDLE hlumodel, + const char* id, + uint32_t priority, + size_t numPhrases, + void* phraseContext, + PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX phraseGetter); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_pronunciation_assessment_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_pronunciation_assessment_config.h new file mode 100644 index 0000000..1d5df85 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_pronunciation_assessment_config.h @@ -0,0 +1,33 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +typedef enum +{ + PronunciationAssessmentGradingSystem_FivePoint = 1, + PronunciationAssessmentGradingSystem_HundredMark = 2 +} Pronunciation_Assessment_Grading_System; + +typedef enum +{ + PronunciationAssessmentGranularity_Phoneme = 1, + PronunciationAssessmentGranularity_Word = 2, + PronunciationAssessmentGranularity_FullText = 3 +} Pronunciation_Assessment_Granularity; + +SPXAPI create_pronunciation_assessment_config(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE* hPronunciationAssessmentConfig, + const char* referenceText, + Pronunciation_Assessment_Grading_System gradingSystem, + Pronunciation_Assessment_Granularity granularity, + bool enableMiscue); +SPXAPI create_pronunciation_assessment_config_from_json(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE* hPronunciationAssessmentConfig, const char* json); +SPXAPI_(bool) pronunciation_assessment_config_is_handle_valid(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig); +SPXAPI pronunciation_assessment_config_release(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig); +SPXAPI pronunciation_assessment_config_get_property_bag( + SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI__(const char*) pronunciation_assessment_config_to_json(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig); +SPXAPI pronunciation_assessment_config_apply_to_recognizer(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hPronunciationAssessmentConfig, SPXRECOHANDLE hreco); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_property_bag.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_property_bag.h new file mode 100644 index 0000000..5ab9f18 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_property_bag.h @@ -0,0 +1,162 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_property_bag.h: Public API declarations for Property Bag related C methods +// + +#pragma once +#include + +SPXAPI property_bag_create(SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI_(bool) property_bag_is_valid(SPXPROPERTYBAGHANDLE hpropbag); +SPXAPI property_bag_set_string(SPXPROPERTYBAGHANDLE hpropbag, int id, const char* name, const char* value); +SPXAPI__(const char*) property_bag_get_string(SPXPROPERTYBAGHANDLE hpropbag, int id, const char* name, const char* defaultValue); +SPXAPI property_bag_free_string(const char* value); +SPXAPI property_bag_release(SPXPROPERTYBAGHANDLE hpropbag); +SPXAPI property_bag_copy(SPXPROPERTYBAGHANDLE hfrom, SPXPROPERTYBAGHANDLE hto); + +// NOTE: Currently this enum is duplicated with C++ side, +// because SWIG cannot properly resolve conditional compilation. +#ifndef __cplusplus +enum PropertyId +{ + SpeechServiceConnection_Key = 1000, + SpeechServiceConnection_Endpoint = 1001, + SpeechServiceConnection_Region = 1002, + SpeechServiceAuthorization_Token = 1003, + SpeechServiceAuthorization_Type = 1004, + SpeechServiceConnection_EndpointId = 1005, + SpeechServiceConnection_Host = 1006, + + SpeechServiceConnection_ProxyHostName = 1100, + SpeechServiceConnection_ProxyPort = 1101, + SpeechServiceConnection_ProxyUserName = 1102, + SpeechServiceConnection_ProxyPassword = 1103, + SpeechServiceConnection_Url = 1104, + SpeechServiceConnection_ProxyHostBypass = 1105, + + SpeechServiceConnection_TranslationToLanguages = 2000, + SpeechServiceConnection_TranslationVoice = 2001, + SpeechServiceConnection_TranslationFeatures = 2002, + SpeechServiceConnection_IntentRegion = 2003, + + SpeechServiceConnection_RecoMode = 3000, + SpeechServiceConnection_RecoLanguage = 3001, + Speech_SessionId = 3002, + SpeechServiceConnection_UserDefinedQueryParameters = 3003, + SpeechServiceConnection_RecoModelBackend = 3004, + SpeechServiceConnection_RecoModelName = 3005, + SpeechServiceConnection_RecoModelKey = 3006, + 
SpeechServiceConnection_RecoModelIniFile = 3007, + + SpeechServiceConnection_SynthLanguage = 3100, + SpeechServiceConnection_SynthVoice = 3101, + SpeechServiceConnection_SynthOutputFormat = 3102, + SpeechServiceConnection_SynthEnableCompressedAudioTransmission = 3103, + SpeechServiceConnection_SynthBackend = 3110, + SpeechServiceConnection_SynthOfflineDataPath = 3112, + SpeechServiceConnection_SynthOfflineVoice = 3113, + SpeechServiceConnection_SynthModelKey = 3114, + SpeechServiceConnection_VoicesListEndpoint = 3130, + + SpeechServiceConnection_InitialSilenceTimeoutMs = 3200, + SpeechServiceConnection_EndSilenceTimeoutMs = 3201, + SpeechServiceConnection_EnableAudioLogging = 3202, + SpeechServiceConnection_LanguageIdMode = 3205, + SpeechServiceConnection_TranslationCategoryId = 3206, + + SpeechServiceConnection_AutoDetectSourceLanguages = 3300, + SpeechServiceConnection_AutoDetectSourceLanguageResult = 3301, + + SpeechServiceResponse_RequestDetailedResultTrueFalse = 4000, + SpeechServiceResponse_RequestProfanityFilterTrueFalse = 4001, + SpeechServiceResponse_ProfanityOption = 4002, + SpeechServiceResponse_PostProcessingOption = 4003, + SpeechServiceResponse_RequestWordLevelTimestamps = 4004, + SpeechServiceResponse_StablePartialResultThreshold = 4005, + SpeechServiceResponse_OutputFormatOption = 4006, + SpeechServiceResponse_RequestSnr = 4007, + + SpeechServiceResponse_TranslationRequestStablePartialResult = 4100, + + SpeechServiceResponse_RequestWordBoundary = 4200, + SpeechServiceResponse_RequestPunctuationBoundary = 4201, + SpeechServiceResponse_RequestSentenceBoundary = 4202, + SpeechServiceResponse_SynthesisEventsSyncToAudio = 4210, + + SpeechServiceResponse_JsonResult = 5000, + SpeechServiceResponse_JsonErrorDetails = 5001, + SpeechServiceResponse_RecognitionLatencyMs = 5002, + SpeechServiceResponse_RecognitionBackend = 5003, + + SpeechServiceResponse_SynthesisFirstByteLatencyMs = 5010, + SpeechServiceResponse_SynthesisFinishLatencyMs = 5011, + 
SpeechServiceResponse_SynthesisUnderrunTimeMs = 5012, + SpeechServiceResponse_SynthesisConnectionLatencyMs = 5013, + SpeechServiceResponse_SynthesisNetworkLatencyMs = 5014, + SpeechServiceResponse_SynthesisServiceLatencyMs = 5015, + SpeechServiceResponse_DiarizeIntermediateResults = 5025, + + CancellationDetails_Reason = 6000, + CancellationDetails_ReasonText = 6001, + CancellationDetails_ReasonDetailedText = 6002, + + LanguageUnderstandingServiceResponse_JsonResult = 7000, + + AudioConfig_DeviceNameForCapture = 8000, + AudioConfig_NumberOfChannelsForCapture = 8001, + AudioConfig_SampleRateForCapture = 8002, + AudioConfig_BitsPerSampleForCapture = 8003, + AudioConfig_AudioSource = 8004, + AudioConfig_DeviceNameForRender = 8005, + AudioConfig_PlaybackBufferLengthInMs = 8006, + + Speech_LogFilename = 9001, + Speech_SegmentationSilenceTimeoutMs = 9002, + Speech_SegmentationMaximumTimeMs = 9003, + Speech_SegmentationStrategy = 9004, + + Conversation_ApplicationId = 10000, + Conversation_DialogType = 10001, + Conversation_Initial_Silence_Timeout = 10002, + Conversation_From_Id = 10003, + Conversation_Conversation_Id = 10004, + Conversation_Custom_Voice_Deployment_Ids = 10005, + Conversation_Speech_Activity_Template = 10006, + Conversation_ParticipantId = 10007, + DataBuffer_TimeStamp = 11001, + DataBuffer_UserId = 11002, + + PronunciationAssessment_ReferenceText = 12001, + PronunciationAssessment_GradingSystem = 12002, + PronunciationAssessment_Granularity = 12003, + PronunciationAssessment_EnableMiscue = 12005, + PronunciationAssessment_PhonemeAlphabet = 12006, + PronunciationAssessment_NBestPhonemeCount = 12007, + PronunciationAssessment_EnableProsodyAssessment = 12008, + PronunciationAssessment_Json = 12009, + PronunciationAssessment_Params = 12010, + PronunciationAssessment_ContentTopic = 12020, + SpeakerRecognition_Api_Version = 13001, + + SpeechTranslation_ModelName = 13100, + SpeechTranslation_ModelKey = 13101, + + KeywordRecognition_ModelName = 13200, + 
KeywordRecognition_ModelKey = 13201, + + EmbeddedSpeech_EnablePerformanceMetrics = 13300, + + SpeechSynthesis_FrameTimeoutInterval = 14101, + SpeechSynthesis_RtfTimeoutThreshold = 14102 +}; + +typedef enum _ParticipantChangedReason +{ + JoinedConversation, + LeftConversation, + Updated +} ParticipantChangedReason; +#endif + diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_recognizer.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_recognizer.h new file mode 100644 index 0000000..c7eecbb --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_recognizer.h @@ -0,0 +1,67 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_recognizer.h: Public API declarations for Recognizer related C methods and typedefs +// + +#pragma once +#include + + +SPXAPI_(bool) recognizer_handle_is_valid(SPXRECOHANDLE hreco); +SPXAPI recognizer_handle_release(SPXRECOHANDLE hreco); + +SPXAPI_(bool) recognizer_async_handle_is_valid(SPXASYNCHANDLE hasync); +SPXAPI recognizer_async_handle_release(SPXASYNCHANDLE hasync); + +SPXAPI_(bool) recognizer_result_handle_is_valid(SPXRESULTHANDLE hresult); +SPXAPI recognizer_result_handle_release(SPXRESULTHANDLE hresult); + +SPXAPI_(bool) recognizer_event_handle_is_valid(SPXEVENTHANDLE hevent); +SPXAPI recognizer_event_handle_release(SPXEVENTHANDLE hevent); + +SPXAPI recognizer_get_property_bag(SPXRECOHANDLE hreco, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI recognizer_recognize_once(SPXRECOHANDLE hreco, SPXRESULTHANDLE* phresult); +SPXAPI recognizer_recognize_once_async(SPXRECOHANDLE hreco, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_recognize_text_once_async(SPXRECOHANDLE hreco, const char* text, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_recognize_once_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds, SPXRESULTHANDLE* phresult); + +SPXAPI recognizer_start_continuous_recognition(SPXRECOHANDLE 
hreco); +SPXAPI recognizer_start_continuous_recognition_async(SPXRECOHANDLE hreco, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_start_continuous_recognition_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI recognizer_stop_continuous_recognition(SPXRECOHANDLE hreco); +SPXAPI recognizer_stop_continuous_recognition_async(SPXRECOHANDLE hreco, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_stop_continuous_recognition_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI recognizer_start_keyword_recognition(SPXRECOHANDLE hreco, SPXKEYWORDHANDLE hkeyword); +SPXAPI recognizer_start_keyword_recognition_async(SPXRECOHANDLE hreco, SPXKEYWORDHANDLE hkeyword, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_start_keyword_recognition_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI recognizer_recognize_keyword_once(SPXRECOHANDLE hreco, SPXKEYWORDHANDLE hkeyword, SPXRESULTHANDLE* phresult); +SPXAPI recognizer_recognize_keyword_once_async(SPXRECOHANDLE hreco, SPXKEYWORDHANDLE hkeyword, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_recognize_keyword_once_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds, SPXRESULTHANDLE* phresult); + +SPXAPI recognizer_stop_keyword_recognition(SPXRECOHANDLE hreco); +SPXAPI recognizer_stop_keyword_recognition_async(SPXRECOHANDLE hreco, SPXASYNCHANDLE* phasync); +SPXAPI recognizer_stop_keyword_recognition_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +typedef void (*PSESSION_CALLBACK_FUNC)(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext); +SPXAPI recognizer_session_started_set_callback(SPXRECOHANDLE hreco, PSESSION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_session_stopped_set_callback(SPXRECOHANDLE hreco, PSESSION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_token_requested_set_callback(SPXRECOHANDLE hreco, PSESSION_CALLBACK_FUNC pCallback, void* pvContext); + +typedef void (*PRECOGNITION_CALLBACK_FUNC)(SPXRECOHANDLE 
hreco, SPXEVENTHANDLE hevent, void* pvContext); +SPXAPI recognizer_recognizing_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_recognized_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_canceled_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_speech_start_detected_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI recognizer_speech_end_detected_set_callback(SPXRECOHANDLE hreco, PRECOGNITION_CALLBACK_FUNC pCallback, void* pvContext); + +SPXAPI recognizer_session_event_get_session_id(SPXEVENTHANDLE hevent, char* pszSessionId, uint32_t cchSessionId); +SPXAPI recognizer_recognition_event_get_offset(SPXEVENTHANDLE hevent, uint64_t *pszOffset); +SPXAPI recognizer_recognition_event_get_result(SPXEVENTHANDLE hevent, SPXRESULTHANDLE* phresult); + +SPXAPI recognizer_connection_event_get_property_bag(SPXEVENTHANDLE hevent, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_result.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_result.h new file mode 100644 index 0000000..48874a7 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_result.h @@ -0,0 +1,109 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_result.h: Public API declarations for Result related C methods and enumerations +// + +#pragma once +#include + +enum Result_Reason +{ + ResultReason_NoMatch = 0, + ResultReason_Canceled = 1, + ResultReason_RecognizingSpeech = 2, + ResultReason_RecognizedSpeech = 3, + ResultReason_RecognizingIntent = 4, + ResultReason_RecognizedIntent = 5, + ResultReason_TranslatingSpeech = 6, + ResultReason_TranslatedSpeech = 7, + ResultReason_SynthesizingAudio = 8, + ResultReason_SynthesizingAudioComplete = 9, + ResultReason_RecognizingKeyword = 10, + ResultReason_RecognizedKeyword = 11, + ResultReason_SynthesizingAudioStart = 12 +}; +typedef enum Result_Reason Result_Reason; + +enum Result_CancellationReason +{ + CancellationReason_Error = 1, + CancellationReason_EndOfStream = 2, + CancellationReason_UserCancelled = 3, +}; + +typedef enum Result_CancellationReason Result_CancellationReason; + +enum Result_CancellationErrorCode +{ + CancellationErrorCode_NoError = 0, + CancellationErrorCode_AuthenticationFailure = 1, + CancellationErrorCode_BadRequest = 2, + CancellationErrorCode_TooManyRequests = 3, + CancellationErrorCode_Forbidden = 4, + CancellationErrorCode_ConnectionFailure = 5, + CancellationErrorCode_ServiceTimeout = 6, + CancellationErrorCode_ServiceError = 7, + CancellationErrorCode_ServiceUnavailable = 8, + CancellationErrorCode_RuntimeError = 9 +}; +typedef enum Result_CancellationErrorCode Result_CancellationErrorCode; + +enum Result_NoMatchReason +{ + NoMatchReason_NotRecognized = 1, + NoMatchReason_InitialSilenceTimeout = 2, + NoMatchReason_InitialBabbleTimeout = 3, + NoMatchReason_KeywordNotRecognized = 4, + NoMatchReason_EndSilenceTimeout = 5 +}; +typedef enum Result_NoMatchReason Result_NoMatchReason; + +enum Synthesis_VoiceType +{ + SynthesisVoiceType_OnlineNeural = 1, + SynthesisVoiceType_OnlineStandard = 2, + SynthesisVoiceType_OfflineNeural = 3, + SynthesisVoiceType_OfflineStandard = 4 +}; +typedef enum Synthesis_VoiceType 
Synthesis_VoiceType; + +SPXAPI result_get_reason(SPXRESULTHANDLE hresult, Result_Reason* reason); +SPXAPI result_get_reason_canceled(SPXRESULTHANDLE hresult, Result_CancellationReason* reason); +SPXAPI result_get_canceled_error_code(SPXRESULTHANDLE hresult, Result_CancellationErrorCode* errorCode); +SPXAPI result_get_no_match_reason(SPXRESULTHANDLE hresult, Result_NoMatchReason* reason); + +SPXAPI result_get_result_id(SPXRESULTHANDLE hresult, char* pszResultId, uint32_t cchResultId); + +SPXAPI result_get_text(SPXRESULTHANDLE hresult, char* pszText, uint32_t cchText); +SPXAPI result_get_offset(SPXRESULTHANDLE hresult, uint64_t* offset); +SPXAPI result_get_duration(SPXRESULTHANDLE hresult, uint64_t* duration); +SPXAPI result_get_channel(SPXRESULTHANDLE hresult, uint32_t* channel); + +SPXAPI result_get_property_bag(SPXRESULTHANDLE hresult, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI synth_result_get_result_id(SPXRESULTHANDLE hresult, char* resultId, uint32_t resultIdLength); +SPXAPI synth_result_get_reason(SPXRESULTHANDLE hresult, Result_Reason* reason); +SPXAPI synth_result_get_reason_canceled(SPXRESULTHANDLE hresult, Result_CancellationReason* reason); +SPXAPI synth_result_get_canceled_error_code(SPXRESULTHANDLE hresult, Result_CancellationErrorCode* errorCode); +SPXAPI synth_result_get_audio_data(SPXRESULTHANDLE hresult, uint8_t* buffer, uint32_t bufferSize, uint32_t* filledSize); +SPXAPI synth_result_get_audio_length_duration(SPXRESULTHANDLE hresult, uint32_t* audioLength, uint64_t* audioDuration); +SPXAPI synth_result_get_audio_format(SPXRESULTHANDLE hresult, SPXAUDIOSTREAMFORMATHANDLE* hformat); +SPXAPI synth_result_get_property_bag(SPXRESULTHANDLE hresult, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI synthesis_voices_result_get_result_id(SPXRESULTHANDLE hresult, char* resultId, uint32_t resultIdLength); +SPXAPI synthesis_voices_result_get_reason(SPXRESULTHANDLE hresult, Result_Reason* reason); +SPXAPI synthesis_voices_result_get_voice_num(SPXRESULTHANDLE hresult, 
uint32_t* voiceNum); +SPXAPI synthesis_voices_result_get_voice_info(SPXRESULTHANDLE hresult, uint32_t index, SPXRESULTHANDLE* hVoiceInfo); +SPXAPI synthesis_voices_result_get_property_bag(SPXRESULTHANDLE hresult, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI voice_info_handle_release(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_name(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_locale(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_short_name(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_local_name(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_style_list(SPXRESULTHANDLE hVoiceInfo); +SPXAPI__(const char*) voice_info_get_voice_path(SPXRESULTHANDLE hVoiceInfo); +SPXAPI voice_info_get_voice_type(SPXRESULTHANDLE hVoiceInfo, Synthesis_VoiceType* voiceType); +SPXAPI voice_info_get_property_bag(SPXRESULTHANDLE hVoiceInfo, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_session.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_session.h new file mode 100644 index 0000000..c0a8186 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_session.h @@ -0,0 +1,16 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_session.h: Public API declarations for Session related C methods +// + +#pragma once +#include + +SPXAPI session_from_recognizer(SPXRECOHANDLE hreco, SPXSESSIONHANDLE* phsession); + +SPXAPI_(bool) session_handle_is_valid(SPXSESSIONHANDLE hsession); +SPXAPI session_handle_release(SPXSESSIONHANDLE hsession); + +SPXAPI session_get_property_bag(SPXSESSIONHANDLE hsession, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_source_lang_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_source_lang_config.h new file mode 100644 index 0000000..53da418 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_source_lang_config.h @@ -0,0 +1,13 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +SPXAPI source_lang_config_from_language(SPXSOURCELANGCONFIGHANDLE* hconfig, const char* language); +SPXAPI source_lang_config_from_language_and_endpointId(SPXSOURCELANGCONFIGHANDLE* hconfig, const char* language, const char* endpointId); +SPXAPI_(bool) source_lang_config_is_handle_valid(SPXSOURCELANGCONFIGHANDLE hconfig); +SPXAPI source_lang_config_release(SPXSOURCELANGCONFIGHANDLE hconfig); +SPXAPI source_lang_config_get_property_bag(SPXSOURCELANGCONFIGHANDLE hconfig, SPXPROPERTYBAGHANDLE* hpropbag); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_speaker_recognition.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speaker_recognition.h new file mode 100644 index 0000000..84d4b74 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speaker_recognition.h @@ -0,0 +1,37 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_speaker_recognition.h: C API declarations for speaker recognition.
+// + +#pragma once + +#include + +SPXAPI create_voice_profile_client_from_config(SPXVOICEPROFILECLIENTHANDLE* phclient, SPXSPEECHCONFIGHANDLE hSpeechConfig); +SPXAPI voice_profile_client_release_handle(SPXVOICEPROFILECLIENTHANDLE hVoiceClient); +SPXAPI create_voice_profile(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, int id, const char* locale, SPXVOICEPROFILEHANDLE* pProfileHandle); + +SPXAPI enroll_voice_profile(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, SPXVOICEPROFILEHANDLE hProfileHandle, SPXAUDIOCONFIGHANDLE hAudioInput, SPXRESULTHANDLE* phresult); +SPXAPI voice_profile_client_get_property_bag(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI create_voice_profile_from_id_and_type(SPXVOICEPROFILEHANDLE* phVoiceProfile, const char* id, int type); +SPXAPI voice_profile_get_id(SPXVOICEPROFILEHANDLE hVoiceProfile, char* psz, uint32_t* pcch); +SPXAPI voice_profile_get_type(SPXVOICEPROFILEHANDLE hVoiceProfile, int* ptype); +SPXAPI voice_profile_release_handle(SPXVOICEPROFILEHANDLE hVoiceProfile); +SPXAPI voice_profile_get_property_bag(SPXVOICEPROFILEHANDLE voiceprofilehandle, SPXPROPERTYBAGHANDLE* pProperties); +SPXAPI delete_voice_profile(SPXVOICEPROFILECLIENTHANDLE hclient, SPXVOICEPROFILEHANDLE hProfileHandle, SPXRESULTHANDLE* phresult); +SPXAPI reset_voice_profile(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, SPXVOICEPROFILEHANDLE hProfileHandle, SPXRESULTHANDLE* phresult); +SPXAPI get_profiles_json(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, int type, char** ppsz, size_t* pcch); +SPXAPI retrieve_enrollment_result(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, const char* pId, int type, SPXVOICEPROFILEHANDLE* phVoiceProfile); +SPXAPI get_activation_phrases(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient, const char* pLocale, int type, SPXRESULTHANDLE* phresult); +SPXAPI recognizer_create_speaker_recognizer_from_config(SPXSPEAKERIDHANDLE* phspeakerid, SPXSPEECHCONFIGHANDLE hspeechconfig, 
SPXAUDIOCONFIGHANDLE haudioInput); +SPXAPI speaker_recognizer_release_handle(SPXSPEAKERIDHANDLE phspeakerid); +SPXAPI speaker_recognizer_get_property_bag(SPXSPEAKERIDHANDLE phspeakerid, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI speaker_identification_model_create(SPXSIMODELHANDLE* psimodel); +SPXAPI speaker_identification_model_add_profile(SPXSIMODELHANDLE hsimodel, SPXVOICEPROFILEHANDLE hprofile); +SPXAPI speaker_identification_model_release_handle(SPXSIMODELHANDLE hmodel); +SPXAPI speaker_recognizer_identify(SPXSPEAKERIDHANDLE phspeakerid, SPXSIMODELHANDLE hsimodel, SPXRESULTHANDLE* phresult); +SPXAPI speaker_recognizer_verify(SPXSPEAKERIDHANDLE phspeakerid, SPXSVMODELHANDLE hsvmodel, SPXRESULTHANDLE* phresult); +SPXAPI speaker_verification_model_create(SPXSVMODELHANDLE* psvmodel, SPXVOICEPROFILEHANDLE hprofile); +SPXAPI speaker_verification_model_release_handle(SPXSVMODELHANDLE hsvmodel); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_config.h new file mode 100644 index 0000000..ef2c521 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_config.h @@ -0,0 +1,171 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +typedef enum { SpeechOutputFormat_Simple = 0, SpeechOutputFormat_Detailed = 1 } SpeechOutputFormat; + +typedef enum +{ + // raw-8khz-8bit-mono-mulaw + SpeechSynthesisOutputFormat_Raw8Khz8BitMonoMULaw = 1, + + // riff-16khz-16kbps-mono-siren + // Unsupported by the service. Do not use this value. + SpeechSynthesisOutputFormat_Riff16Khz16KbpsMonoSiren = 2, + + // audio-16khz-16kbps-mono-siren + // Unsupported by the service. Do not use this value. 
+ SpeechSynthesisOutputFormat_Audio16Khz16KbpsMonoSiren = 3, + + // audio-16khz-32kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio16Khz32KBitRateMonoMp3 = 4, + + // audio-16khz-128kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio16Khz128KBitRateMonoMp3 = 5, + + // audio-16khz-64kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio16Khz64KBitRateMonoMp3 = 6, + + // audio-24khz-48kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio24Khz48KBitRateMonoMp3 = 7, + + // audio-24khz-96kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio24Khz96KBitRateMonoMp3 = 8, + + // audio-24khz-160kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio24Khz160KBitRateMonoMp3 = 9, + + // raw-16khz-16bit-mono-truesilk + SpeechSynthesisOutputFormat_Raw16Khz16BitMonoTrueSilk = 10, + + // riff-16khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Riff16Khz16BitMonoPcm = 11, + + // riff-8khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Riff8Khz16BitMonoPcm = 12, + + // riff-24khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Riff24Khz16BitMonoPcm = 13, + + // riff-8khz-8bit-mono-mulaw + SpeechSynthesisOutputFormat_Riff8Khz8BitMonoMULaw = 14, + + // raw-16khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Raw16Khz16BitMonoPcm = 15, + + // raw-24khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Raw24Khz16BitMonoPcm = 16, + + // raw-8khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Raw8Khz16BitMonoPcm = 17, + + // ogg-16khz-16bit-mono-opus + SpeechSynthesisOutputFormat_Ogg16khz16BitMonoOpus = 18, + + // ogg-24khz-24bit-mono-opus + SpeechSynthesisOutputFormat_Ogg24Khz16BitMonoOpus = 19, + + // raw-48khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Raw48Khz16BitMonoPcm = 20, + + // riff-48khz-16bit-mono-pcm + SpeechSynthesisOutputFormat_Riff48Khz16BitMonoPcm = 21, + + // audio-48khz-96kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio48Khz96KBitRateMonoMp3 = 22, + + // audio-48khz-192kbitrate-mono-mp3 + SpeechSynthesisOutputFormat_Audio48Khz192KBitRateMonoMp3 = 23, + + // 
ogg-48khz-16bit-mono-opus + SpeechSynthesisOutputFormat_Ogg48Khz16BitMonoOpus = 24, + + // webm-16khz-16bit-mono-opus + SpeechSynthesisOutputFormat_Webm16Khz16BitMonoOpus = 25, + + // webm-24khz-16bit-mono-opus + SpeechSynthesisOutputFormat_Webm24Khz16BitMonoOpus = 26, + + // raw-24khz-16bit-mono-truesilk + SpeechSynthesisOutputFormat_Raw24Khz16BitMonoTrueSilk = 27, + + // raw-8khz-8bit-mono-alaw + SpeechSynthesisOutputFormat_Raw8Khz8BitMonoALaw = 28, + + // riff-8khz-8bit-mono-alaw + SpeechSynthesisOutputFormat_Riff8Khz8BitMonoALaw = 29, + + // webm-24khz-16bit-24kbps-mono-opus + // Audio compressed by OPUS codec in a WebM container, with bitrate of 24kbps, optimized for IoT scenario. + SpeechSynthesisOutputFormat_Webm24Khz16Bit24KbpsMonoOpus = 30, + + // audio-16khz-16bit-32kbps-mono-opus + // Audio compressed by OPUS codec without container, with bitrate of 32kbps. + SpeechSynthesisOutputFormat_Audio16Khz16Bit32KbpsMonoOpus = 31, + + // audio-24khz-48bit-mono-opus + // Audio compressed by OPUS codec without container, with bitrate of 48kbps. + SpeechSynthesisOutputFormat_Audio24Khz16Bit48KbpsMonoOpus = 32, + + // audio-24khz-24bit-mono-opus + // Audio compressed by OPUS codec without container, with bitrate of 24kbps. + SpeechSynthesisOutputFormat_Audio24Khz16Bit24KbpsMonoOpus = 33, + + // raw-22050hz-16bit-mono-pcm + // Raw PCM audio at 22050Hz sampling rate and 16-bit depth. + SpeechSynthesisOutputFormat_Raw22050Hz16BitMonoPcm = 34, + + // riff-22050hz-16bit-mono-pcm + // PCM audio at 22050Hz sampling rate and 16-bit depth, with RIFF header. + SpeechSynthesisOutputFormat_Riff22050Hz16BitMonoPcm = 35, + + // raw-44100hz-16bit-mono-pcm + // Raw PCM audio at 44100Hz sampling rate and 16-bit depth. + SpeechSynthesisOutputFormat_Raw44100Hz16BitMonoPcm = 36, + + // riff-44100hz-16bit-mono-pcm + // PCM audio at 44100Hz sampling rate and 16-bit depth, with RIFF header. 
+ SpeechSynthesisOutputFormat_Riff44100Hz16BitMonoPcm = 37, + + /// amr-wb-16000hz + /// AMR-WB audio at 16kHz sampling rate. + /// (Added in 1.24.0) + SpeechSynthesisOutputFormat_AmrWb16000Hz = 38, + + /// g722-16khz-64kbps + /// G.722 audio at 16kHz sampling rate and 64kbps bitrate. + /// (Added in 1.38.0) + SpeechSynthesisOutputFormat_G72216Khz64Kbps = 39, +} Speech_Synthesis_Output_Format; + +typedef enum +{ + // Using URI query parameter to pass property settings to service. + SpeechConfig_ServicePropertyChannel_UriQueryParameter = 0, + + // Using HttpHeader to set a key/value in a HTTP header. + SpeechConfig_ServicePropertyChannel_HttpHeader = 1 +} SpeechConfig_ServicePropertyChannel; + +typedef enum +{ + SpeechConfig_ProfanityMasked = 0, + SpeechConfig_ProfanityRemoved = 1, + SpeechConfig_ProfanityRaw = 2 +} SpeechConfig_ProfanityOption; + +SPXAPI_(bool) speech_config_is_handle_valid(SPXSPEECHCONFIGHANDLE hconfig); +SPXAPI speech_config_from_subscription(SPXSPEECHCONFIGHANDLE* hconfig, const char* subscription, const char* region); +SPXAPI speech_config_from_authorization_token(SPXSPEECHCONFIGHANDLE* hconfig, const char* authToken, const char* region); +SPXAPI speech_config_from_endpoint(SPXSPEECHCONFIGHANDLE * hconfig, const char* endpoint, const char* subscription); +SPXAPI speech_config_from_host(SPXSPEECHCONFIGHANDLE* hconfig, const char* host, const char* subscription); +SPXAPI speech_config_release(SPXSPEECHCONFIGHANDLE hconfig); +SPXAPI speech_config_get_property_bag(SPXSPEECHCONFIGHANDLE hconfig, SPXPROPERTYBAGHANDLE* hpropbag); +SPXAPI speech_config_set_audio_output_format(SPXSPEECHCONFIGHANDLE hconfig, Speech_Synthesis_Output_Format formatId); +SPXAPI speech_config_set_service_property(SPXSPEECHCONFIGHANDLE configHandle, const char* propertyName, const char* propertyValue, SpeechConfig_ServicePropertyChannel channel); +SPXAPI speech_config_set_profanity(SPXSPEECHCONFIGHANDLE configHandle, SpeechConfig_ProfanityOption profanity); + diff --git 
a/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_recognition_model.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_recognition_model.h new file mode 100644 index 0000000..0c0dada --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_recognition_model.h @@ -0,0 +1,13 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +SPXAPI speech_recognition_model_handle_release(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_recognition_model_get_name(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_recognition_model_get_locales(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_recognition_model_get_path(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_recognition_model_get_version(SPXSPEECHRECOMODELHANDLE hmodel); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_translation_config.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_translation_config.h new file mode 100644 index 0000000..ed2ea7c --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_translation_config.h @@ -0,0 +1,16 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +SPXAPI speech_translation_config_from_subscription(SPXSPEECHCONFIGHANDLE* configHandle, const char* subscription, const char* region); +SPXAPI speech_translation_config_from_authorization_token(SPXSPEECHCONFIGHANDLE* configHandle, const char* authToken, const char* region); +SPXAPI speech_translation_config_from_endpoint(SPXSPEECHCONFIGHANDLE* configHandle, const char* endpoint, const char* subscription); +SPXAPI speech_translation_config_from_host(SPXSPEECHCONFIGHANDLE* configHandle, const char* host, const char* subscription); + +SPXAPI speech_translation_config_add_target_language(SPXSPEECHCONFIGHANDLE configHandle, const char* language); +SPXAPI speech_translation_config_remove_target_language(SPXSPEECHCONFIGHANDLE configHandle, const char* language); +SPXAPI speech_translation_config_set_custom_model_category_id(SPXSPEECHCONFIGHANDLE configHandle, const char* categoryId); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_translation_model.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_translation_model.h new file mode 100644 index 0000000..cca8d4d --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_speech_translation_model.h @@ -0,0 +1,14 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +SPXAPI speech_translation_model_handle_release(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_name(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_source_languages(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_target_languages(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_path(SPXSPEECHRECOMODELHANDLE hmodel); +SPXAPI__(const char*) speech_translation_model_get_version(SPXSPEECHRECOMODELHANDLE hmodel); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_synthesis_request.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_synthesis_request.h new file mode 100644 index 0000000..90af519 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_synthesis_request.h @@ -0,0 +1,17 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include + +SPXAPI speech_synthesis_request_create(bool textStreamingEnabled, bool isSSML, const char* inputText, uint32_t textLength, SPXREQUESTHANDLE* hrequest); +SPXAPI speech_synthesis_request_set_voice(SPXREQUESTHANDLE hrequest, const char* voice, const char* personalVoice, const char* modelName); +SPXAPI speech_synthesis_request_send_text_piece(SPXREQUESTHANDLE hrequest, const char* text, uint32_t textLength); +SPXAPI speech_synthesis_request_finish(SPXREQUESTHANDLE hrequest); +SPXAPI speech_synthesis_request_handle_is_valid(SPXREQUESTHANDLE hrequest); +SPXAPI speech_synthesis_request_release(SPXREQUESTHANDLE hrequest); + +SPXAPI speech_synthesis_request_get_property_bag(SPXREQUESTHANDLE hrequest, SPXPROPERTYBAGHANDLE* hpropbag); + diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_synthesizer.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_synthesizer.h new file mode 100644 index 0000000..6e61f5b --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_synthesizer.h @@ -0,0 +1,75 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_c_synthesizer.h: Public API declarations for Synthesizer related C methods and typedefs +// + +#pragma once +#include +#include +#include + + +enum SpeechSynthesis_BoundaryType +{ + SpeechSynthesis_BoundaryType_Word = 0, + SpeechSynthesis_BoundaryType_Punctuation = 1, + SpeechSynthesis_BoundaryType_Sentence = 2 +}; +typedef enum SpeechSynthesis_BoundaryType SpeechSynthesis_BoundaryType; + +SPXAPI_(bool) synthesizer_handle_is_valid(SPXSYNTHHANDLE hsynth); +SPXAPI synthesizer_handle_release(SPXSYNTHHANDLE hsynth); + +SPXAPI_(bool) synthesizer_async_handle_is_valid(SPXASYNCHANDLE hasync); +SPXAPI synthesizer_async_handle_release(SPXASYNCHANDLE hasync); + +SPXAPI_(bool) synthesizer_result_handle_is_valid(SPXRESULTHANDLE hresult); +SPXAPI synthesizer_result_handle_release(SPXRESULTHANDLE hresult); + +SPXAPI_(bool) synthesizer_event_handle_is_valid(SPXEVENTHANDLE hevent); +SPXAPI synthesizer_event_handle_release(SPXEVENTHANDLE hevent); + +SPXAPI synthesizer_get_property_bag(SPXSYNTHHANDLE hsynth, SPXPROPERTYBAGHANDLE* hpropbag); + +SPXAPI synthesizer_speak_text(SPXSYNTHHANDLE hsynth, const char* text, uint32_t textLength, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_speak_ssml(SPXSYNTHHANDLE hsynth, const char* ssml, uint32_t ssmlLength, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_speak_request(SPXSYNTHHANDLE hsynth, SPXREQUESTHANDLE hrequest, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_speak_text_async(SPXSYNTHHANDLE hsynth, const char* text, uint32_t textLength, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_speak_ssml_async(SPXSYNTHHANDLE hsynth, const char* ssml, uint32_t ssmlLength, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_speak_request_async(SPXSYNTHHANDLE hsynth, SPXREQUESTHANDLE hrequest, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_start_speaking_text(SPXSYNTHHANDLE hsynth, const char* text, uint32_t textLength, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_start_speaking_ssml(SPXSYNTHHANDLE hsynth, const char* ssml, uint32_t 
ssmlLength, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_start_speaking_request(SPXSYNTHHANDLE hsynth, SPXREQUESTHANDLE hrequest, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_start_speaking_text_async(SPXSYNTHHANDLE hsynth, const char* text, uint32_t textLength, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_start_speaking_ssml_async(SPXSYNTHHANDLE hsynth, const char* ssml, uint32_t ssmlLength, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_speak_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_stop_speaking(SPXSYNTHHANDLE hsynth); +SPXAPI synthesizer_stop_speaking_async(SPXSYNTHHANDLE hsynth, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_stop_speaking_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds); + +SPXAPI synthesizer_get_voices_list(SPXSYNTHHANDLE hsynth, const char* locale, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_get_voices_list_async(SPXSYNTHHANDLE hsynth, const char* locale, SPXASYNCHANDLE* phasync); +SPXAPI synthesizer_get_voices_list_async_wait_for(SPXASYNCHANDLE hasync, uint32_t milliseconds, SPXRESULTHANDLE* phresult); + +typedef void(*PSYNTHESIS_CALLBACK_FUNC)(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext); +SPXAPI synthesizer_started_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_token_request_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_synthesizing_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_completed_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_canceled_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_word_boundary_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI 
synthesizer_viseme_received_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_bookmark_reached_set_callback(SPXSYNTHHANDLE hsynth, PSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); +SPXAPI synthesizer_connection_connected_set_callback(SPXCONNECTIONHANDLE hConnection, CONNECTION_CALLBACK_FUNC pCallback, void * pvContext); +SPXAPI synthesizer_connection_disconnected_set_callback(SPXCONNECTIONHANDLE hConnection, CONNECTION_CALLBACK_FUNC pCallback, void * pvContext); + +SPXAPI synthesizer_synthesis_event_get_result(SPXEVENTHANDLE hevent, SPXRESULTHANDLE* phresult); +SPXAPI synthesizer_word_boundary_event_get_values(SPXEVENTHANDLE hevent, uint64_t *pAudioOffset, uint64_t *pDuration, + uint32_t *pTextOffset, uint32_t *pWordLength, SpeechSynthesis_BoundaryType *pBoundaryType); +SPXAPI synthesizer_event_get_result_id(SPXEVENTHANDLE hEvent, char* resultId, uint32_t resultIdLength); +SPXAPI__(const char*) synthesizer_event_get_text(SPXEVENTHANDLE hEvent); +SPXAPI synthesizer_viseme_event_get_values(SPXEVENTHANDLE hevent, uint64_t* pAudioOffset, uint32_t* pVisemeId); +SPXAPI__(const char*) synthesizer_viseme_event_get_animation(SPXEVENTHANDLE hEvent); +SPXAPI synthesizer_bookmark_event_get_values(SPXEVENTHANDLE hevent, uint64_t* pAudioOffset); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_translation_recognizer.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_translation_recognizer.h new file mode 100644 index 0000000..8912ffe --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_translation_recognizer.h @@ -0,0 +1,17 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + + +#pragma once +#include + + +// Todo: Translation recognizer management API. 
+ +typedef void(*PTRANSLATIONSYNTHESIS_CALLBACK_FUNC)(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext); +SPXAPI translator_synthesizing_audio_set_callback(SPXRECOHANDLE hreco, PTRANSLATIONSYNTHESIS_CALLBACK_FUNC pCallback, void* pvContext); + +SPXAPI translator_add_target_language(SPXRECOHANDLE hreco, const char* language); +SPXAPI translator_remove_target_language(SPXRECOHANDLE hreco, const char* language); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_translation_result.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_translation_result.h new file mode 100644 index 0000000..60bcae0 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_translation_result.h @@ -0,0 +1,14 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include + +SPXAPI translation_text_result_get_translation_count(SPXRESULTHANDLE handle, size_t * size); +SPXAPI translation_text_result_get_translation(SPXRESULTHANDLE handle, size_t index, char * language, char * text, size_t * language_size, size_t * text_size); + +// audioBuffer: pointer to the buffer for storing synthesis audio data. The parameter lengthPointer points to the variable saving the size of buffer. On return, *lengthPointer is set to the size of the buffer returned. +// If audioBuffer is nullptr or the length is smaller than the size required, the function returns SPXERR_BUFFER_TOO_SMALL. +SPXAPI translation_synthesis_result_get_audio_data(SPXRESULTHANDLE handle, uint8_t* audioBuffer, size_t* lengthPointer); diff --git a/third_party/azure_speech_sdk/include/c_api/speechapi_c_user.h b/third_party/azure_speech_sdk/include/c_api/speechapi_c_user.h new file mode 100644 index 0000000..bd4d705 --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/speechapi_c_user.h @@ -0,0 +1,13 @@ +// +// Copyright (c) Microsoft. All rights reserved.
+// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_c_user.h: Public API declarations for user related C methods and enumerations +// + +#pragma once +#include + +SPXAPI user_create_from_id(const char* user_id, SPXUSERHANDLE* huser); +SPXAPI user_release_handle(SPXUSERHANDLE huser); +SPXAPI user_get_id(SPXUSERHANDLE huser, char* user_id, size_t user_id_size); diff --git a/third_party/azure_speech_sdk/include/c_api/spxdebug.h b/third_party/azure_speech_sdk/include/c_api/spxdebug.h new file mode 100644 index 0000000..6e0131b --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/spxdebug.h @@ -0,0 +1,548 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// spxdebug.h: Public API definitions for global C Trace/Debug methods and related #defines +// + +#pragma once + +//------------------------------------------------------- +// Re-enabled ability to compile out all macros... +// However, currently still need to keep all macros until +// final review of all macros is complete. 
+//------------------------------------------------------- +#define SPX_CONFIG_TRACE_INCLUDE_DBG_WITH_ALL 1 + +#ifdef SPX_CONFIG_TRACE_INCLUDE_DBG_WITH_ALL +#if defined(SPX_CONFIG_TRACE_ALL) && !defined(SPX_CONFIG_DBG_TRACE_ALL) && (!defined(DEBUG) || !defined(_DEBUG)) +#define SPX_CONFIG_DBG_TRACE_ALL 1 +#endif +#endif + +//------------------------------------------------------- +// SPX_ and AZAC_ compatibility section +// (must preceed #include ) +//------------------------------------------------------- + +#if defined(SPX_CONFIG_DBG_TRACE_ALL) && !defined(AZAC_CONFIG_DBG_TRACE_ALL) +#define AZAC_CONFIG_DBG_TRACE_ALL SPX_CONFIG_DBG_TRACE_ALL +#elif !defined(SPX_CONFIG_DBG_TRACE_ALL) && defined(AZAC_CONFIG_DBG_TRACE_ALL) +#define SPX_CONFIG_DBG_TRACE_ALL AZAC_CONFIG_DBG_TRACE_ALL +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_VERBOSE) && !defined(AZAC_CONFIG_DBG_TRACE_VERBOSE) +#define AZAC_CONFIG_DBG_TRACE_VERBOSE SPX_CONFIG_DBG_TRACE_VERBOSE +#elif !defined(SPX_CONFIG_DBG_TRACE_VERBOSE) && defined(AZAC_CONFIG_DBG_TRACE_VERBOSE) +#define SPX_CONFIG_DBG_TRACE_VERBOSE AZAC_CONFIG_DBG_TRACE_VERBOSE +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_INFO) && !defined(AZAC_CONFIG_DBG_TRACE_INFO) +#define AZAC_CONFIG_DBG_TRACE_INFO SPX_CONFIG_DBG_TRACE_INFO +#elif !defined(SPX_CONFIG_DBG_TRACE_INFO) && defined(AZAC_CONFIG_DBG_TRACE_INFO) +#define SPX_CONFIG_DBG_TRACE_INFO AZAC_CONFIG_DBG_TRACE_INFO +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_WARNING) && !defined(AZAC_CONFIG_DBG_TRACE_WARNING) +#define AZAC_CONFIG_DBG_TRACE_WARNING SPX_CONFIG_DBG_TRACE_WARNING +#elif !defined(SPX_CONFIG_DBG_TRACE_WARNING) && defined(AZAC_CONFIG_DBG_TRACE_WARNING) +#define SPX_CONFIG_DBG_TRACE_WARNING AZAC_CONFIG_DBG_TRACE_WARNING +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_ERROR) && !defined(AZAC_CONFIG_DBG_TRACE_ERROR) +#define AZAC_CONFIG_DBG_TRACE_ERROR SPX_CONFIG_DBG_TRACE_ERROR +#elif !defined(SPX_CONFIG_DBG_TRACE_ERROR) && defined(AZAC_CONFIG_DBG_TRACE_ERROR) +#define 
SPX_CONFIG_DBG_TRACE_ERROR AZAC_CONFIG_DBG_TRACE_ERROR +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_FUNCTION) && !defined(AZAC_CONFIG_DBG_TRACE_FUNCTION) +#define AZAC_CONFIG_DBG_TRACE_FUNCTION SPX_CONFIG_DBG_TRACE_FUNCTION +#elif !defined(SPX_CONFIG_DBG_TRACE_FUNCTION) && defined(AZAC_CONFIG_DBG_TRACE_FUNCTION) +#define SPX_CONFIG_DBG_TRACE_FUNCTION AZAC_CONFIG_DBG_TRACE_FUNCTION +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_SCOPE) && !defined(AZAC_CONFIG_DBG_TRACE_SCOPE) +#define AZAC_CONFIG_DBG_TRACE_SCOPE SPX_CONFIG_DBG_TRACE_SCOPE +#elif !defined(SPX_CONFIG_DBG_TRACE_SCOPE) && defined(AZAC_CONFIG_DBG_TRACE_SCOPE) +#define SPX_CONFIG_DBG_TRACE_SCOPE AZAC_CONFIG_DBG_TRACE_SCOPE +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_ASSERT) && !defined(AZAC_CONFIG_DBG_TRACE_ASSERT) +#define AZAC_CONFIG_DBG_TRACE_ASSERT SPX_CONFIG_DBG_TRACE_ASSERT +#elif !defined(SPX_CONFIG_DBG_TRACE_ASSERT) && defined(AZAC_CONFIG_DBG_TRACE_ASSERT) +#define SPX_CONFIG_DBG_TRACE_ASSERT AZAC_CONFIG_DBG_TRACE_ASSERT +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_VERIFY) && !defined(AZAC_CONFIG_DBG_TRACE_VERIFY) +#define AZAC_CONFIG_DBG_TRACE_VERIFY SPX_CONFIG_DBG_TRACE_VERIFY +#elif !defined(SPX_CONFIG_DBG_TRACE_VERIFY) && defined(AZAC_CONFIG_DBG_TRACE_VERIFY) +#define SPX_CONFIG_DBG_TRACE_VERIFY AZAC_CONFIG_DBG_TRACE_VERIFY +#endif + +#if defined(SPX_CONFIG_TRACE_ALL) && !defined(AZAC_CONFIG_TRACE_ALL) +#define AZAC_CONFIG_TRACE_ALL SPX_CONFIG_TRACE_ALL +#elif !defined(SPX_CONFIG_TRACE_ALL) && defined(AZAC_CONFIG_TRACE_ALL) +#define SPX_CONFIG_TRACE_ALL AZAC_CONFIG_TRACE_ALL +#endif + +#if defined(SPX_CONFIG_TRACE_VERBOSE) && !defined(AZAC_CONFIG_TRACE_VERBOSE) +#define AZAC_CONFIG_TRACE_VERBOSE SPX_CONFIG_TRACE_VERBOSE +#elif !defined(SPX_CONFIG_TRACE_VERBOSE) && defined(AZAC_CONFIG_TRACE_VERBOSE) +#define SPX_CONFIG_TRACE_VERBOSE AZAC_CONFIG_TRACE_VERBOSE +#endif + +#if defined(SPX_CONFIG_TRACE_INFO) && !defined(AZAC_CONFIG_TRACE_INFO) +#define AZAC_CONFIG_TRACE_INFO SPX_CONFIG_TRACE_INFO +#elif 
!defined(SPX_CONFIG_TRACE_INFO) && defined(AZAC_CONFIG_TRACE_INFO) +#define SPX_CONFIG_TRACE_INFO AZAC_CONFIG_TRACE_INFO +#endif + +#if defined(SPX_CONFIG_TRACE_WARNING) && !defined(AZAC_CONFIG_TRACE_WARNING) +#define AZAC_CONFIG_TRACE_WARNING SPX_CONFIG_TRACE_WARNING +#elif !defined(SPX_CONFIG_TRACE_WARNING) && defined(AZAC_CONFIG_TRACE_WARNING) +#define SPX_CONFIG_TRACE_WARNING AZAC_CONFIG_TRACE_WARNING +#endif + +#if defined(SPX_CONFIG_TRACE_ERROR) && !defined(AZAC_CONFIG_TRACE_ERROR) +#define AZAC_CONFIG_TRACE_ERROR SPX_CONFIG_TRACE_ERROR +#elif !defined(SPX_CONFIG_TRACE_ERROR) && defined(AZAC_CONFIG_TRACE_ERROR) +#define SPX_CONFIG_TRACE_ERROR AZAC_CONFIG_TRACE_ERROR +#endif + +#if defined(SPX_CONFIG_TRACE_FUNCTION) && !defined(AZAC_CONFIG_TRACE_FUNCTION) +#define AZAC_CONFIG_TRACE_FUNCTION SPX_CONFIG_TRACE_FUNCTION +#elif !defined(SPX_CONFIG_TRACE_FUNCTION) && defined(AZAC_CONFIG_TRACE_FUNCTION) +#define SPX_CONFIG_TRACE_FUNCTION AZAC_CONFIG_TRACE_FUNCTION +#endif + +#if defined(SPX_CONFIG_TRACE_SCOPE) && !defined(AZAC_CONFIG_TRACE_SCOPE) +#define AZAC_CONFIG_TRACE_SCOPE SPX_CONFIG_TRACE_SCOPE +#elif !defined(SPX_CONFIG_TRACE_SCOPE) && defined(AZAC_CONFIG_TRACE_SCOPE) +#define SPX_CONFIG_TRACE_SCOPE AZAC_CONFIG_TRACE_SCOPE +#endif + +#if defined(SPX_CONFIG_TRACE_THROW_ON_FAIL) && !defined(AZAC_CONFIG_TRACE_THROW_ON_FAIL) +#define AZAC_CONFIG_TRACE_THROW_ON_FAIL SPX_CONFIG_TRACE_THROW_ON_FAIL +#elif !defined(SPX_CONFIG_TRACE_THROW_ON_FAIL) && defined(AZAC_CONFIG_TRACE_THROW_ON_FAIL) +#define SPX_CONFIG_TRACE_THROW_ON_FAIL AZAC_CONFIG_TRACE_THROW_ON_FAIL +#endif + +#if defined(SPX_CONFIG_TRACE_REPORT_ON_FAIL) && !defined(AZAC_CONFIG_TRACE_REPORT_ON_FAIL) +#define AZAC_CONFIG_TRACE_REPORT_ON_FAIL SPX_CONFIG_TRACE_REPORT_ON_FAIL +#elif !defined(SPX_CONFIG_TRACE_REPORT_ON_FAIL) && defined(AZAC_CONFIG_TRACE_REPORT_ON_FAIL) +#define SPX_CONFIG_TRACE_REPORT_ON_FAIL AZAC_CONFIG_TRACE_REPORT_ON_FAIL +#endif + +#if defined(SPX_CONFIG_TRACE_RETURN_ON_FAIL) && 
!defined(AZAC_CONFIG_TRACE_RETURN_ON_FAIL) +#define AZAC_CONFIG_TRACE_RETURN_ON_FAIL SPX_CONFIG_TRACE_RETURN_ON_FAIL +#elif !defined(SPX_CONFIG_TRACE_RETURN_ON_FAIL) && defined(AZAC_CONFIG_TRACE_RETURN_ON_FAIL) +#define SPX_CONFIG_TRACE_RETURN_ON_FAIL AZAC_CONFIG_TRACE_RETURN_ON_FAIL +#endif + +#if defined(SPX_CONFIG_TRACE_EXITFN_ON_FAIL) && !defined(AZAC_CONFIG_TRACE_EXITFN_ON_FAIL) +#define AZAC_CONFIG_TRACE_EXITFN_ON_FAIL SPX_CONFIG_TRACE_EXITFN_ON_FAIL +#elif !defined(SPX_CONFIG_TRACE_EXITFN_ON_FAIL) && defined(AZAC_CONFIG_TRACE_EXITFN_ON_FAIL) +#define SPX_CONFIG_TRACE_EXITFN_ON_FAIL AZAC_CONFIG_TRACE_EXITFN_ON_FAIL +#endif + +#if !defined(__AZAC_THROW_HR_IMPL) && defined(__SPX_THROW_HR_IMPL) +#define __AZAC_THROW_HR_IMPL __SPX_THROW_HR_IMPL +#elif !defined(__SPX_THROW_HR_IMPL) && defined(__AZAC_THROW_HR_IMPL) +#define __SPX_THROW_HR_IMPL __AZAC_THROW_HR_IMPL +#elif !defined(__AZAC_THROW_HR_IMPL) && !defined(__SPX_THROW_HR_IMPL) +#define __AZAC_THROW_HR_IMPL __azac_rethrow +#define __SPX_THROW_HR_IMPL __azac_rethrow +#else +#error Both __AZAC_THROW_HR_IMPL and __SPX_THROW_HR_IMPL cannot be defined at the same time +#endif + +//------------------------------------------------------- +// SPX_ and SPX_DBG_ macro configuration +//------------------------------------------------------- + +#ifdef SPX_CONFIG_DBG_TRACE_ALL +#define SPX_CONFIG_DBG_TRACE_VERBOSE 1 +#define SPX_CONFIG_DBG_TRACE_INFO 1 +#define SPX_CONFIG_DBG_TRACE_WARNING 1 +#define SPX_CONFIG_DBG_TRACE_ERROR 1 +#define SPX_CONFIG_DBG_TRACE_FUNCTION 1 +#define SPX_CONFIG_DBG_TRACE_SCOPE 1 +#define SPX_CONFIG_DBG_TRACE_ASSERT 1 +#define SPX_CONFIG_DBG_TRACE_VERIFY 1 +#ifndef SPX_CONFIG_TRACE_ALL +#define SPX_CONFIG_TRACE_ALL 1 +#endif +#endif // SPX_CONFIG_DBG_TRACE_ALL + +#ifdef SPX_CONFIG_TRACE_ALL +#define SPX_CONFIG_TRACE_VERBOSE 1 +#define SPX_CONFIG_TRACE_INFO 1 +#define SPX_CONFIG_TRACE_WARNING 1 +#define SPX_CONFIG_TRACE_ERROR 1 +#define SPX_CONFIG_TRACE_FUNCTION 1 +#define SPX_CONFIG_TRACE_SCOPE 
1 +#define SPX_CONFIG_TRACE_THROW_ON_FAIL 1 +#define SPX_CONFIG_TRACE_REPORT_ON_FAIL 1 +#define SPX_CONFIG_TRACE_RETURN_ON_FAIL 1 +#define SPX_CONFIG_TRACE_EXITFN_ON_FAIL 1 +#endif // SPX_CONFIG_TRACE_ALL + +//------------------------------------------------------- +// #include section ... +// (must come after everything above) +//------------------------------------------------------- + +#include +#include +#include + +#ifndef _MSC_VER +// macros in this header generate a bunch of +// "ISO C++11 requires at least one argument for the "..." in a variadic macro" errors. +// system_header pragma is the only mechanism that helps to suppress them. +// https://stackoverflow.com/questions/35587137/how-to-suppress-gcc-variadic-macro-argument-warning-for-zero-arguments-for-a-par +// TODO: try to make macros standard-compliant. +#pragma GCC system_header +#endif + +//----------------------------------------------------------- +// SPX_TRACE macro common implementations +//----------------------------------------------------------- + +#define __SPX_TRACE_LEVEL_INFO __AZAC_TRACE_LEVEL_INFO // Trace_Info +#define __SPX_TRACE_LEVEL_WARNING __AZAC_TRACE_LEVEL_WARNING // Trace_Warning +#define __SPX_TRACE_LEVEL_ERROR __AZAC_TRACE_LEVEL_ERROR // Trace_Error +#define __SPX_TRACE_LEVEL_VERBOSE __AZAC_TRACE_LEVEL_VERBOSE // Trace_Verbose + +#ifndef __SPX_DO_TRACE_IMPL +#define __SPX_DO_TRACE_IMPL __AZAC_DO_TRACE_IMPL +#endif + +#define __SPX_DOTRACE(level, title, fileName, lineNumber, ...) \ + __AZAC_DOTRACE(level, title, fileName, lineNumber, ##__VA_ARGS__) + +#define __SPX_TRACE_INFO(title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_INFO(title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_INFO_IF(cond, title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_INFO_IF(cond, title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_WARNING(title, fileName, lineNumber, msg, ...) 
\ + __AZAC_TRACE_WARNING(title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_WARNING_IF(cond, title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_WARNING_IF(cond, title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_ERROR(title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_ERROR(title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_ERROR_IF(cond, title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_ERROR_IF(cond, title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_VERBOSE(title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_VERBOSE(title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define __SPX_TRACE_VERBOSE_IF(cond, title, fileName, lineNumber, msg, ...) \ + __AZAC_TRACE_VERBOSE_IF(cond, title, fileName, lineNumber, msg, ##__VA_ARGS__) + +#define ___SPX_EXPR_AS_STRING(_String) \ + ___AZAC_EXPR_AS_STRING(_String) + +#define __SPX_EXPR_AS_STRING(_String) \ + __AZAC_EXPR_AS_STRING(_String) + +#define __SPX_TRACE_HR(title, fileName, lineNumber, hr, x) \ + __AZAC_TRACE_HR(title, fileName, lineNumber, hr, x) + +#define __SPX_REPORT_ON_FAIL(title, fileName, lineNumber, hr) \ + __AZAC_REPORT_ON_FAIL(title, fileName, lineNumber, hr) + +#define __SPX_REPORT_ON_FAIL_IFNOT(title, fileName, lineNumber, hr, hrNot) \ + __AZAC_REPORT_ON_FAIL_IFNOT(title, fileName, lineNumber, hr, hrNot) + +#define __SPX_T_RETURN_HR(title, fileName, lineNumber, hr) \ + __AZAC_T_RETURN_HR(title, fileName, lineNumber, hr) + +#define __SPX_T_RETURN_HR_IF(title, fileName, lineNumber, hr, cond) \ + __AZAC_T_RETURN_HR_IF(title, fileName, lineNumber, hr, cond) + +#define __SPX_T_RETURN_ON_FAIL(title, fileName, lineNumber, hr) \ + __AZAC_T_RETURN_ON_FAIL(title, fileName, lineNumber, hr) + +#define __SPX_T_RETURN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + __AZAC_T_RETURN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) + +#define __SPX_RETURN_HR(hr) \ + __AZAC_RETURN_HR(hr) + +#define 
__SPX_RETURN_HR_IF(hr, cond) \ + __AZAC_RETURN_HR_IF(hr, cond) + +#define __SPX_RETURN_ON_FAIL(hr) \ + __AZAC_RETURN_ON_FAIL(hr) + +#define __SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) \ + __AZAC_RETURN_ON_FAIL_IF_NOT(hr, hrNot) + +#define SPX_EXITFN_CLEANUP AZAC_EXITFN_CLEANUP + +#define __SPX_T_EXITFN_HR(title, fileName, lineNumber, hr) \ + __AZAC_T_EXITFN_HR(title, fileName, lineNumber, hr) + +#define __SPX_T_EXITFN_HR_IF(title, fileName, lineNumber, hr, cond) \ + __AZAC_T_EXITFN_HR_IF(title, fileName, lineNumber, hr, cond) + +#define __SPX_T_EXITFN_ON_FAIL(title, fileName, lineNumber, hr) \ + __AZAC_T_EXITFN_ON_FAIL(title, fileName, lineNumber, hr) + +#define __SPX_T_EXITFN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + __AZAC_T_EXITFN_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) + +#define __SPX_EXITFN_HR(hr) \ + __AZAC_EXITFN_HR(hr) + +#define __SPX_EXITFN_HR_IF(hr, cond) \ + __AZAC_EXITFN_HR_IF(hr, cond) + +#define __SPX_EXITFN_ON_FAIL(hr) \ + __AZAC_EXITFN_ON_FAIL(hr) + +#define __SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) \ + __AZAC_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) + +#define __SPX_TRACE_ASSERT(title, fileName, lineNumber, expr) \ + __AZAC_TRACE_ASSERT(title, fileName, lineNumber, expr) + +#define __SPX_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ...) \ + __AZAC_TRACE_ASSERT_MSG(title, fileName, lineNumber, expr, ##__VA_ARGS__) + +#define __SPX_DBG_ASSERT(title, fileName, lineNumber, expr) \ + __AZAC_DBG_ASSERT(title, fileName, lineNumber, expr) + +#define __SPX_DBG_ASSERT_WITH_MESSAGE(title, fileName, lineNumber, expr, ...) \ + __AZAC_DBG_ASSERT_WITH_MESSAGE(title, fileName, lineNumber, expr, ##__VA_ARGS__) + +#define __SPX_DBG_VERIFY(title, fileName, lineNumber, expr) \ + __AZAC_DBG_VERIFY(title, fileName, lineNumber, expr) + +#define __SPX_DBG_VERIFY_WITH_MESSAGE(title, fileName, lineNumber, expr, ...) 
\ + __AZAC_DBG_VERIFY_WITH_MESSAGE(title, fileName, lineNumber, expr, ##__VA_ARGS__) + +#ifdef __cplusplus + +#define __SPX_TRACE_SCOPE(t1, fileName, lineNumber, t2, x, y) \ + __AZAC_TRACE_SCOPE(t1, fileName, lineNumber, t2, x, y) + +#ifndef __SPX_THROW_HR +#define __SPX_THROW_HR(hr) __SPX_THROW_HR_IMPL(hr) +#endif + +#define __SPX_T_THROW_ON_FAIL(title, fileName, lineNumber, hr) \ + __AZAC_T_THROW_ON_FAIL(title, fileName, lineNumber, hr) + +#define __SPX_T_THROW_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) \ + __AZAC_T_THROW_ON_FAIL_IF_NOT(title, fileName, lineNumber, hr, hrNot) + +#define __SPX_T_THROW_HR_IF(title, fileName, lineNumber, hr, cond) \ + __AZAC_T_THROW_HR_IF(title, fileName, lineNumber, hr, cond) + +#define __SPX_T_THROW_HR(title, fileName, lineNumber, hr) \ + __AZAC_T_THROW_HR(title, fileName, lineNumber, hr) + +#define __SPX_THROW_ON_FAIL(hr) \ + __AZAC_THROW_ON_FAIL(hr) + +#define __SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) \ + __AZAC_THROW_ON_FAIL_IF_NOT(hr, hrNot) + +#define __SPX_THROW_HR_IF(hr, cond) \ + __AZAC_THROW_HR_IF(hr, cond) + +#endif // __cplusplus + + +//------------------------------------------------------- +// SPX_ macro definitions +//------------------------------------------------------- + +#ifdef SPX_CONFIG_TRACE_VERBOSE +#define SPX_TRACE_VERBOSE(msg, ...) __SPX_TRACE_VERBOSE("SPX_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_TRACE_VERBOSE_IF(cond, msg, ...) __SPX_TRACE_VERBOSE_IF(cond, "SPX_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_TRACE_VERBOSE(...) +#define SPX_TRACE_VERBOSE_IF(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_VERBOSE +#define SPX_DBG_TRACE_VERBOSE(msg, ...) __SPX_TRACE_VERBOSE("SPX_DBG_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_DBG_TRACE_VERBOSE_IF(cond, msg, ...) __SPX_TRACE_VERBOSE_IF(cond, "SPX_DBG_TRACE_VERBOSE: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_DBG_TRACE_VERBOSE(...) 
+#define SPX_DBG_TRACE_VERBOSE_IF(...) +#endif + +#ifdef SPX_CONFIG_TRACE_INFO +#define SPX_TRACE_INFO(msg, ...) __SPX_TRACE_INFO("SPX_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_TRACE_INFO_IF(cond, msg, ...) __SPX_TRACE_INFO_IF(cond, "SPX_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_TRACE_INFO(...) +#define SPX_TRACE_INFO_IF(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_INFO +#define SPX_DBG_TRACE_INFO(msg, ...) __SPX_TRACE_INFO("SPX_DBG_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_DBG_TRACE_INFO_IF(cond, msg, ...) __SPX_TRACE_INFO_IF(cond, "SPX_DBG_TRACE_INFO: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_DBG_TRACE_INFO(...) +#define SPX_DBG_TRACE_INFO_IF(...) +#endif + +#ifdef SPX_CONFIG_TRACE_WARNING +#define SPX_TRACE_WARNING(msg, ...) __SPX_TRACE_WARNING("SPX_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_TRACE_WARNING_IF(cond, msg, ...) __SPX_TRACE_WARNING_IF(cond, "SPX_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_TRACE_WARNING(...) +#define SPX_TRACE_WARNING_IF(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_WARNING +#define SPX_DBG_TRACE_WARNING(msg, ...) __SPX_TRACE_WARNING("SPX_DBG_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_DBG_TRACE_WARNING_IF(cond, msg, ...) __SPX_TRACE_WARNING_IF(cond, "SPX_DBG_TRACE_WARNING:", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_DBG_TRACE_WARNING(...) +#define SPX_DBG_TRACE_WARNING_IF(...) +#endif + +#ifdef SPX_CONFIG_TRACE_ERROR +#define SPX_TRACE_ERROR(msg, ...) __SPX_TRACE_ERROR("SPX_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_TRACE_ERROR_IF(cond, msg, ...) __SPX_TRACE_ERROR_IF(cond, "SPX_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_TRACE_ERROR(...) +#define SPX_TRACE_ERROR_IF(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_ERROR +#define SPX_DBG_TRACE_ERROR(msg, ...) 
__SPX_TRACE_ERROR("SPX_DBG_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#define SPX_DBG_TRACE_ERROR_IF(cond, msg, ...) __SPX_TRACE_ERROR_IF(cond, "SPX_DBG_TRACE_ERROR: ", __FILE__, __LINE__, msg, ##__VA_ARGS__) +#else +#define SPX_DBG_TRACE_ERROR(...) +#define SPX_DBG_TRACE_ERROR_IF(...) +#endif + +#ifdef SPX_CONFIG_TRACE_FUNCTION +#define SPX_TRACE_FUNCTION(...) __SPX_TRACE_VERBOSE("SPX_TRACE_FUNCTION: ", __FILE__, __LINE__, __FUNCTION__) +#else +#define SPX_TRACE_FUNCTION(...) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_FUNCTION +#define SPX_DBG_TRACE_FUNCTION(...) __SPX_TRACE_VERBOSE("SPX_DBG_TRACE_FUNCTION: ", __FILE__, __LINE__, __FUNCTION__) +#else +#define SPX_DBG_TRACE_FUNCTION(...) +#endif + +#ifdef SPX_CONFIG_TRACE_REPORT_ON_FAIL +#define SPX_REPORT_ON_FAIL(hr) __SPX_REPORT_ON_FAIL("SPX_REPORT_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_REPORT_ON_FAIL_IFNOT(hr, hrNot) __SPX_REPORT_ON_FAIL_IFNOT("SPX_REPORT_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define SPX_REPORT_ON_FAIL(hr) UNUSED(hr) +#define SPX_REPORT_ON_FAIL_IFNOT(hr, hrNot) UNUSED(hr); UNUSED(hrNot) +#endif + +#ifdef SPX_CONFIG_TRACE_RETURN_ON_FAIL +#define SPX_RETURN_HR(hr) __SPX_T_RETURN_HR("SPX_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_RETURN_HR_IF(hr, cond) __SPX_T_RETURN_HR_IF("SPX_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr, cond) +#define SPX_RETURN_ON_FAIL(hr) __SPX_T_RETURN_ON_FAIL("SPX_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) __SPX_T_RETURN_ON_FAIL_IF_NOT("SPX_RETURN_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define SPX_RETURN_HR(hr) __SPX_RETURN_HR(hr) +#define SPX_RETURN_HR_IF(hr, cond) __SPX_RETURN_HR_IF(hr, cond) +#define SPX_RETURN_ON_FAIL(hr) __SPX_RETURN_ON_FAIL(hr) +#define SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) __SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) +#endif + +#define SPX_IFTRUE_RETURN_HR(cond, hr) SPX_RETURN_HR_IF(hr, cond) +#define SPX_IFFALSE_RETURN_HR(cond, hr) SPX_RETURN_HR_IF(hr, 
!(cond)) +#define SPX_IFFAILED_RETURN_HR(hr) SPX_RETURN_ON_FAIL(hr) +#define SPX_IFFAILED_RETURN_HR_IFNOT(hr, hrNot) SPX_RETURN_ON_FAIL_IF_NOT(hr, hrNot) + +#ifdef SPX_CONFIG_TRACE_EXITFN_ON_FAIL +#define SPX_EXITFN_HR(hr) __SPX_T_EXITFN_HR("SPX_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_EXITFN_HR_IF(hr, cond) __SPX_T_EXITFN_HR_IF("SPX_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr, cond) +#define SPX_EXITFN_ON_FAIL(hr) __SPX_T_EXITFN_ON_FAIL("SPX_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) __SPX_T_EXITFN_ON_FAIL_IF_NOT("SPX_EXITFN_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#else +#define SPX_EXITFN_HR(hr) __SPX_EXITFN_HR(hr) +#define SPX_EXITFN_HR_IF(hr, cond) __SPX_EXITFN_HR_IF(hr, cond) +#define SPX_EXITFN_ON_FAIL(hr) __SPX_EXITFN_ON_FAIL(hr) +#define SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) __SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) +#endif + +#define SPX_IFTRUE_EXITFN_WHR(cond, hr) SPX_EXITFN_HR_IF(hr, cond) +#define SPX_IFFALSE_EXITFN_WHR(cond, hr) SPX_EXITFN_HR_IF(hr, !(cond)) +#define SPX_IFFAILED_EXITFN_WHR(hr) SPX_EXITFN_ON_FAIL(hr) +#define SPX_IFFAILED_EXITFN_WHR_IFNOT(hr, hrNot) SPX_EXITFN_ON_FAIL_IF_NOT(hr, hrNot) + +#define SPX_IFTRUE_EXITFN_CLEANUP(cond, expr) AZAC_IFTRUE_EXITFN_CLEANUP(cond, expr) +#define SPX_IFFALSE_EXITFN_CLEANUP(cond, expr) AZAC_IFFALSE_EXITFN_CLEANUP(cond, expr) + +#if defined(SPX_CONFIG_DBG_TRACE_ASSERT) && (defined(DEBUG) || defined(_DEBUG)) +#define SPX_DBG_ASSERT(expr) __SPX_DBG_ASSERT("SPX_ASSERT: ", __FILE__, __LINE__, expr) +#define SPX_DBG_ASSERT_WITH_MESSAGE(expr, ...) __SPX_DBG_ASSERT_WITH_MESSAGE("SPX_ASSERT: ", __FILE__, __LINE__, expr, ##__VA_ARGS__) +#else +#define SPX_DBG_ASSERT(expr) +#define SPX_DBG_ASSERT_WITH_MESSAGE(expr, ...) +#endif + +#if defined(SPX_CONFIG_DBG_TRACE_VERIFY) && (defined(DEBUG) || defined(_DEBUG)) +#define SPX_DBG_VERIFY(expr) __SPX_DBG_VERIFY("SPX_VERIFY: ", __FILE__, __LINE__, expr) +#define SPX_DBG_VERIFY_WITH_MESSAGE(expr, ...) 
__SPX_DBG_VERIFY_WITH_MESSAGE("SPX_VERIFY: ", __FILE__, __LINE__, expr, ##__VA_ARGS__) +#else +#define SPX_DBG_VERIFY(expr) (expr) +#define SPX_DBG_VERIFY_WITH_MESSAGE(expr, ...) (expr) +#endif + +#define SPX_IFTRUE(cond, expr) AZAC_IFTRUE(cond, expr) +#define SPX_IFFALSE(cond, expr) AZAC_IFFALSE(cond, expr) + +#ifdef __cplusplus + +#ifdef SPX_CONFIG_TRACE_SCOPE +#define SPX_TRACE_SCOPE(x, y) __SPX_TRACE_SCOPE("SPX_TRACE_SCOPE_ENTER: ", __FILE__, __LINE__, "SPX_TRACE_SCOPE_EXIT: ", x, y) +#else +#define SPX_TRACE_SCOPE(x, y) +#endif + +#ifdef SPX_CONFIG_DBG_TRACE_SCOPE +#define SPX_DBG_TRACE_SCOPE(x, y) __SPX_TRACE_SCOPE("SPX_DBG_TRACE_SCOPE_ENTER: ", __FILE__, __LINE__, "SPX_DBG_TRACE_SCOPE_EXIT: ", x, y) +#else +#define SPX_DBG_TRACE_SCOPE(x, y) +#endif + +#ifdef SPX_CONFIG_TRACE_THROW_ON_FAIL +#define SPX_THROW_ON_FAIL(hr) __SPX_T_THROW_ON_FAIL("SPX_THROW_ON_FAIL: ", __FILE__, __LINE__, hr) +#define SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) __SPX_T_THROW_ON_FAIL_IF_NOT("SPX_THROW_ON_FAIL: ", __FILE__, __LINE__, hr, hrNot) +#define SPX_THROW_HR_IF(hr, cond) __SPX_T_THROW_HR_IF("SPX_THROW_HR_IF: ", __FILE__, __LINE__, hr, cond) +#define SPX_THROW_HR(hr) __SPX_T_THROW_HR("SPX_THROW_HR: ", __FILE__, __LINE__, hr) +#else +#define SPX_THROW_ON_FAIL(hr) __SPX_THROW_ON_FAIL(hr) +#define SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) __SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) +#define SPX_THROW_HR_IF(hr, cond) __SPX_THROW_HR_IF(hr, cond) +#define SPX_THROW_HR(hr) __SPX_THROW_HR(hr) +#endif + +#define SPX_IFFAILED_THROW_HR(hr) SPX_THROW_ON_FAIL(hr) +#define SPX_IFFAILED_THROW_HR_IFNOT(hr, hrNot) SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) + +#else // __cplusplus + +#define SPX_TRACE_SCOPE(x, y) static_assert(false) +#define SPX_DBG_TRACE_SCOPE(x, y) static_assert(false) +#define SPX_THROW_ON_FAIL(hr) static_assert(false) +#define SPX_THROW_ON_FAIL_IF_NOT(hr, hrNot) static_assert(false) +#define SPX_THROW_HR_IF(hr, cond) static_assert(false) +#define SPX_THROW_HR(hr) static_assert(false) +#define 
SPX_IFFAILED_THROW_HR(hr) static_assert(false) +#define SPX_IFFAILED_THROW_HR_IFNOT(hr, hrNot) static_assert(false) + +#endif // __cplusplus diff --git a/third_party/azure_speech_sdk/include/c_api/spxerror.h b/third_party/azure_speech_sdk/include/c_api/spxerror.h new file mode 100644 index 0000000..1d266fd --- /dev/null +++ b/third_party/azure_speech_sdk/include/c_api/spxerror.h @@ -0,0 +1,455 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) + +#define SPXHR AZACHR +#define SPX_NOERROR AZAC_ERR_NONE +#define SPX_INIT_HR(hr) AZAC_INIT_HR(hr) +#define SPX_SUCCEEDED(x) AZAC_SUCCEEDED(x) +#define SPX_FAILED(x) AZAC_FAILED(x) +#define __SPX_ERRCODE_FAILED(x) __AZAC_ERRCODE_FAILED(x) + +/// +/// The function is not implemented. +/// +#define SPXERR_NOT_IMPL \ + AZAC_ERR_NOT_IMPL + +/// +/// The object has not been properly initialized. +/// +#define SPXERR_UNINITIALIZED \ + AZAC_ERR_UNINITIALIZED + +/// +/// The object has already been initialized. +/// +#define SPXERR_ALREADY_INITIALIZED \ + AZAC_ERR_ALREADY_INITIALIZED + +/// +/// An unhandled exception was detected. +/// +#define SPXERR_UNHANDLED_EXCEPTION \ + AZAC_ERR_UNHANDLED_EXCEPTION + +/// +/// The object or property was not found. +/// +#define SPXERR_NOT_FOUND \ + AZAC_ERR_NOT_FOUND + +/// +/// One or more arguments are not valid. +/// +#define SPXERR_INVALID_ARG \ + AZAC_ERR_INVALID_ARG + +/// +/// The specified timeout value has elapsed. +/// +#define SPXERR_TIMEOUT \ + AZAC_ERR_TIMEOUT + +/// +/// The asynchronous operation is already in progress. +/// +#define SPXERR_ALREADY_IN_PROGRESS \ + AZAC_ERR_ALREADY_IN_PROGRESS + +/// +/// The attempt to open the file failed. +/// +#define SPXERR_FILE_OPEN_FAILED \ + AZAC_ERR_FILE_OPEN_FAILED + +/// +/// The end of the file was reached unexpectedly. 
+/// +#define SPXERR_UNEXPECTED_EOF \ + AZAC_ERR_UNEXPECTED_EOF + +/// +/// Invalid audio header encountered. +/// +#define SPXERR_INVALID_HEADER \ + AZAC_ERR_INVALID_HEADER + +/// +/// The requested operation cannot be performed while audio is pumping +/// +#define SPXERR_AUDIO_IS_PUMPING \ + AZAC_ERR_AUDIO_IS_PUMPING + +/// +/// Unsupported audio format. +/// +#define SPXERR_UNSUPPORTED_FORMAT \ + AZAC_ERR_UNSUPPORTED_FORMAT + +/// +/// Operation aborted. +/// +#define SPXERR_ABORT \ + AZAC_ERR_ABORT + +/// +/// Microphone is not available. +/// +#define SPXERR_MIC_NOT_AVAILABLE \ + AZAC_ERR_MIC_NOT_AVAILABLE + +/// +/// An invalid state was encountered. +/// +#define SPXERR_INVALID_STATE \ + AZAC_ERR_INVALID_STATE + +/// +/// Attempting to create a UUID failed. +/// +#define SPXERR_UUID_CREATE_FAILED \ + AZAC_ERR_UUID_CREATE_FAILED + +/// +/// An unexpected session state transition was encountered when setting the session audio format. +/// +/// +/// Valid transitions are: +/// * WaitForPumpSetFormatStart --> ProcessingAudio (at the beginning of stream) +/// * StoppingPump --> WaitForAdapterCompletedSetFormatStop (at the end of stream) +/// * ProcessingAudio --> WaitForAdapterCompletedSetFormatStop (when the stream runs out of data) +/// All other state transitions are invalid. +/// +#define SPXERR_SETFORMAT_UNEXPECTED_STATE_TRANSITION \ + AZAC_ERR_SETFORMAT_UNEXPECTED_STATE_TRANSITION + +/// +/// An unexpected session state was encountered in while processing audio. +/// +/// +/// Valid states to encounter are: +/// * ProcessingAudio: We're allowed to process audio while in this state. +/// * StoppingPump: We're allowed to be called to process audio, but we'll ignore the data passed in while we're attempting to stop the pump. +/// All other states are invalid while processing audio. 
+/// +#define SPXERR_PROCESS_AUDIO_INVALID_STATE \ + AZAC_ERR_PROCESS_AUDIO_INVALID_STATE + +/// +/// An unexpected state transition was encountered while attempting to start recognizing. +/// +/// +/// A valid transition is: +/// * Idle --> WaitForPumpSetFormatStart +/// All other state transitions are invalid when attempting to start recognizing +/// +#define SPXERR_START_RECOGNIZING_INVALID_STATE_TRANSITION \ + AZAC_ERR_START_RECOGNIZING_INVALID_STATE_TRANSITION + +/// +/// An unexpected error was encountered when trying to create an internal object. +/// +#define SPXERR_UNEXPECTED_CREATE_OBJECT_FAILURE \ + AZAC_ERR_UNEXPECTED_CREATE_OBJECT_FAILURE + +/// +/// An error in the audio-capturing system. +/// +#define SPXERR_MIC_ERROR \ + AZAC_ERR_MIC_ERROR + +/// +/// The requested operation cannot be performed; there is no audio input. +/// +#define SPXERR_NO_AUDIO_INPUT \ + AZAC_ERR_NO_AUDIO_INPUT + +/// +/// An unexpected error was encountered when trying to access the USP site. +/// +#define SPXERR_UNEXPECTED_USP_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_USP_SITE_FAILURE + +/// +/// An unexpected error was encountered when trying to access the LuAdapterSite site. +/// +#define SPXERR_UNEXPECTED_LU_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_LU_SITE_FAILURE + +/// +/// The buffer is too small. +/// +#define SPXERR_BUFFER_TOO_SMALL \ + AZAC_ERR_BUFFER_TOO_SMALL + +/// +/// A method failed to allocate memory. +/// +#define SPXERR_OUT_OF_MEMORY \ + AZAC_ERR_OUT_OF_MEMORY + +/// +/// An unexpected runtime error occurred. +/// +#define SPXERR_RUNTIME_ERROR \ + AZAC_ERR_RUNTIME_ERROR + +/// +/// The url specified is invalid. +/// +#define SPXERR_INVALID_URL \ + AZAC_ERR_INVALID_URL + +/// +/// The region specified is invalid or missing. +/// +#define SPXERR_INVALID_REGION \ + AZAC_ERR_INVALID_REGION + +/// +/// Switch between single shot and continuous recognition is not supported. 
+/// +#define SPXERR_SWITCH_MODE_NOT_ALLOWED \ + AZAC_ERR_SWITCH_MODE_NOT_ALLOWED + +/// +/// Changing connection status is not supported in the current recognition state. +/// +#define SPXERR_CHANGE_CONNECTION_STATUS_NOT_ALLOWED \ + AZAC_ERR_CHANGE_CONNECTION_STATUS_NOT_ALLOWED + +/// +/// Explicit connection management is not supported by the specified recognizer. +/// +#define SPXERR_EXPLICIT_CONNECTION_NOT_SUPPORTED_BY_RECOGNIZER \ + AZAC_ERR_EXPLICIT_CONNECTION_NOT_SUPPORTED_BY_RECOGNIZER + +/// +/// The handle is invalid. +/// +#define SPXERR_INVALID_HANDLE \ + AZAC_ERR_INVALID_HANDLE + +/// +/// The recognizer is invalid. +/// +#define SPXERR_INVALID_RECOGNIZER \ + AZAC_ERR_INVALID_RECOGNIZER + +/// +/// The value is out of range. +/// Added in version 1.3.0. +/// +#define SPXERR_OUT_OF_RANGE \ + AZAC_ERR_OUT_OF_RANGE + +/// +/// Extension library not found. +/// Added in version 1.3.0. +/// +#define SPXERR_EXTENSION_LIBRARY_NOT_FOUND \ + AZAC_ERR_EXTENSION_LIBRARY_NOT_FOUND + +/// +/// An unexpected error was encountered when trying to access the TTS engine site. +/// Added in version 1.4.0. +/// +#define SPXERR_UNEXPECTED_TTS_ENGINE_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_TTS_ENGINE_SITE_FAILURE + +/// +/// An unexpected error was encountered when trying to access the audio output stream. +/// Added in version 1.4.0. +/// +#define SPXERR_UNEXPECTED_AUDIO_OUTPUT_FAILURE \ + AZAC_ERR_UNEXPECTED_AUDIO_OUTPUT_FAILURE + +/// +/// Gstreamer internal error. +/// Added in version 1.4.0. +/// +#define SPXERR_GSTREAMER_INTERNAL_ERROR \ + AZAC_ERR_GSTREAMER_INTERNAL_ERROR + +/// +/// Compressed container format not supported. +/// Added in version 1.4.0. +/// +#define SPXERR_CONTAINER_FORMAT_NOT_SUPPORTED_ERROR \ + AZAC_ERR_CONTAINER_FORMAT_NOT_SUPPORTED_ERROR + +/// +/// Codec extension or gstreamer not found. +/// Added in version 1.4.0. +/// +#define SPXERR_GSTREAMER_NOT_FOUND_ERROR \ + AZAC_ERR_GSTREAMER_NOT_FOUND_ERROR + +/// +/// The language specified is missing. 
+/// Added in version 1.5.0. +/// +#define SPXERR_INVALID_LANGUAGE \ + AZAC_ERR_INVALID_LANGUAGE + +/// +/// The API is not applicable. +/// Added in version 1.5.0. +/// +#define SPXERR_UNSUPPORTED_API_ERROR \ + AZAC_ERR_UNSUPPORTED_API_ERROR + +/// +/// The ring buffer is unavailable. +/// Added in version 1.8.0. +/// +#define SPXERR_RINGBUFFER_DATA_UNAVAILABLE \ + AZAC_ERR_RINGBUFFER_DATA_UNAVAILABLE + +/// +/// An unexpected error was encountered when trying to access the Conversation site. +/// Added in version 1.5.0. +/// +#define SPXERR_UNEXPECTED_CONVERSATION_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_CONVERSATION_SITE_FAILURE + +/// +/// An unexpected error was encountered when trying to access the Conversation site. +/// Added in version 1.8.0. +/// +#define SPXERR_UNEXPECTED_CONVERSATION_TRANSLATOR_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_CONVERSATION_TRANSLATOR_SITE_FAILURE + +/// +/// An asynchronous operation was canceled before it was executed. +/// Added in version 1.8.0. +/// +#define SPXERR_CANCELED \ + AZAC_ERR_CANCELED + +/// +/// Codec for compression could not be initialized. +/// Added in version 1.10.0. +/// +#define SPXERR_COMPRESS_AUDIO_CODEC_INITIFAILED \ + AZAC_ERR_COMPRESS_AUDIO_CODEC_INITIFAILED + +/// +/// Data not available. +/// Added in version 1.10.0. +/// +#define SPXERR_DATA_NOT_AVAILABLE \ + AZAC_ERR_DATA_NOT_AVAILABLE + +/// +/// Invalid result reason. +/// Added in version 1.12.0 +/// +#define SPXERR_INVALID_RESULT_REASON \ + AZAC_ERR_INVALID_RESULT_REASON + +/// +/// An unexpected error was encountered when trying to access the RNN-T site. +/// +#define SPXERR_UNEXPECTED_RNNT_SITE_FAILURE \ + AZAC_ERR_UNEXPECTED_RNNT_SITE_FAILURE + +/// +/// Sending of a network message failed. +/// +#define SPXERR_NETWORK_SEND_FAILED \ + AZAC_ERR_NETWORK_SEND_FAILED + +/// +/// Audio extension library not found. +/// Added in version 1.16.0. 
+/// +#define SPXERR_AUDIO_SYS_LIBRARY_NOT_FOUND \ + AZAC_ERR_AUDIO_SYS_LIBRARY_NOT_FOUND + +/// +/// An error in the audio-rendering system. +/// Added in version 1.20.0 +/// +#define SPXERR_LOUDSPEAKER_ERROR \ + AZAC_ERR_LOUDSPEAKER_ERROR + +/// +/// An unexpected error was encountered when trying to access the Vision site. +/// Added in version 1.15.0. +/// +#define SPXERR_VISION_SITE_FAILURE \ + AZAC_ERR_VISION_SITE_FAILURE + +/// +/// Stream number provided was invalid in the current context. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_INVALID_STREAM \ + AZAC_ERR_MEDIA_INVALID_STREAM + +/// +/// Offset required is invalid in the current context. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_INVALID_OFFSET \ + AZAC_ERR_MEDIA_INVALID_OFFSET + +/// +/// No more data is available in source. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_NO_MORE_DATA \ + AZAC_ERR_MEDIA_NO_MORE_DATA + +/// +/// Source has not been started. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_NOT_STARTED \ + AZAC_ERR_MEDIA_NOT_STARTED + +/// +/// Source has already been started. +/// Added in version 1.15.0. +/// +#define SPXERR_MEDIA_ALREADY_STARTED \ + AZAC_ERR_MEDIA_ALREADY_STARTED + +/// +/// Media device creation failed. +/// Added in version 1.18.0. +/// +#define SPXERR_MEDIA_DEVICE_CREATION_FAILED \ + AZAC_ERR_MEDIA_DEVICE_CREATION_FAILED + +/// +/// No devices of the selected category are available. +/// Added in version 1.18.0. +/// +#define SPXERR_MEDIA_NO_DEVICE_AVAILABLE \ + AZAC_ERR_MEDIA_NO_DEVICE_AVAILABLE + +/// +/// Enabled Voice Activity Detection while using keyword recognition is not allowed. +/// +#define SPXERR_VAD_CANNOT_BE_USED_WITH_KEYWORD_RECOGNIZER \ + AZAC_ERR_VAD_CANNOT_BE_USED_WITH_KEYWORD_RECOGNIZER + +/// +/// The specified RecoEngineAdapter could not be created. +/// +#define SPXERR_COULD_NOT_CREATE_ENGINE_ADAPTER \ + AZAC_ERR_COULD_NOT_CREATE_ENGINE_ADAPTER + +/// +/// MAS extension library not found. 
+/// +#define SPXERR_MAS_LIBRARY_NOT_FOUND \ + AZAC_ERR_MAS_LIBRARY_NOT_FOUND diff --git a/third_party/azure_speech_sdk/include/cxx_api/CMakeLists.txt b/third_party/azure_speech_sdk/include/cxx_api/CMakeLists.txt new file mode 100644 index 0000000..c5ee7ae --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.19) + +project(cxx_headers) + +set(SRC_DIR "${PROJECT_SOURCE_DIR}") +add_library(${PROJECT_NAME} INTERFACE ${SPEECH_CXX_API_HEADERS}) +target_include_directories(${PROJECT_NAME} INTERFACE ${PROJECT_SOURCE_DIR}) +target_link_libraries(${PROJECT_NAME} INTERFACE c_headers) +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER api) diff --git a/third_party/azure_speech_sdk/include/cxx_api/azac_api_cxx_common.h b/third_party/azure_speech_sdk/include/cxx_api/azac_api_cxx_common.h new file mode 100644 index 0000000..31e221d --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/azac_api_cxx_common.h @@ -0,0 +1,80 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/azai/license202106 for the full license information. +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define AZAC_DISABLE_COPY_AND_MOVE(T) \ + /** \brief Disable copy constructor */ \ + T(const T&) = delete; \ + /** \brief Disable copy assignment */ \ + T& operator=(const T&) = delete; \ + /** \brief Disable move constructor */ \ + T(T&&) = delete; \ + /** \brief Disable move assignment */ \ + T& operator=(T&&) = delete + +#define AZAC_DISABLE_DEFAULT_CTORS(T) \ + /** \brief Disable default constructor */ \ + T() = delete; \ + AZAC_DISABLE_COPY_AND_MOVE(T) + +#if defined(__GNUG__) && defined(__linux__) && !defined(ANDROID) && !defined(__ANDROID__) +#include +#define SHOULD_HANDLE_FORCED_UNWIND 1 +#endif + +/*! 
\cond INTERNAL */ + +namespace Azure { +namespace AI { +namespace Core { +namespace _detail { + +template +class ProtectedAccess : public T +{ +public: + + static AZAC_HANDLE HandleFromPtr(T* ptr) { + if (ptr == nullptr) + { + return nullptr; + } + auto access = static_cast(ptr); + return (AZAC_HANDLE)(*access); + } + + static AZAC_HANDLE HandleFromConstPtr(const T* ptr) { + if (ptr == nullptr) + { + return nullptr; + } + auto access = static_cast(ptr); + return (AZAC_HANDLE)(*access); + } + + template + static std::shared_ptr FromHandle(AZAC_HANDLE handle, Args... extras) { + return T::FromHandle(handle, extras...); + } + + template + static std::shared_ptr Create(Args&&... args) { + return T::Create(std::forward(args)...); + } + +}; + +} } } } // Azure::AI::Core::Details + +/*! \endcond */ diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx.h new file mode 100644 index 0000000..44fe059 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx.h @@ -0,0 +1,117 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx.h: Master include header for public C++ API declarations +// + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_config.h new file mode 100644 index 0000000..abfd623 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_config.h @@ -0,0 +1,338 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_audio_config.h: Public API declarations for AudioConfig C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Audio { + + +/// +/// Represents audio input or output configuration. Audio input can be from a microphone, file, +/// or input stream. Audio output can be to a speaker, audio file output in WAV format, or output +/// stream. 
+/// +class AudioConfig +{ +public: + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOCONFIGHANDLE() const { return m_haudioConfig.get(); } + + /// + /// Creates an AudioConfig object representing the default microphone on the system. + /// + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromDefaultMicrophoneInput() + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_default_microphone(&haudioConfig)); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the default microphone on the system. + /// + /// Audio processing options. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromDefaultMicrophoneInput(std::shared_ptr audioProcessingOptions) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, audioProcessingOptions == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_default_microphone(&haudioConfig)); + SPX_THROW_ON_FAIL(audio_config_set_audio_processing_options(haudioConfig, static_cast(*audioProcessingOptions.get()))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing a specific microphone on the system. + /// Added in version 1.3.0. + /// + /// Specifies the device name. Please refer to this page on how to retrieve platform-specific microphone names. 
+ /// A shared pointer to the AudioConfig object + static std::shared_ptr FromMicrophoneInput(const SPXSTRING& deviceName) + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_a_microphone(&haudioConfig, Utils::ToUTF8(deviceName).c_str())); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing a specific microphone on the system. + /// + /// Specifies the device name. Please refer to this page on how to retrieve platform-specific microphone names. + /// Audio processing options. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromMicrophoneInput(const SPXSTRING& deviceName, std::shared_ptr audioProcessingOptions) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, audioProcessingOptions == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_a_microphone(&haudioConfig, Utils::ToUTF8(deviceName).c_str())); + SPX_THROW_ON_FAIL(audio_config_set_audio_processing_options(haudioConfig, static_cast(*audioProcessingOptions.get()))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified file. + /// + /// Specifies the audio input file. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromWavFileInput(const SPXSTRING& fileName) + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_wav_file_name(&haudioConfig, Utils::ToUTF8(fileName).c_str())); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified file. + /// + /// Specifies the audio input file. + /// Audio processing options. 
+ /// A shared pointer to the AudioConfig object + static std::shared_ptr FromWavFileInput(const SPXSTRING& fileName, std::shared_ptr audioProcessingOptions) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, audioProcessingOptions == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_wav_file_name(&haudioConfig, Utils::ToUTF8(fileName).c_str())); + SPX_THROW_ON_FAIL(audio_config_set_audio_processing_options(haudioConfig, static_cast(*audioProcessingOptions.get()))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified stream. + /// + /// Specifies the custom audio input stream. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromStreamInput(std::shared_ptr stream) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, stream == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_stream(&haudioConfig, GetStreamHandle(stream))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified stream. + /// + /// Specifies the custom audio input stream. + /// Audio processing options. 
+ /// A shared pointer to the AudioConfig object + static std::shared_ptr FromStreamInput(std::shared_ptr stream, std::shared_ptr audioProcessingOptions) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, stream == nullptr); + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, audioProcessingOptions == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_input_from_stream(&haudioConfig, GetStreamHandle(stream))); + SPX_THROW_ON_FAIL(audio_config_set_audio_processing_options(haudioConfig, static_cast(*audioProcessingOptions.get()))); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the default audio output device (speaker) on the system. + /// Added in version 1.4.0 + /// + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromDefaultSpeakerOutput() + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_output_from_default_speaker(&haudioConfig)); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing a specific speaker on the system. + /// Added in version 1.14.0. + /// + /// Specifies the device name. Please refer to this page on how to retrieve platform-specific audio device names. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromSpeakerOutput(const SPXSTRING& deviceName) + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_output_from_a_speaker(&haudioConfig, Utils::ToUTF8(deviceName).c_str())); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified file for audio output. + /// Added in version 1.4.0 + /// + /// Specifies the audio output file. The parent directory must already exist. 
+ /// A shared pointer to the AudioConfig object + static std::shared_ptr FromWavFileOutput(const SPXSTRING& fileName) + { + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_output_from_wav_file_name(&haudioConfig, Utils::ToUTF8(fileName).c_str())); + + auto config = new AudioConfig(haudioConfig); + return std::shared_ptr(config); + } + + /// + /// Creates an AudioConfig object representing the specified output stream. + /// Added in version 1.4.0 + /// + /// Specifies the custom audio output stream. + /// A shared pointer to the AudioConfig object + static std::shared_ptr FromStreamOutput(std::shared_ptr stream) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, stream == nullptr); + + SPXAUDIOCONFIGHANDLE haudioConfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_create_audio_output_from_stream(&haudioConfig, GetOutputStreamHandle(stream))); + + auto config = new AudioConfig(haudioConfig); + config->m_outputStream = stream; + return std::shared_ptr(config); + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), Utils::ToUTF8(value).c_str()); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, static_cast(id), nullptr, Utils::ToUTF8(value).c_str()); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + const char* value = property_bag_get_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. 
+ /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + const char* value = property_bag_get_string(m_propertybag, static_cast(id), nullptr, ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Gets an instance of AudioProcessingOptions class which contains the parameters for audio processing used by Speech SDK. + /// + /// A shared pointer to the AudioProcessingOptions object. + std::shared_ptr GetAudioProcessingOptions() const + { + SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_config_get_audio_processing_options(m_haudioConfig, &hoptions)); + + return std::make_shared(hoptions); + } + + /// + /// Destructs the object. + /// + virtual ~AudioConfig() + { + property_bag_release(m_propertybag); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioConfig(SPXAUDIOCONFIGHANDLE haudioConfig) + : m_haudioConfig(haudioConfig) + { + SPX_THROW_ON_FAIL(audio_config_get_property_bag(m_haudioConfig, &m_propertybag)); + } + + /// + /// Internal helper method to get the audio stream format handle. + /// + static SPXAUDIOSTREAMHANDLE GetStreamHandle(std::shared_ptr stream) { return (SPXAUDIOSTREAMHANDLE)(*stream.get()); } + + /// + /// Internal helper method to get the audio output stream format handle. + /// + static SPXAUDIOSTREAMHANDLE GetOutputStreamHandle(std::shared_ptr stream) { return (SPXAUDIOSTREAMHANDLE)(*stream.get()); } + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(AudioConfig); + + /// + /// Internal member variable that holds the smart handle. 
+ /// + SmartHandle m_haudioConfig; + + /// + /// Internal member variable that holds the properties of the audio config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + std::shared_ptr m_stream; + std::shared_ptr m_outputStream; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Audio diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_data_stream.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_data_stream.h new file mode 100644 index 0000000..65ea8bc --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_data_stream.h @@ -0,0 +1,248 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_audio_data_stream.h: Public API declarations for AudioDataStream C++ class +// + +#pragma once + +#include + +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +class SpeechSynthesisResult; +class KeywordRecognitionResult; + +/// +/// Represents audio data stream used for operating audio data as a stream. +/// Added in version 1.4.0 +/// +class AudioDataStream : public std::enable_shared_from_this +{ +private: + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_haudioStream; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXAUDIOSTREAMHANDLE hstream) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + audio_data_stream_get_property_bag(hstream, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the audio data stream. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Destroy the instance. 
+ /// + ~AudioDataStream() + { + DetachInput(); + } + + /// + /// Creates a memory backed AudioDataStream for the specified audio input file. + /// Added in version 1.14.0 + /// + /// Specifies the audio input file. + /// A shared pointer to AudioDataStream + static std::shared_ptr FromWavFileInput(const SPXSTRING& fileName) + { + SPXAUDIOSTREAMHANDLE hstream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_data_stream_create_from_file(&hstream, Utils::ToUTF8(fileName).c_str())); + + auto stream = new AudioDataStream(hstream); + return std::shared_ptr(stream); + } + + /// + /// Creates a memory backed AudioDataStream from given speech synthesis result. + /// + /// The speech synthesis result. + /// A shared pointer to AudioDataStream + static std::shared_ptr FromResult(std::shared_ptr result); + + /// + /// Obtains the memory backed AudioDataStream associated with a given KeywordRecognition result. + /// + /// The keyword recognition result. + /// An audio stream with the input to the KeywordRecognizer starting from right before the Keyword. + static std::shared_ptr FromResult(std::shared_ptr result); + + /// + /// Get current status of the audio data stream. + /// + /// Current status + StreamStatus GetStatus() + { + Stream_Status status = StreamStatus_Unknown; + SPX_THROW_ON_FAIL(audio_data_stream_get_status(m_haudioStream, &status)); + return (StreamStatus)status; + } + + /// + /// Check whether the stream has enough data to be read. + /// + /// The requested data size in bytes. + /// A bool indicating whether the stream has enough data to be read. + bool CanReadData(uint32_t bytesRequested) + { + return audio_data_stream_can_read_data(m_haudioStream, bytesRequested); + } + + /// + /// Check whether the stream has enough data to be read, starting from the specified position. + /// + /// The position counting from start of the stream. + /// The requested data size in bytes. + /// A bool indicating whether the stream has enough data to be read. 
+ bool CanReadData(uint32_t pos, uint32_t bytesRequested) + { + return audio_data_stream_can_read_data_from_position(m_haudioStream, bytesRequested, pos); + } + + /// + /// Get the available size of the audio data stream. + /// + /// Available size of the audio data stream. + uint32_t GetAvailableSize() + { + return audio_data_stream_get_available_size(m_haudioStream); + } + + /// + /// Reads a chunk of the audio data and fill it to given buffer + /// + /// A buffer to receive read data. + /// Size of the buffer. + /// Size of data filled to the buffer, 0 means end of stream + uint32_t ReadData(uint8_t* buffer, uint32_t bufferSize) + { + uint32_t filledSize = 0; + SPX_THROW_ON_FAIL(audio_data_stream_read(m_haudioStream, buffer, bufferSize, &filledSize)); + + return filledSize; + } + + /// + /// Reads a chunk of the audio data and fill it to given buffer, starting from the specified position. + /// + /// The position counting from start of the stream. + /// A buffer to receive read data. + /// Size of the buffer. + /// Size of data filled to the buffer, 0 means end of stream + uint32_t ReadData(uint32_t pos, uint8_t* buffer, uint32_t bufferSize) + { + uint32_t filledSize = 0; + SPX_THROW_ON_FAIL(audio_data_stream_read_from_position(m_haudioStream, buffer, bufferSize, pos, &filledSize)); + + return filledSize; + } + + /// + /// Save the audio data to a file, synchronously. + /// + /// The file name with full path. + void SaveToWavFile(const SPXSTRING& fileName) + { + SPX_THROW_ON_FAIL(audio_data_stream_save_to_wave_file(m_haudioStream, Utils::ToUTF8(fileName).c_str())); + } + + /// + /// Save the audio data to a file, asynchronously. + /// + /// The file name with full path. + /// An asynchronous operation representing the saving. 
+ std::future SaveToWavFileAsync(const SPXSTRING& fileName) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, fileName]() -> void { + SPX_THROW_ON_FAIL(audio_data_stream_save_to_wave_file(m_haudioStream, Utils::ToUTF8(fileName).c_str())); + }); + + return future; + } + + /// + /// Get current position of the audio data stream. + /// + /// Current position + uint32_t GetPosition() + { + uint32_t position = 0; + SPX_THROW_ON_FAIL(audio_data_stream_get_position(m_haudioStream, &position)); + return position; + } + + /// + /// Set current position of the audio data stream. + /// + /// Position to be set. + void SetPosition(uint32_t pos) + { + SPX_THROW_ON_FAIL(audio_data_stream_set_position(m_haudioStream, pos)); + } + + /// + /// Stops any more data from getting to the stream. + /// + void DetachInput() + { + SPX_THROW_ON_FAIL(audio_data_stream_detach_input(m_haudioStream)); + } + + /// + /// Explicit conversion operator. + /// + /// A handle. + explicit operator SPXAUDIOSTREAMHANDLE() { return m_haudioStream; } + + /// + /// Collection of additional SpeechSynthesisResult properties. + /// + const PropertyCollection& Properties; + +private: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioDataStream(SPXAUDIOSTREAMHANDLE haudioStream) : + m_haudioStream(haudioStream), + m_properties(haudioStream), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_processing_options.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_processing_options.h new file mode 100644 index 0000000..de3354b --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_processing_options.h @@ -0,0 +1,358 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_audio_processing_options.h: Public API declarations for AudioProcessingOptions and related C++ classes +// + +#pragma once +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Audio { + +/// +/// Types of preset microphone array geometries. +/// See [Microphone Array Recommendations](/azure/cognitive-services/speech-service/speech-devices-sdk-microphone) for more details. +/// +enum class PresetMicrophoneArrayGeometry +{ + /// + /// Indicates that no geometry specified. Speech SDK will determine the microphone array geometry. + /// + Uninitialized, + /// + /// Indicates a microphone array with one microphone in the center and six microphones evenly spaced + /// in a circle with radius approximately equal to 42.5 mm. + /// + Circular7, + /// + /// Indicates a microphone array with one microphone in the center and three microphones evenly spaced + /// in a circle with radius approximately equal to 42.5 mm. + /// + Circular4, + /// + /// Indicates a microphone array with four linearly placed microphones with 40 mm spacing between them. + /// + Linear4, + /// + /// Indicates a microphone array with two linearly placed microphones with 40 mm spacing between them. + /// + Linear2, + /// + /// Indicates a microphone array with a single microphone. + /// + Mono, + /// + /// Indicates a microphone array with custom geometry. + /// + Custom +}; + +/// +/// Types of microphone arrays. +/// +enum class MicrophoneArrayType +{ + /// + /// Indicates that the microphone array has microphones in a straight line. + /// + Linear, + /// + /// Indicates that the microphone array has microphones in a plane. + /// + Planar +}; + +/// +/// Defines speaker reference channel position in input audio. 
+/// +enum class SpeakerReferenceChannel +{ + /// + /// Indicates that the input audio does not have a speaker reference channel. + /// + None, + /// + /// Indicates that the last channel in the input audio corresponds to the speaker + /// reference for echo cancellation. + /// + LastChannel +}; + +typedef AudioProcessingOptions_MicrophoneCoordinates MicrophoneCoordinates; + +/// +/// Represents the geometry of a microphone array. +/// +struct MicrophoneArrayGeometry +{ + /// + /// Type of microphone array. + /// + MicrophoneArrayType microphoneArrayType; + /// + /// Start angle for beamforming in degrees. + /// + uint16_t beamformingStartAngle; + /// + /// End angle for beamforming in degrees. + /// + uint16_t beamformingEndAngle; + /// + /// Coordinates of microphones in the microphone array. + /// + std::vector microphoneCoordinates; + + /// + /// Creates a new instance of MicrophoneArrayGeometry. + /// Beamforming start angle is set to zero. + /// Beamforming end angle is set to 180 degrees if microphoneArrayType is Linear, otherwise it is set to 360 degrees. + /// + /// Type of microphone array. + /// Coordinates of microphones in the microphone array. + MicrophoneArrayGeometry(MicrophoneArrayType microphoneArrayType, const std::vector& microphoneCoordinates) + { + this->microphoneArrayType = microphoneArrayType; + this->beamformingStartAngle = 0; + this->beamformingEndAngle = (microphoneArrayType == MicrophoneArrayType::Linear) ? 180 : 360; + this->microphoneCoordinates.resize(microphoneCoordinates.size()); + for (size_t i = 0; i < microphoneCoordinates.size(); i++) + { + this->microphoneCoordinates[i] = microphoneCoordinates[i]; + } + } + + /// + /// Creates a new instance of MicrophoneArrayGeometry. + /// + /// Type of microphone array. + /// Start angle for beamforming in degrees. + /// End angle for beamforming in degrees. + /// Coordinates of microphones in the microphone array. 
+ MicrophoneArrayGeometry(MicrophoneArrayType microphoneArrayType, uint16_t beamformingStartAngle, uint16_t beamformingEndAngle, const std::vector& microphoneCoordinates) + { + this->microphoneArrayType = microphoneArrayType; + this->beamformingStartAngle = beamformingStartAngle; + this->beamformingEndAngle = beamformingEndAngle; + this->microphoneCoordinates.resize(microphoneCoordinates.size()); + for (size_t i = 0; i < microphoneCoordinates.size(); i++) + { + this->microphoneCoordinates[i] = microphoneCoordinates[i]; + } + } +}; + +/// +/// Represents audio processing options used with audio config class. +/// +class AudioProcessingOptions +{ +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// A handle to audio processing options. + explicit AudioProcessingOptions(SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions) + : m_hoptions(hoptions) + { + SPX_THROW_ON_FAIL(audio_processing_options_get_property_bag(m_hoptions, &m_propertybag)); + } + + /// + /// Destructs an instance of the AudioProcessingOptions class. + /// + ~AudioProcessingOptions() = default; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOPROCESSINGOPTIONSHANDLE() const { return m_hoptions.get(); } + + /// + /// Creates a new instance of the AudioProcessingOptions class. + /// + /// Specifies flags to control the audio processing performed by Speech SDK. It is bitwise OR of AUDIO_INPUT_PROCESSING_XXX constants. + /// The newly created AudioProcessingOptions wrapped inside a std::shared_ptr. + /// + /// This function should only be used when the audio input is from a microphone array. + /// On Windows, this function will try to query the microphone array geometry from the audio driver. Audio data is also read from speaker reference channel. + /// On Linux, it assumes that the microphone is a single channel microphone. 
+ /// + static std::shared_ptr Create(int audioProcessingFlags) + { + SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_processing_options_create(&hoptions, audioProcessingFlags)); + + auto options = new AudioProcessingOptions(hoptions); + return std::shared_ptr(options); + } + + /// + /// Creates a new instance of the AudioProcessingOptions class with preset microphone array geometry. + /// + /// Specifies flags to control the audio processing performed by Speech SDK. It is bitwise OR of AUDIO_INPUT_PROCESSING_XXX constants. + /// Specifies the type of microphone array geometry. + /// Specifies the speaker reference channel position in the input audio. + /// The newly created AudioProcessingOptions wrapped inside a std::shared_ptr. + static std::shared_ptr Create(int audioProcessingFlags, PresetMicrophoneArrayGeometry microphoneArrayGeometry, SpeakerReferenceChannel speakerReferenceChannel = SpeakerReferenceChannel::None) + { + SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_processing_options_create_from_preset_microphone_array_geometry(&hoptions, audioProcessingFlags, (AudioProcessingOptions_PresetMicrophoneArrayGeometry)microphoneArrayGeometry, (AudioProcessingOptions_SpeakerReferenceChannel)speakerReferenceChannel)); + + auto options = new AudioProcessingOptions(hoptions); + return std::shared_ptr(options); + } + + /// + /// Creates a new instance of the AudioProcessingOptions class with microphone array geometry. + /// + /// Specifies flags to control the audio processing performed by Speech SDK. It is bitwise OR of AUDIO_INPUT_PROCESSING_XXX constants. + /// Specifies the microphone array geometry. + /// Specifies the speaker reference channel position in the input audio. + /// The newly created AudioProcessingOptions wrapped inside a std::shared_ptr. 
+ static std::shared_ptr Create(int audioProcessingFlags, MicrophoneArrayGeometry microphoneArrayGeometry, SpeakerReferenceChannel speakerReferenceChannel = SpeakerReferenceChannel::None) + { + AudioProcessingOptions_MicrophoneArrayGeometry geometry + { + (AudioProcessingOptions_MicrophoneArrayType)microphoneArrayGeometry.microphoneArrayType, + microphoneArrayGeometry.beamformingStartAngle, + microphoneArrayGeometry.beamformingEndAngle, + (uint16_t)microphoneArrayGeometry.microphoneCoordinates.size(), + microphoneArrayGeometry.microphoneCoordinates.data() + }; + + SPXAUDIOPROCESSINGOPTIONSHANDLE hoptions = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_processing_options_create_from_microphone_array_geometry(&hoptions, audioProcessingFlags, &geometry, (AudioProcessingOptions_SpeakerReferenceChannel)speakerReferenceChannel)); + + auto options = new AudioProcessingOptions(hoptions); + return std::shared_ptr(options); + } + + /// + /// Returns the type of audio processing performed by Speech SDK. + /// + /// Bitwise OR of AUDIO_INPUT_PROCESSING_XXX constant flags indicating the input audio processing performed by Speech SDK. + int GetAudioProcessingFlags() const + { + int audioProcessingFlags; + SPX_THROW_ON_FAIL(audio_processing_options_get_audio_processing_flags(m_hoptions, &audioProcessingFlags)); + return audioProcessingFlags; + } + + /// + /// Returns the microphone array geometry of the microphone used for audio input. + /// + /// A value of type PresetMicrophoneArrayGeometry enum. + PresetMicrophoneArrayGeometry GetPresetMicrophoneArrayGeometry() const + { + PresetMicrophoneArrayGeometry microphoneArrayGeometry = PresetMicrophoneArrayGeometry::Uninitialized; + SPX_THROW_ON_FAIL(audio_processing_options_get_preset_microphone_array_geometry(m_hoptions, (AudioProcessingOptions_PresetMicrophoneArrayGeometry*)µphoneArrayGeometry)); + return microphoneArrayGeometry; + } + + /// + /// Returns the microphone array type of the microphone used for audio input. 
+ /// + /// A value of type MicrophoneArrayType enum. + MicrophoneArrayType GetMicrophoneArrayType() const + { + MicrophoneArrayType microphoneArrayType = MicrophoneArrayType::Linear; + SPX_THROW_ON_FAIL(audio_processing_options_get_microphone_array_type(m_hoptions, (AudioProcessingOptions_MicrophoneArrayType*)µphoneArrayType)); + return microphoneArrayType; + } + + /// + /// Returns the start angle used for beamforming. + /// + /// Beamforming start angle. + uint16_t GetBeamformingStartAngle() const + { + uint16_t startAngle; + SPX_THROW_ON_FAIL(audio_processing_options_get_beamforming_start_angle(m_hoptions, &startAngle)); + return startAngle; + } + + /// + /// Returns the end angle used for beamforming. + /// + /// Beamforming end angle. + uint16_t GetBeamformingEndAngle() const + { + uint16_t endAngle; + SPX_THROW_ON_FAIL(audio_processing_options_get_beamforming_end_angle(m_hoptions, &endAngle)); + return endAngle; + } + + /// + /// Returns the coordinates of microphones in the microphone array used for audio input. + /// + /// A std::vector of MicrophoneCoordinates elements. + std::vector GetMicrophoneCoordinates() const + { + uint16_t microphoneCount; + SPX_THROW_ON_FAIL(audio_processing_options_get_microphone_count(m_hoptions, µphoneCount)); + + std::vector microphoneCoordinates(microphoneCount); + SPX_THROW_ON_FAIL(audio_processing_options_get_microphone_coordinates(m_hoptions, microphoneCoordinates.data(), microphoneCount)); + return microphoneCoordinates; + } + + /// + /// Returns the speaker reference channel position in the audio input. + /// + /// A value of type SpeakerReferenceChannel enum. 
+ SpeakerReferenceChannel GetSpeakerReferenceChannel() const + { + SpeakerReferenceChannel speakerReferenceChannel = SpeakerReferenceChannel::None; + SPX_THROW_ON_FAIL(audio_processing_options_get_speaker_reference_channel(m_hoptions, (AudioProcessingOptions_SpeakerReferenceChannel*)&speakerReferenceChannel)); + return speakerReferenceChannel; + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), Utils::ToUTF8(value).c_str()); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + const char* value = property_bag_get_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + +private: + + DISABLE_COPY_AND_MOVE(AudioProcessingOptions); + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_hoptions; + + /// + /// Internal member variable that holds the properties of the audio processing options. + /// + SmartHandle m_propertybag; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Audio diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_stream.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_stream.h new file mode 100644 index 0000000..6a3f7d2 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_stream.h @@ -0,0 +1,995 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_audio_stream.h: Public API declarations for AudioInputStream / AudioOutputStream and related C++ classes +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +namespace Dialog { + class ActivityReceivedEventArgs; +} + +namespace Audio { + + + +class PullAudioInputStreamCallback; +class PushAudioInputStream; +class PullAudioInputStream; +class PushAudioOutputStreamCallback; +class PushAudioOutputStream; +class PullAudioOutputStream; + + +/// +/// Represents audio input stream used for custom audio input configurations. +/// +class AudioInputStream +{ +public: + + using ReadCallbackFunction_Type = ::std::function; + using CloseCallbackFunction_Type = ::std::function; + /// Added in version 1.5.0. + using GetPropertyCallbackFunction_Type = std::function; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOSTREAMHANDLE() const { return m_haudioStream.get(); } + + /// + /// Creates a memory backed PushAudioInputStream using the default format (16 kHz, 16 bit, mono PCM). + /// + /// A shared pointer to PushAudioInputStream + static std::shared_ptr CreatePushStream(); + + /// + /// Creates a memory backed PushAudioInputStream with the specified audio format. + /// + /// Audio stream format. + /// A shared pointer to PushAudioInputStream + static std::shared_ptr CreatePushStream(std::shared_ptr format); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods, using the default format (16 kHz, 16 bit, mono PCM). + /// + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. 
+ /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback = nullptr); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read(), Close() and GetProperty() methods + /// Added in version 1.5.0. + /// + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// GetProperty callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback); + + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods, using the default format (16 kHz, 16 bit, mono PCM). + /// + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback = nullptr); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read(), Close() and GetProperty() methods. + /// Added in version 1.5.0. + /// + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback); + + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback interface for the Read() and Close() methods, using the default format (16 kHz, 16 bit, mono PCM). + /// + /// Shared pointer to PullAudioInputStreamCallback instance. 
+ /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr callback); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods. + /// + /// Audio stream format. + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback = nullptr); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read(), Close() and GetProperty() methods. + /// Added in version 1.5.0. + /// + /// Audio stream format. + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback); + + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods. + /// + /// Audio stream format. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback = nullptr); + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback functions for Read() and Close() methods. + /// Added in version 1.5.0. + /// + /// Audio stream format. + /// Read callback. + /// Close callback. + /// Get property callback. 
+ /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback); + + + /// + /// Creates a PullAudioInputStream that delegates to the specified callback interface for the Read() and Close() methods, using the specified format. + /// + /// Audio stream format. + /// Shared pointer to PullAudioInputStreamCallback instance. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr CreatePullStream(std::shared_ptr format, std::shared_ptr callback); + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioInputStream(SPXAUDIOSTREAMHANDLE haudioStream) : m_haudioStream(haudioStream) { } + + /// + /// Destructor, does nothing. + /// + virtual ~AudioInputStream() {} + + /// + /// Internal helper method to get the default format if the specified format is nullptr. + /// + static std::shared_ptr UseDefaultFormatIfNull(std::shared_ptr format) { return format != nullptr ? format : AudioStreamFormat::GetDefaultInputFormat(); } + + /// + /// Internal helper method to get the audio stream format handle. + /// + static SPXAUDIOSTREAMFORMATHANDLE GetFormatHandle(std::shared_ptr format) { return (SPXAUDIOSTREAMFORMATHANDLE)(*format.get()); } + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_haudioStream; + + protected: + static constexpr size_t m_maxPropertyLen = 1024; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(AudioInputStream); +}; + + +/// +/// Represents memory backed push audio input stream used for custom audio input configurations. +/// +class PushAudioInputStream : public AudioInputStream +{ +public: + + /// + /// Destructor; closes the underlying stream if not already closed. 
+ /// + virtual ~PushAudioInputStream() + { + if (audio_stream_is_handle_valid(m_haudioStream)) + { + CloseStream(); + } + } + + /// + /// Creates a memory backed PushAudioInputStream using the default format (16 kHz, 16 bit, mono PCM). + /// + /// A shared pointer to PushAudioInputStream + static std::shared_ptr Create() + { + return Create(nullptr); + } + + /// + /// Creates a memory backed PushAudioInputStream with the specified audio format. + /// + /// Audio stream format. + /// A shared pointer to PushAudioInputStream + static std::shared_ptr Create(std::shared_ptr format) + { + format = UseDefaultFormatIfNull(format); + + SPXAUDIOSTREAMHANDLE haudioStream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_create_push_audio_input_stream(&haudioStream, GetFormatHandle(format))); + + auto stream = new PushAudioInputStream(haudioStream); + return std::shared_ptr(stream); + } + + /// + /// Writes the audio data specified by making an internal copy of the data. + /// Note: The dataBuffer should not contain any audio header. + /// + /// The pointer to the audio buffer of which this function will make a copy. + /// The size of the buffer. + void Write(uint8_t* dataBuffer, uint32_t size) + { + SPX_THROW_ON_FAIL(push_audio_input_stream_write(m_haudioStream, dataBuffer, size)); + } + + /// + /// Set value of a property. The properties of the audio data should be set before writing the audio data. + /// Added in version 1.5.0. + /// + /// The id of property. See + /// value to set + void SetProperty(PropertyId id, const SPXSTRING& value) + { + SPX_THROW_ON_FAIL(push_audio_input_stream_set_property_by_id(m_haudioStream, static_cast(id), Utils::ToUTF8(value).c_str())); + } + + /// + /// Set value of a property. The properties of the audio data should be set before writing the audio data. + /// Added in version 1.5.0. + /// + /// The name of property. 
+ /// value to set + void SetProperty(const SPXSTRING& propertyName, const SPXSTRING& value) + { + SPX_THROW_ON_FAIL(push_audio_input_stream_set_property_by_name(m_haudioStream, Utils::ToUTF8(propertyName.c_str()), Utils::ToUTF8(value.c_str()))); + } + + /// + /// Closes the stream. + /// + void Close() { SPX_THROW_ON_FAIL(CloseStream()); } + + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PushAudioInputStream(SPXAUDIOSTREAMHANDLE haudioStream) : AudioInputStream(haudioStream) { } + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PushAudioInputStream); + + SPXHR CloseStream() { return push_audio_input_stream_close(m_haudioStream); } +}; + + +/// +/// An interface that defines callback methods for an audio input stream. +/// +/// +/// Derive from this class and implement its function to provide your own +/// data as an audio input stream. +/// +class PullAudioInputStreamCallback +{ +public: + + /// + /// Destructor, does nothing. + /// + virtual ~PullAudioInputStreamCallback() {} + + /// + /// This function is called to synchronously get data from the audio stream. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// The pointer to the buffer to which to copy the audio data. + /// The size of the buffer. + /// The number of bytes copied into the buffer, or zero to indicate end of stream + virtual int Read(uint8_t* dataBuffer, uint32_t size) = 0; + + /// + /// This function is called to synchronously to get meta information associated to stream data, such as TimeStamp or UserId . + /// Added in version 1.5.0. + /// + /// The id of the property. + /// The value of the property. 
+ virtual SPXSTRING GetProperty(PropertyId id) + { + if (PropertyId::DataBuffer_TimeStamp == id) + { + return ""; + } + else if (PropertyId::DataBuffer_UserId == id) + { + return ""; + } + else + { + return ""; + } + } + + /// + /// This function is called to close the audio stream. + /// + /// + virtual void Close() = 0; + +protected: + + /*! \cond PROTECTED */ + + /// + /// Constructor, does nothing. + /// + PullAudioInputStreamCallback() {}; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PullAudioInputStreamCallback); +}; + + +/// +/// Pull audio input stream class. +/// +class PullAudioInputStream : public AudioInputStream +{ +public: + + /// + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() "C" callback functions pointers + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback = nullptr) + { + return Create(nullptr, pvContext, readCallback, closeCallback); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read(), Close() and GetProperty() "C" callback functions pointers + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// Added in version 1.5.0. + /// + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// Get property callback. 
+ /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback) + { + return Create(nullptr, pvContext, readCallback, closeCallback, getPropertyCallback); + } + + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() callback functions. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback = nullptr) + { + return Create(nullptr, readCallback, closeCallback); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read(), Close() and GetProperty() callback functions. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// Added in version 1.5.0. + /// + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback) + { + return Create(nullptr, readCallback, closeCallback, getPropertyCallback); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Close() callback function. + /// + /// Shared pointer to PullAudioInputStreamCallback instance. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr callback) + { + return Create(nullptr, callback); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() "C" callback functions pointers + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Audio stream format. 
+ /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback = nullptr) + { + return Create(format, + [=](uint8_t* buffer, uint32_t size) -> int { return readCallback(pvContext, buffer, size); }, + [=]() { if (closeCallback != nullptr) { closeCallback(pvContext); } }); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read(), Close() and GetProperty() "C" callback functions pointers + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Audio stream format. + /// Context pointer to use when invoking the callbacks. + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback) + { + return Create(format, + [=](uint8_t* buffer, uint32_t size) -> int { return readCallback(pvContext, buffer, size); }, + [=]() { if (closeCallback != nullptr) { closeCallback(pvContext); } }, + [=](PropertyId id) -> SPXSTRING + { + uint8_t result[m_maxPropertyLen]; + getPropertyCallback(pvContext, static_cast(id), result, m_maxPropertyLen); + return reinterpret_cast(result); + }); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() callback functions. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// + /// Audio stream format. + /// Read callback. + /// Close callback. 
+ /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback = nullptr) + { + auto wrapper = std::make_shared(readCallback, closeCallback); + return Create(format, wrapper); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read(), Close() and GetProperty() callback functions. + /// Note: The dataBuffer returned by Read() should not contain any audio header. + /// Added in version 1.5.0. + /// + /// Audio stream format. + /// Read callback. + /// Close callback. + /// Get property callback. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback) + { + auto wrapper = std::make_shared(readCallback, closeCallback, getPropertyCallback); + return Create(format, wrapper); + } + + /// + /// Creates a PullAudioInputStream utilizing the specified Read() and Close() callback functions. + /// + /// Audio stream format. + /// Shared pointer to PullAudioInputStreamCallback instance. + /// A shared pointer to PullAudioInputStream + static std::shared_ptr Create(std::shared_ptr format, std::shared_ptr callback) + { + format = UseDefaultFormatIfNull(format); + + SPXAUDIOSTREAMHANDLE haudioStream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_create_pull_audio_input_stream(&haudioStream, GetFormatHandle(format))); + + auto stream = new PullAudioInputStream(haudioStream); + SPX_THROW_ON_FAIL(pull_audio_input_stream_set_callbacks(haudioStream, stream, ReadCallbackWrapper, CloseCallbackWrapper)); + SPX_THROW_ON_FAIL(pull_audio_input_stream_set_getproperty_callback(haudioStream, stream, GetPropertyCallbackWrapper)); + + stream->m_callback = callback; + + return std::shared_ptr(stream); + } + +protected: + + /*! 
\cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PullAudioInputStream(SPXAUDIOSTREAMHANDLE haudioStream) : AudioInputStream(haudioStream) { } + + class FunctionCallbackWrapper : public PullAudioInputStreamCallback + { + public: + + FunctionCallbackWrapper(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback = nullptr) : + m_readCallback(readCallback), + m_closeCallback(closeCallback), + m_getPropertyCallback(getPropertyCallback) + { + }; + + /// Note: The dataBuffer returned by Read() should not contain any audio header. + int Read(uint8_t* dataBuffer, uint32_t size) override { return m_readCallback(dataBuffer, size); } + void Close() override { if (m_closeCallback != nullptr) m_closeCallback(); }; + SPXSTRING GetProperty(PropertyId id) override + { + if (m_getPropertyCallback != nullptr) + { + return m_getPropertyCallback(id); + } + else + { + return ""; + } + } + + private: + + DISABLE_COPY_AND_MOVE(FunctionCallbackWrapper); + + ReadCallbackFunction_Type m_readCallback; + CloseCallbackFunction_Type m_closeCallback; + GetPropertyCallbackFunction_Type m_getPropertyCallback; + + }; + + /*! 
\endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PullAudioInputStream); + + static int ReadCallbackWrapper(void* pvContext, uint8_t* dataBuffer, uint32_t size) + { + PullAudioInputStream* ptr = (PullAudioInputStream*)pvContext; + return ptr->m_callback->Read(dataBuffer, size); + } + + static void CloseCallbackWrapper(void* pvContext) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + PullAudioInputStream* ptr = (PullAudioInputStream*)pvContext; + ptr->m_callback->Close(); + } + + static void GetPropertyCallbackWrapper(void *pvContext, int id, uint8_t* result, uint32_t size) + { + PullAudioInputStream* ptr = (PullAudioInputStream*)pvContext; + auto value = ptr->m_callback->GetProperty(static_cast(id)); + auto valueSize = value.size() + 1; + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, valueSize > size); + std::memcpy(result, value.c_str(), valueSize); + } + + std::shared_ptr m_callback; +}; + + +inline std::shared_ptr AudioInputStream::CreatePushStream() +{ + return PushAudioInputStream::Create(); +} + +inline std::shared_ptr AudioInputStream::CreatePushStream(std::shared_ptr format) +{ + return PushAudioInputStream::Create(format); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback) +{ + return PullAudioInputStream::Create(pvContext, readCallback, closeCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback) +{ + return PullAudioInputStream::Create(pvContext, readCallback, closeCallback, getPropertyCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback) +{ + return PullAudioInputStream::Create(readCallback, closeCallback); +} 
+ +inline std::shared_ptr AudioInputStream::CreatePullStream(ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback) +{ + return PullAudioInputStream::Create(readCallback, closeCallback, getPropertyCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr callback) +{ + return PullAudioInputStream::Create(callback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback) +{ + return PullAudioInputStream::Create(format, pvContext, readCallback, closeCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, void* pvContext, CUSTOM_AUDIO_PULL_STREAM_READ_CALLBACK readCallback, CUSTOM_AUDIO_PULL_STREAM_CLOSE_CALLBACK closeCallback, CUSTOM_AUDIO_PULL_STREAM_GET_PROPERTY_CALLBACK getPropertyCallback) +{ + return PullAudioInputStream::Create(format, pvContext, readCallback, closeCallback, getPropertyCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback) +{ + return PullAudioInputStream::Create(format, readCallback, closeCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, ReadCallbackFunction_Type readCallback, CloseCallbackFunction_Type closeCallback, GetPropertyCallbackFunction_Type getPropertyCallback) +{ + return PullAudioInputStream::Create(format, readCallback, closeCallback, getPropertyCallback); +} + +inline std::shared_ptr AudioInputStream::CreatePullStream(std::shared_ptr format, std::shared_ptr callback) +{ + return PullAudioInputStream::Create(format, callback); +} + + +/// +/// Represents audio output stream used for custom audio output configurations. 
+/// Updated in version 1.7.0 +/// +class AudioOutputStream +{ +public: + + using WriteCallbackFunction_Type = ::std::function; + using CloseCallbackFunction_Type = ::std::function; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOSTREAMHANDLE() const { return m_haudioStream.get(); } + + /// + /// Creates a memory backed PullAudioOutputStream. + /// + /// A shared pointer to PullAudioOutputStream + static std::shared_ptr CreatePullStream(); + + /// + /// Creates a PushAudioOutputStream that delegates to the specified callback functions for Write() and Close() methods. + /// + /// Context pointer to use when invoking the callbacks. + /// Write callback. + /// Close callback. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr CreatePushStream(void* pvContext, CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK writeCallback, CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK closeCallback = nullptr); + + /// + /// Creates a PushAudioOutputStream that delegates to the specified callback functions for Write() and Close() methods. + /// + /// Write callback. + /// Close callback. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr CreatePushStream(WriteCallbackFunction_Type writeCallback, CloseCallbackFunction_Type closeCallback = nullptr); + + /// + /// Creates a PushAudioOutputStream that delegates to the specified callback interface for Write() and Close() methods. + /// + /// Shared pointer to PushAudioOutputStreamCallback instance. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr CreatePushStream(std::shared_ptr callback); + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioOutputStream(SPXAUDIOSTREAMHANDLE haudioStream) : m_haudioStream(haudioStream) { } + + /// + /// Destructor, does nothing. 
+ /// + virtual ~AudioOutputStream() {} + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_haudioStream; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(AudioOutputStream); +}; + + +/// +/// Represents memory backed pull audio output stream used for custom audio output. +/// Updated in version 1.7.0 +/// +class PullAudioOutputStream : public AudioOutputStream +{ +public: + friend class Dialog::ActivityReceivedEventArgs; + + /// + /// Creates a memory backed PullAudioOutputStream. + /// + /// A shared pointer to PullAudioOutputStream + static std::shared_ptr Create() + { + SPXAUDIOSTREAMHANDLE haudioStream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_create_pull_audio_output_stream(&haudioStream)); + + auto stream = new PullAudioOutputStream(haudioStream); + return std::shared_ptr(stream); + } + + /// + /// Reads a chunk of the audio data and fill it to given buffer + /// + /// A buffer to receive read data. + /// Size of the buffer. + /// Size of data filled to the buffer, 0 means end of stream + inline uint32_t Read(uint8_t* buffer, uint32_t bufferSize) + { + uint32_t filledSize = 0; + SPX_THROW_ON_FAIL(pull_audio_output_stream_read(m_haudioStream, buffer, bufferSize, &filledSize)); + + return filledSize; + } + + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PullAudioOutputStream(SPXAUDIOSTREAMHANDLE haudioStream) : AudioOutputStream(haudioStream) { } + + /*! 
\endcond */ + + +private: + + template + static std::shared_ptr SpxAllocSharedBuffer(size_t sizeInBytes) + { + auto ptr = reinterpret_cast(new uint8_t[sizeInBytes]); + auto deleter = [](T* p) { delete[] reinterpret_cast(p); }; + + std::shared_ptr buffer(ptr, deleter); + return buffer; + } + + +private: + + DISABLE_COPY_AND_MOVE(PullAudioOutputStream); + + std::vector m_audioData; +}; + + +/// +/// An interface that defines callback methods for an audio output stream. +/// Updated in version 1.7.0 +/// +/// +/// Derive from this class and implement its function to provide your own +/// data as an audio output stream. +/// +class PushAudioOutputStreamCallback +{ +public: + + /// + /// Destructor, does nothing. + /// + virtual ~PushAudioOutputStreamCallback() {} + + /// + /// This function is called to synchronously put data to the audio stream. + /// + /// The pointer to the buffer from which to consume the audio data. + /// The size of the buffer. + /// The number of bytes consumed from the buffer + virtual int Write(uint8_t* dataBuffer, uint32_t size) = 0; + + /// + /// This function is called to close the audio stream. + /// + /// + virtual void Close() = 0; + +protected: + + /*! \cond PROTECTED */ + + /// + /// Constructor, does nothing. + /// + PushAudioOutputStreamCallback() {}; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PushAudioOutputStreamCallback); +}; + + +/// +/// Push audio output stream class. +/// Added in version 1.4.0 +/// +class PushAudioOutputStream : public AudioOutputStream +{ +public: + + /// + /// Creates a PushAudioOutputStream utilizing the specified Write() and Close() "C" callback functions pointers + /// + /// Context pointer to use when invoking the callbacks. + /// Write callback. + /// Close callback. 
+ /// A shared pointer to PushAudioOutputStream + static std::shared_ptr Create(void* pvContext, CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK writeCallback, CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK closeCallback = nullptr) + { + return Create( + [=](uint8_t* buffer, uint32_t size) -> int { return writeCallback(pvContext, buffer, size); }, + [=]() { if (closeCallback != nullptr) { closeCallback(pvContext); } }); + } + + /// + /// Creates a PushAudioOutputStream utilizing the specified Write() and Close() callback functions. + /// + /// Write callback. + /// Close callback. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr Create(WriteCallbackFunction_Type writeCallback, CloseCallbackFunction_Type closeCallback = nullptr) + { + auto wrapper = std::make_shared(writeCallback, closeCallback); + return Create(wrapper); + } + + /// + /// Creates a PushAudioOutputStream utilizing the specified callback interface with Write() and Close() callback function. + /// + /// Shared pointer to PushAudioOutputStreamCallback instance. + /// A shared pointer to PushAudioOutputStream + static std::shared_ptr Create(std::shared_ptr callback) + { + SPXAUDIOSTREAMHANDLE haudioStream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_create_push_audio_output_stream(&haudioStream)); + + auto stream = new PushAudioOutputStream(haudioStream); + SPX_THROW_ON_FAIL(push_audio_output_stream_set_callbacks(haudioStream, stream, WriteCallbackWrapper, CloseCallbackWrapper)); + stream->m_callback = callback; + + return std::shared_ptr(stream); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. 
+ /// + explicit PushAudioOutputStream(SPXAUDIOSTREAMHANDLE haudioStream) : AudioOutputStream(haudioStream) { } + + class FunctionCallbackWrapper : public PushAudioOutputStreamCallback + { + public: + + FunctionCallbackWrapper(WriteCallbackFunction_Type writeCallback, CloseCallbackFunction_Type closeCallback) : + m_writeCallback(writeCallback), + m_closeCallback(closeCallback) + { + }; + + int Write(uint8_t* dataBuffer, uint32_t size) override { return m_writeCallback(dataBuffer, size); } + void Close() override { if (m_closeCallback != nullptr) m_closeCallback(); }; + + private: + + DISABLE_COPY_AND_MOVE(FunctionCallbackWrapper); + + WriteCallbackFunction_Type m_writeCallback; + CloseCallbackFunction_Type m_closeCallback; + }; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PushAudioOutputStream); + + static int WriteCallbackWrapper(void* pvContext, uint8_t* dataBuffer, uint32_t size) + { + PushAudioOutputStream* ptr = (PushAudioOutputStream*)pvContext; + return ptr->m_callback->Write(dataBuffer, size); + } + + static void CloseCallbackWrapper(void* pvContext) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + PushAudioOutputStream* ptr = (PushAudioOutputStream*)pvContext; + ptr->m_callback->Close(); + } + + std::shared_ptr m_callback; +}; + + +inline std::shared_ptr AudioOutputStream::CreatePullStream() +{ + return PullAudioOutputStream::Create(); +} + +inline std::shared_ptr AudioOutputStream::CreatePushStream(void* pvContext, CUSTOM_AUDIO_PUSH_STREAM_WRITE_CALLBACK writeCallback, CUSTOM_AUDIO_PUSH_STREAM_CLOSE_CALLBACK closeCallback) +{ + return PushAudioOutputStream::Create(pvContext, writeCallback, closeCallback); +} + +inline std::shared_ptr AudioOutputStream::CreatePushStream(WriteCallbackFunction_Type writeCallback, CloseCallbackFunction_Type closeCallback) +{ + return PushAudioOutputStream::Create(writeCallback, closeCallback); +} + +inline std::shared_ptr AudioOutputStream::CreatePushStream(std::shared_ptr callback) +{ + return 
PushAudioOutputStream::Create(callback); +} + + +} } } } // Microsoft::CognitiveServices::Speech::Audio diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_stream_format.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_stream_format.h new file mode 100644 index 0000000..ea2156b --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_audio_stream_format.h @@ -0,0 +1,215 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_audio_stream_format.h: Public API declarations for AudioStreamFormat and related C++ classes +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Audio { + +/// +/// Defines supported audio stream container format. +/// Changed in version 1.4.0. +/// +enum class AudioStreamContainerFormat +{ + /// + /// Stream ContainerFormat definition for OGG OPUS. + /// + OGG_OPUS = 0x101, + + /// + /// Stream ContainerFormat definition for MP3. + /// + MP3 = 0x102, + + /// + /// Stream ContainerFormat definition for FLAC. Added in version 1.7.0. + /// + FLAC = 0x103, + + /// + /// Stream ContainerFormat definition for ALAW. Added in version 1.7.0. + /// + ALAW = 0x104, + + /// + /// Stream ContainerFormat definition for MULAW. Added in version 1.7.0. + /// + MULAW = 0x105, + + /// + /// Stream ContainerFormat definition for AMRNB. Currently not supported. + /// + AMRNB = 0x106, + + /// + /// Stream ContainerFormat definition for AMRWB. Currently not supported. + /// + AMRWB = 0x107, + + /// + /// Stream ContainerFormat definition for any other or unknown format. + /// + ANY = 0x108 +}; + +/// +/// Represents the format specified inside WAV container. 
+/// +enum class AudioStreamWaveFormat +{ + /// + /// AudioStreamWaveFormat definition for PCM (pulse-code modulated) data in integer format. + /// + PCM = 0x0001, + + /// + /// AudioStreamWaveFormat definition A-law-encoded format. + /// + ALAW = 0x0006, + + /// + /// AudioStreamWaveFormat definition for Mu-law-encoded format. + /// + MULAW = 0x0007, + + /// + /// AudioStreamWaveFormat definition for G.722-encoded format. + /// + G722 = 0x028F +}; + +/// +/// Class to represent the audio stream format used for custom audio input configurations. +/// Updated in version 1.5.0. +/// +class AudioStreamFormat +{ +public: + + /// + /// Destructor, does nothing. + /// + virtual ~AudioStreamFormat() {} + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXAUDIOSTREAMFORMATHANDLE() const { return m_hformat.get(); } + + /// + /// Creates an audio stream format object representing the default audio stream format (16 kHz, 16 bit, mono PCM). + /// + /// A shared pointer to AudioStreamFormat + static std::shared_ptr GetDefaultInputFormat() + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_default_input(&hformat)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + + /// + /// Creates an audio stream format object with the specified PCM waveformat characteristics. + /// + /// Samples per second. + /// Bits per sample. + /// Number of channels in the waveform-audio data. + /// The format specified inside the WAV container. 
+ /// A shared pointer to AudioStreamFormat + static std::shared_ptr GetWaveFormat(uint32_t samplesPerSecond, uint8_t bitsPerSample, uint8_t channels, AudioStreamWaveFormat waveFormat) + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_waveformat(&hformat, samplesPerSecond, bitsPerSample, channels, (Audio_Stream_Wave_Format)waveFormat)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + + /// + /// Creates an audio stream format object with the specified PCM waveformat characteristics. + /// + /// Samples per second. + /// Bits per sample. + /// Number of channels in the waveform-audio data. + /// A shared pointer to AudioStreamFormat + static std::shared_ptr GetWaveFormatPCM(uint32_t samplesPerSecond, uint8_t bitsPerSample = 16, uint8_t channels = 1) + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_waveformat(&hformat, samplesPerSecond, bitsPerSample, channels, Audio_Stream_Wave_Format::StreamWaveFormat_PCM)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + + /// + /// Creates an audio stream format object representing the default audio stream format (16 kHz, 16 bit, mono PCM). + /// Added in version 1.4.0 + /// + /// A shared pointer to AudioStreamFormat + static std::shared_ptr GetDefaultOutputFormat() + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_default_output(&hformat)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + + /// + /// Creates an audio stream format object with the specified compressed audio container format, to be used as input format. + /// Support added in 1.4.0. + /// + /// + /// Formats are defined in AudioStreamContainerFormat enum. + /// + /// Compressed format type. + /// A shared pointer to AudioStreamFormat. 
+ static std::shared_ptr GetCompressedFormat(AudioStreamContainerFormat compressedFormat) + { + SPXAUDIOSTREAMFORMATHANDLE hformat = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_stream_format_create_from_compressed_format(&hformat, (Audio_Stream_Container_Format)compressedFormat)); + + auto format = new AudioStreamFormat(hformat); + return std::shared_ptr(format); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AudioStreamFormat(SPXAUDIOSTREAMFORMATHANDLE hformat) : m_hformat(hformat) { } + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(AudioStreamFormat); + + /// + /// Internal member variable that holds the smart handle. + /// + SmartHandle m_hformat; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Audio diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_auto_detect_source_lang_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_auto_detect_source_lang_config.h new file mode 100644 index 0000000..e68f3fc --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_auto_detect_source_lang_config.h @@ -0,0 +1,141 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines auto detection source configuration +/// Updated in 1.13.0 +/// +class AutoDetectSourceLanguageConfig +{ +public: + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. 
+ explicit operator SPXAUTODETECTSOURCELANGCONFIGHANDLE() const { return m_hconfig; } + + /// + /// Creates an instance of the AutoDetectSourceLanguageConfig with open range as source languages + /// Note: only , embedded speech translation and multilingual support source language auto detection from open range, + /// for , please use AutoDetectSourceLanguageConfig with specific source languages. + /// Added in 1.13.0 + /// + /// A shared pointer to the new AutoDetectSourceLanguageConfig instance. + static std::shared_ptr FromOpenRange() + { + SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(create_auto_detect_source_lang_config_from_open_range(&hconfig)); + auto ptr = new AutoDetectSourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the AutoDetectSourceLanguageConfig with source languages + /// + /// The list of source languages. + /// A shared pointer to the new AutoDetectSourceLanguageConfig instance. + static std::shared_ptr FromLanguages(const std::vector& languages) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, languages.empty()); + SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + std::string languagesStr; + bool isFirst = true; + for (const SPXSTRING& language : languages) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, language.empty()); + if (!isFirst) + { + languagesStr += ","; + } + isFirst = false; + languagesStr += Utils::ToUTF8(language); + } + SPX_THROW_ON_FAIL(create_auto_detect_source_lang_config_from_languages(&hconfig, languagesStr.c_str())); + auto ptr = new AutoDetectSourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the AutoDetectSourceLanguageConfig with a list of source language config + /// + /// The list of source languages config + /// A shared pointer to the new AutoDetectSourceLanguageConfig instance. 
+ static std::shared_ptr FromSourceLanguageConfigs(std::vector> configList) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, configList.empty()); + SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + bool isFirst = true; + for (const auto& config : configList) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, config == nullptr); + if (isFirst) + { + SPX_THROW_ON_FAIL(create_auto_detect_source_lang_config_from_source_lang_config(&hconfig, Utils::HandleOrInvalid(config))); + isFirst = false; + } + else + { + SPX_THROW_ON_FAIL(add_source_lang_config_to_auto_detect_source_lang_config(hconfig, Utils::HandleOrInvalid(config))); + } + } + auto ptr = new AutoDetectSourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Destructs the object. + /// + virtual ~AutoDetectSourceLanguageConfig() + { + auto_detect_source_lang_config_release(m_hconfig); + property_bag_release(m_propertybag); + } + +private: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit AutoDetectSourceLanguageConfig(SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig) + :m_hconfig(hconfig) + { + SPX_THROW_ON_FAIL(auto_detect_source_lang_config_get_property_bag(hconfig, &m_propertybag)); + } + + /// + /// Internal member variable that holds the config + /// + SPXAUTODETECTSOURCELANGCONFIGHANDLE m_hconfig; + + /// + /// Internal member variable that holds the properties of the speech config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + DISABLE_COPY_AND_MOVE(AutoDetectSourceLanguageConfig); +}; + +}}} + diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_auto_detect_source_lang_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_auto_detect_source_lang_result.h new file mode 100644 index 0000000..538ea58 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_auto_detect_source_lang_result.h @@ -0,0 +1,85 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Contains auto detected source language result +/// Added in 1.8.0 +/// +class AutoDetectSourceLanguageResult +{ +public: + + /// + /// Creates an instance of AutoDetectSourceLanguageResult object for the speech recognition result. + /// + /// The speech recognition result. + /// A shared pointer to AutoDetectSourceLanguageResult. + static std::shared_ptr FromResult(std::shared_ptr result) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result == nullptr); + auto ptr = new AutoDetectSourceLanguageResult(result); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of AutoDetectSourceLanguageResult object for the speech translation result. + /// + /// The speech translation result. + /// A shared pointer to AutoDetectSourceLanguageResult. + static std::shared_ptr FromResult(std::shared_ptr result) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result == nullptr); + auto ptr = new AutoDetectSourceLanguageResult(result); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of AutoDetectSourceLanguageResult object for the convesation transcription result. + /// + /// The conversation transcription result. + /// A shared pointer to AutoDetectSourceLanguageResult. + static std::shared_ptr FromResult(std::shared_ptr result) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result == nullptr); + auto ptr = new AutoDetectSourceLanguageResult(result); + return std::shared_ptr(ptr); + } + + /// + /// The language value + /// If this is empty, it means the system fails to detect the source language automatically + /// + const SPXSTRING Language; + +protected: + + /*! 
\cond PROTECTED */ + // Using RecognitionResult pointer, so this can cover all classes that inherit from RecognitionResult + AutoDetectSourceLanguageResult(std::shared_ptr result) : + Language(result->Properties.GetProperty(PropertyId::SpeechServiceConnection_AutoDetectSourceLanguageResult)) + { + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(AutoDetectSourceLanguageResult); +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_class_language_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_class_language_model.h new file mode 100644 index 0000000..a3099cc --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_class_language_model.h @@ -0,0 +1,70 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_class_language_model.h: Public API declarations for ClassLanguageModel C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents a list of grammars for dynamic grammar scenarios. +/// Added in version 1.7.0. +/// +/// +/// ClassLanguageModels are only usable in specific scenarios and are not generally available. +/// +class ClassLanguageModel : public Grammar +{ +public: + + /// + /// Creates a class language model from a storage ID. + /// + /// The persisted storage ID of the language model. + /// The grammar list associated with the recognizer. + /// + /// Creating a ClassLanguageModel from a storage ID is only usable in specific scenarios and is not generally available. 
+ /// + static std::shared_ptr FromStorageId(const SPXSTRING& storageId) + { + SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(class_language_model_from_storage_id(&hgrammar, Utils::ToUTF8(storageId.c_str()))); + + return std::make_shared(hgrammar); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Class Language Model handle. + explicit ClassLanguageModel(SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID) : Grammar(hgrammar) { } + + /// + /// Assigns a grammar to a class in the language mode. + /// + /// Name of the class to assign the grammar to. + /// Grammar to assign. + template + void AssignClass(const SPXSTRING& className, std::shared_ptr grammar) + { + SPX_THROW_ON_FAIL(class_language_model_assign_class(m_hgrammar.get(), Utils::ToUTF8(className.c_str()), (SPXPHRASEHANDLE)(*grammar.get()))); + } + +private: + + DISABLE_COPY_AND_MOVE(ClassLanguageModel); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_common.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_common.h new file mode 100644 index 0000000..2e8d382 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_common.h @@ -0,0 +1,16 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_common.h: Public API declarations for global C++ APIs/namespaces +// + +#pragma once + +#include +#include +#include +#include // must include after spxdebug.h or speechapi*.h (can NOT be included before) + +#define DISABLE_COPY_AND_MOVE(T) AZAC_DISABLE_COPY_AND_MOVE(T) +#define DISABLE_DEFAULT_CTORS(T) AZAC_DISABLE_DEFAULT_CTORS(T) diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection.h new file mode 100644 index 0000000..d4293b7 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection.h @@ -0,0 +1,346 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Connection is a proxy class for managing connection to the speech service of the specified Recognizer. +/// By default, a Recognizer autonomously manages connection to service when needed. +/// The Connection class provides additional methods for users to explicitly open or close a connection and +/// to subscribe to connection status changes. +/// The use of Connection is optional. It is intended for scenarios where fine tuning of application +/// behavior based on connection status is needed. Users can optionally call Open() to manually +/// initiate a service connection before starting recognition on the Recognizer associated with this Connection. +/// After starting a recognition, calling Open() or Close() might fail. This will not impact +/// the Recognizer or the ongoing recognition. Connection might drop for various reasons, the Recognizer will +/// always try to reinstitute the connection as required to guarantee ongoing operations. 
In all these cases +/// Connected/Disconnected events will indicate the change of the connection status. +/// Updated in version 1.17.0. +/// +class Connection : public std::enable_shared_from_this +{ + +public: + /// + /// Gets the Connection instance from the specified recognizer. + /// + /// The recognizer associated with the connection. + /// The Connection instance of the recognizer. + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer) + { + SPX_INIT_HR(hr); + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, recognizer == nullptr); + + SPXCONNECTIONHANDLE handle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(hr = ::connection_from_recognizer(recognizer->m_hreco, &handle)); + + return std::make_shared(handle); + } + + /// + /// Gets the Connection instance from the specified conversation translator. + /// + /// The conversation translator associated with the connection. + /// The Connection instance of the conversation translator. + static std::shared_ptr FromConversationTranslator(std::shared_ptr convTrans) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, convTrans == nullptr); + + SPXCONNECTIONHANDLE handle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::connection_from_conversation_translator(convTrans->m_handle, &handle)); + + return std::make_shared(handle); + } + + /// + /// Gets the Connection instance from the specified dialog service connector, used for observing and managing + /// connection and disconnection from the speech service. + /// + /// The dialog service connector associated with the connection. + /// The Connection instance of the dialog service connector. 
+ static std::shared_ptr FromDialogServiceConnector(std::shared_ptr dialogServiceConnector) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, dialogServiceConnector == nullptr); + + SPXCONNECTIONHANDLE handle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::connection_from_dialog_service_connector(dialogServiceConnector->m_handle, &handle)); + + return std::make_shared(handle); + } + + /// + /// Gets the Connection instance from the specified speech synthesizer. + /// Added in version 1.17.0 + /// + /// The speech synthesizer associated with the connection. + /// The Connection instance of the speech synthesizer. + static std::shared_ptr FromSpeechSynthesizer(std::shared_ptr synthesizer) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, synthesizer == nullptr); + + SPXCONNECTIONHANDLE handle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::connection_from_speech_synthesizer(synthesizer->m_hsynth, &handle)); + + return std::make_shared(handle); + } + + /// + /// Starts to set up connection to the service. + /// Users can optionally call Open() to manually set up a connection in advance before starting recognition/synthesis on the + /// Recognizer/Synthesizer associated with this Connection. After starting recognition, calling Open() might fail, depending on + /// the process state of the Recognizer/Synthesizer. But the failure does not affect the state of the associated Recognizer/Synthesizer. + /// Note: On return, the connection might not be ready yet. Please subscribe to the Connected event to + /// be notified when the connection is established. + /// + /// Indicates whether the connection is used for continuous recognition or single-shot recognition. It takes no effect if the connection is from SpeechSynthsizer. + void Open(bool forContinuousRecognition) + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_open(m_connectionHandle, forContinuousRecognition)); + } + + /// + /// Closes the connection the service. 
+ /// Users can optionally call Close() to manually shutdown the connection of the associated Recognizer/Synthesizer. The call + /// might fail, depending on the process state of the Recognizer/Synthesizer. But the failure does not affect the state of the + /// associated Recognizer/Synthesizer. + /// + void Close() + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_close(m_connectionHandle)); + } + + /// + /// Appends a parameter in a message to service. + /// Added in version 1.7.0. + /// + /// the message path. + /// Name of the property. + /// Value of the property. This is a json string. + /// void. + void SetMessageProperty(const SPXSTRING& path, const SPXSTRING& propertyName, const SPXSTRING& propertyValue) + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_set_message_property(m_connectionHandle, Utils::ToUTF8(path).c_str(), Utils::ToUTF8(propertyName).c_str(), Utils::ToUTF8(propertyValue).c_str())); + } + + /// + /// Send a message to the speech service. + /// Added in version 1.7.0. + /// + /// The path of the message. + /// The payload of the message. This is a json string. + /// An empty future. + std::future SendMessageAsync(const SPXSTRING& path, const SPXSTRING& payload) + { + auto keep_alive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keep_alive, this, path, payload]() -> void { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_send_message(m_connectionHandle, Utils::ToUTF8(path.c_str()), Utils::ToUTF8(payload.c_str()))); + }); + return future; + } + + /// + /// Send a binary message to the speech service. + /// This method doesn't work for the connection of SpeechSynthesizer. + /// Added in version 1.10.0. + /// + /// The path of the message. + /// The binary payload of the message. 
+ /// The size of the binary payload. + /// An empty future. + std::future SendMessageAsync(const SPXSTRING& path, uint8_t* payload, uint32_t size) + { + auto keep_alive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keep_alive, this, path, payload, size]() -> void { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_connectionHandle == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::connection_send_message_data(m_connectionHandle, Utils::ToUTF8(path.c_str()), payload, size)); + }); + return future; + } + + /// + /// The Connected event to indicate that the recognizer is connected to service. + /// + EventSignal Connected; + + /// + /// The Disconnected event to indicate that the recognizer is disconnected from service. + /// + EventSignal Disconnected; + + /// + /// The MessageReceived event to indicate that the underlying protocol received a message from the service. + /// Added in version 1.10.0. + /// + EventSignal MessageReceived; + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// The connection handle. + explicit Connection(SPXCONNECTIONHANDLE handle) : + Connected(GetConnectionEventConnectionsChangedCallback(), GetConnectionEventConnectionsChangedCallback()), + Disconnected(GetConnectionEventConnectionsChangedCallback(), GetConnectionEventConnectionsChangedCallback()), + MessageReceived(GetConnectionMessageEventConnectionsChangedCallback(), GetConnectionMessageEventConnectionsChangedCallback()), + m_connectionHandle(handle) + { + SPX_DBG_TRACE_FUNCTION(); + } + + /// + /// Destructor. + /// + ~Connection() + { + SPX_DBG_TRACE_FUNCTION(); + + try + { + Disconnected.DisconnectAll(); + Connected.DisconnectAll(); + } + catch (const std::exception& ex) + { + SPX_TRACE_ERROR("Exception caught in ~Connection(): %s", ex.what()); + (void)ex; + } + catch (...) 
+ { + SPX_TRACE_ERROR("Unknown exception happened during ~Connection()."); + } + + if (m_connectionHandle != SPXHANDLE_INVALID) + { + ::connection_handle_release(m_connectionHandle); + m_connectionHandle = SPXHANDLE_INVALID; + } + } + +private: + DISABLE_COPY_AND_MOVE(Connection); + + SPXCONNECTIONHANDLE m_connectionHandle; + + static void FireConnectionEvent(bool firingConnectedEvent, SPXEVENTHANDLE event, void* context) + { + std::exception_ptr p; + try + { + std::unique_ptr connectionEvent{ new ConnectionEventArgs(event) }; + + auto connection = static_cast(context); + auto keepAlive = connection->shared_from_this(); + if (firingConnectedEvent) + { + connection->Connected.Signal(*connectionEvent.get()); + } + else + { + connection->Disconnected.Signal(*connectionEvent.get()); + } + } + +#ifdef SHOULD_HANDLE_FORCED_UNWIND + // Currently Python forcibly kills the thread by throwing __forced_unwind, + // taking care we propagate this exception further. + catch (abi::__forced_unwind&) + { + SPX_TRACE_ERROR("__forced_unwind exception caught in FireConnectionEvent."); + throw; + } +#endif + catch (...) + { + if (recognizer_event_handle_is_valid(event)) { + recognizer_event_handle_release(event); + } + SPX_TRACE_ERROR("Caught exception in FireConnectionEvent(%s). Will rethrow later.", firingConnectedEvent ? "Connected" : "Disconnected"); + throw; + } + + // ConnectionEventArgs doesn't hold hevent, and thus can't release it properly ... 
release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(event)); + recognizer_event_handle_release(event); + } + + static void FireEvent_Connected(SPXEVENTHANDLE event, void* context) + { + FireConnectionEvent(true, event, context); + } + + static void FireEvent_Disconnected(SPXEVENTHANDLE event, void* context) + { + FireConnectionEvent(false, event, context); + } + + static void FireEvent_MessageReceived(SPXEVENTHANDLE event, void* context) + { + std::unique_ptr connectionEvent { new ConnectionMessageEventArgs(event) }; + + auto connection = static_cast(context); + auto keepAlive = connection->shared_from_this(); + connection->MessageReceived.Signal(*connectionEvent.get()); + } + + void ConnectionEventConnectionsChanged(const EventSignal& connectionEvent) + { + if (m_connectionHandle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_connectionHandle=0x%8p", __FUNCTION__, (void*)m_connectionHandle); + SPX_DBG_TRACE_VERBOSE_IF(!::connection_handle_is_valid(m_connectionHandle), "%s: m_connectionHandle is INVALID!!!", __FUNCTION__); + + if (&connectionEvent == &Connected) + { + SPX_THROW_ON_FAIL(connection_connected_set_callback(m_connectionHandle, Connected.IsConnected() ? FireEvent_Connected : nullptr, this)); + } + else if (&connectionEvent == &Disconnected) + { + SPX_THROW_ON_FAIL(connection_disconnected_set_callback(m_connectionHandle, Disconnected.IsConnected() ? FireEvent_Disconnected : nullptr, this)); + } + } + } + + void ConnectionMessageEventConnectionsChanged(const EventSignal& connectionEvent) + { + if (m_connectionHandle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_connectionHandle=0x%8p", __FUNCTION__, (void*)m_connectionHandle); + SPX_DBG_TRACE_VERBOSE_IF(!::connection_handle_is_valid(m_connectionHandle), "%s: m_connectionHandle is INVALID!!!", __FUNCTION__); + + if (&connectionEvent == &MessageReceived) + { + SPX_THROW_ON_FAIL(connection_message_received_set_callback(m_connectionHandle, MessageReceived.IsConnected() ? 
FireEvent_MessageReceived : nullptr, this)); + } + } + } + + inline std::function&)> GetConnectionEventConnectionsChangedCallback() + { + return [=](const EventSignal& connectionEvent) { this->ConnectionEventConnectionsChanged(connectionEvent); }; + } + + inline std::function&)> GetConnectionMessageEventConnectionsChangedCallback() + { + return [=](const EventSignal& connectionEvent) { this->ConnectionMessageEventConnectionsChanged(connectionEvent); }; + } +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_eventargs.h new file mode 100644 index 0000000..1e56f25 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_eventargs.h @@ -0,0 +1,68 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Provides data for the ConnectionEvent. +/// Added in version 1.2.0. +/// +class ConnectionEventArgs : public SessionEventArgs +{ +protected: + /*! \cond PRIVATE */ + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXEVENTHANDLE hevent) : + PropertyCollection([=]() + { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + recognizer_connection_event_get_property_bag(hevent, &hpropbag); + return hpropbag; + }()) + {} + }; + + PrivatePropertyCollection m_properties; + /*! \endcond */ + +public: + + /// + /// Constructor. + /// + /// Event handle. + explicit ConnectionEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + m_properties(hevent), + Properties(m_properties) + { + }; + + /// + virtual ~ConnectionEventArgs() {} + + /// + /// Collection of additional properties. 
+ /// + const PropertyCollection& Properties; + +private: + + DISABLE_COPY_AND_MOVE(ConnectionEventArgs); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_message.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_message.h new file mode 100644 index 0000000..a1a9469 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_message.h @@ -0,0 +1,152 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_connection_message.h: Public API declarations for ConnectionMessage C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// ConnectionMessage represents implementation specific messages sent to and received from +/// the speech service. These messages are provided for debugging purposes and should not +/// be used for production use cases with the Azure Cognitive Services Speech Service. +/// Messages sent to and received from the Speech Service are subject to change without +/// notice. This includes message contents, headers, payloads, ordering, etc. +/// Added in version 1.10.0. +/// +class ConnectionMessage +{ +private: + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXCONNECTIONMESSAGEHANDLE hcm) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + ::connection_message_get_property_bag(hcm, &hpropbag); + return hpropbag; + }()) + { + } + }; + + SPXCONNECTIONMESSAGEHANDLE m_hcm; + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Constructor. + /// + /// Event handle. 
+ explicit ConnectionMessage(SPXCONNECTIONMESSAGEHANDLE hcm) : + m_hcm(hcm), + m_properties(hcm), + Properties(m_properties) + { + }; + + /// + /// Destructor. + /// + virtual ~ConnectionMessage() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hcm); + SPX_THROW_ON_FAIL(::connection_message_handle_release(m_hcm)); + } + + /// + /// Gets the message path. + /// + /// An std::string containing the message path. + std::string GetPath() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + return m_properties.GetProperty("connection.message.path"); + } + + /// + /// Checks to see if the ConnectionMessage is a text message. + /// See also IsBinaryMessage(). + /// + /// A bool indicated if the message payload is text. + bool IsTextMessage() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + return m_properties.GetProperty("connection.message.type") == "text"; + } + + /// + /// Checks to see if the ConnectionMessage is a binary message. + /// See also GetBinaryMessage(). + /// + /// A bool indicated if the message payload is binary. + bool IsBinaryMessage() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + return m_properties.GetProperty("connection.message.type") == "binary"; + } + + /// + /// Gets the text message payload. Typically the text message content-type is + /// application/json. To determine other content-types use + /// Properties.GetProperty("Content-Type"). + /// + /// An std::string containing the text message. + std::string GetTextMessage() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + return m_properties.GetProperty("connection.message.text.message"); + } + + /// + /// Gets the binary message payload. + /// + /// An std::vector containing the binary message. 
+ std::vector GetBinaryMessage() const + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hcm == SPXHANDLE_INVALID); + auto size = ::connection_message_get_data_size(m_hcm); + + std::vector message(size); + SPX_THROW_ON_FAIL(::connection_message_get_data(m_hcm, message.data(), size)); + + return message; + } + + /// + /// A collection of properties and their values defined for this . + /// Message headers can be accessed via this collection (e.g. "Content-Type"). + /// + PropertyCollection& Properties; + +private: + + /*! \cond PRIVATE */ + + DISABLE_COPY_AND_MOVE(ConnectionMessage); + + /*! \endcond */ +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_message_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_message_eventargs.h new file mode 100644 index 0000000..3ff6f79 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_connection_message_eventargs.h @@ -0,0 +1,79 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_connection_message_eventargs.h: Public API declarations for ConnectionMessageEventArgs C++ base class +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Provides data for the ConnectionMessageEvent +/// +class ConnectionMessageEventArgs : public EventArgs +{ +private: + + /*! \cond PRIVATE */ + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_message; + + /*! \endcond */ + +public: + + /// + /// Constructor. Creates a new instance using the provided handle. + /// + /// Event handle. + explicit ConnectionMessageEventArgs(SPXEVENTHANDLE hevent) : + m_hevent(hevent), + m_message(std::make_shared(MessageHandleFromEventHandle(hevent))) + { + }; + + /// + /// Destructor. 
+    /// </summary>
+    virtual ~ConnectionMessageEventArgs()
+    {
+        SPX_THROW_ON_FAIL(::connection_message_received_event_handle_release(m_hevent));
+    }
+
+    /// <summary>
+    /// Gets the <see cref="ConnectionMessage"/> associated with this <see cref="ConnectionMessageEventArgs"/>.
+    /// </summary>
+    /// <returns>An std::shared_ptr<ConnectionMessage> containing the message.</returns>
+    std::shared_ptr<ConnectionMessage> GetMessage() const { return m_message; }
+
+private:
+
+    /*! \cond PRIVATE */
+
+    DISABLE_COPY_AND_MOVE(ConnectionMessageEventArgs);
+
+    SPXCONNECTIONMESSAGEHANDLE MessageHandleFromEventHandle(SPXEVENTHANDLE hevent)
+    {
+        SPXCONNECTIONMESSAGEHANDLE hcm = SPXHANDLE_INVALID;
+        SPX_THROW_ON_FAIL(::connection_message_received_event_get_message(hevent, &hcm));
+        return hcm;
+    }
+
+    /*! \endcond */
+
+};
+
+
+} } } // Microsoft::CognitiveServices::Speech
diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation.h
new file mode 100644
index 0000000..339f22c
--- /dev/null
+++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation.h
@@ -0,0 +1,340 @@
+
+// Copyright (c) Microsoft. All rights reserved.
+// See https://aka.ms/csspeech/license for the full license information.
+//
+// speechapi_cxx_conversation.h: Public API declarations for Conversation C++ class
+//
+
+#pragma once
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Microsoft {
+namespace CognitiveServices {
+namespace Speech {
+namespace Transcription {
+
+/// <summary>
+/// Class for conversation.
+/// Added in version 1.8.0
+/// </summary>
+class Conversation : public std::enable_shared_from_this<Conversation>
+{
+public:
+
+    static constexpr size_t MAX_CONVERSATION_ID_LEN = 1024;
+
+    /// <summary>
+    /// Create a conversation using a speech config and an optional conversation id.
+    /// </summary>
+    /// A shared smart pointer of a speech config object.
+    /// Conversation Id.
+    /// A shared smart pointer of the created conversation object.
+ static std::future> CreateConversationAsync(std::shared_ptr speechConfig, const SPXSTRING& conversationId = SPXSTRING()) + { + auto future = std::async(std::launch::async, [conversationId, speechConfig]() -> std::shared_ptr { + SPXCONVERSATIONHANDLE hconversation; + SPX_THROW_ON_FAIL(conversation_create_from_config(&hconversation, (SPXSPEECHCONFIGHANDLE)(*speechConfig), Utils::ToUTF8(conversationId).c_str())); + return std::make_shared(hconversation); + }); + return future; + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit Conversation(SPXCONVERSATIONHANDLE hconversation) : + m_hconversation(hconversation), + m_properties(hconversation), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~Conversation() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + ::conversation_release_handle(m_hconversation); + m_hconversation = SPXHANDLE_INVALID; + } + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXCONVERSATIONHANDLE () const { return m_hconversation; } + + /// + /// Get the conversation id. + /// + /// Conversation id. + SPXSTRING GetConversationId() + { + char id[MAX_CONVERSATION_ID_LEN + 1]; + std::memset(id, 0, MAX_CONVERSATION_ID_LEN); + SPX_THROW_ON_FAIL(conversation_get_conversation_id(m_hconversation, id, MAX_CONVERSATION_ID_LEN)); + return id; + } + + /// + /// Add a participant to a conversation using the user's id. + /// + /// Note: The returned participant can be used to remove. If the client changes the participant's attributes, + /// the changed attributes are passed on to the service only when the participant is added again. + /// + /// A user id. + /// a shared smart pointer of the participant. 
+ std::future> AddParticipantAsync(const SPXSTRING& userId) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, userId]() -> std::shared_ptr { + const auto participant = Participant::From(userId); + SPX_THROW_ON_FAIL(conversation_update_participant(m_hconversation, true, (SPXPARTICIPANTHANDLE)(*participant))); + return participant; + }); + return future; + } + + /// + /// Add a participant to a conversation using the User object. + /// + /// A shared smart pointer to a User object. + /// The passed in User object. + std::future> AddParticipantAsync(const std::shared_ptr& user) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, user]() -> std::shared_ptr { + SPX_THROW_ON_FAIL(conversation_update_participant_by_user(m_hconversation, true, (SPXUSERHANDLE)(*user))); + return user; + }); + return future; + } + + /// + /// Add a participant to a conversation using the participant object + /// + /// A shared smart pointer to a participant object. + /// The passed in participant object. + std::future> AddParticipantAsync(const std::shared_ptr& participant) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, participant]() -> std::shared_ptr { + SPX_THROW_ON_FAIL(conversation_update_participant(m_hconversation, true, (SPXPARTICIPANTHANDLE)(*participant))); + return participant; + }); + return future; + } + + /// + /// Remove a participant from a conversation using the participant object + /// + /// A shared smart pointer of a participant object. + /// An empty future. 
+ std::future RemoveParticipantAsync(const std::shared_ptr& participant) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, participant]() -> void { + SPX_THROW_ON_FAIL(conversation_update_participant(m_hconversation, false, (SPXPARTICIPANTHANDLE)(*participant))); + }); + return future; + } + + /// + /// Remove a participant from a conversation using the User object + /// + /// A smart pointer of a User. + /// An empty future. + std::future RemoveParticipantAsync(const std::shared_ptr& user) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, user]() -> void { + SPX_THROW_ON_FAIL(conversation_update_participant_by_user(m_hconversation, false, SPXUSERHANDLE(*user))); + }); + return future; + } + + /// + /// Remove a participant from a conversation using a user id string. + /// + /// A user id. + /// An empty future. + std::future RemoveParticipantAsync(const SPXSTRING& userId) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, userId]() -> void { + SPX_THROW_ON_FAIL(conversation_update_participant_by_user_id(m_hconversation, false, Utils::ToUTF8(userId.c_str()))); + }); + return future; + } + + /// + /// Ends the current conversation. + /// + /// An empty future. + std::future EndConversationAsync() + { + return RunAsync(::conversation_end_conversation); + } + + /// + /// Sets the authorization token that will be used for connecting the server. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Start the conversation. 
+ /// + /// An empty future. + std::future StartConversationAsync() + { + return RunAsync(::conversation_start_conversation); + } + + /// + /// Deletes the conversation. Any participants that are still part of the converation + /// will be ejected after this call. + /// + /// An empty future. + std::future DeleteConversationAsync() + { + return RunAsync(::conversation_delete_conversation); + } + + /// + /// Locks the conversation. After this no new participants will be able to join. + /// + /// An empty future. + std::future LockConversationAsync() + { + return RunAsync(::conversation_lock_conversation); + } + + /// + /// Unlocks the conversation. + /// + /// An empty future. + std::future UnlockConversationAsync() + { + return RunAsync(::conversation_unlock_conversation); + } + + /// + /// Mutes all participants except for the host. This prevents others from generating + /// transcriptions, or sending text messages. + /// + /// An empty future. + std::future MuteAllParticipantsAsync() + { + return RunAsync(::conversation_mute_all_participants); + } + + /// + /// Allows other participants to generate transcriptions, or send text messages. + /// + /// An empty future. + std::future UnmuteAllParticipantsAsync() + { + return RunAsync(::conversation_unmute_all_participants); + } + + /// + /// Mutes a particular participant. This will prevent them generating new transcriptions, + /// or sending text messages. + /// + /// The identifier for the participant. + /// An empty future. + std::future MuteParticipantAsync(const SPXSTRING& participantId) + { + return RunAsync([participantId = Utils::ToUTF8(participantId)](auto handle) + { + return ::conversation_mute_participant(handle, participantId.c_str()); + }); + } + + /// + /// Unmutes a particular participant. + /// + /// The identifier for the participant. + /// An empty future. 
+ std::future UnmuteParticipantAsync(const SPXSTRING& participantId) + { + return RunAsync([participantId = Utils::ToUTF8(participantId)](auto handle) + { + return ::conversation_unmute_participant(handle, participantId.c_str()); + }); + } + +private: + + /*! \cond PRIVATE */ + + SPXCONVERSATIONHANDLE m_hconversation; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXCONVERSATIONHANDLE hconv) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + conversation_get_property_bag(hconv, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + inline std::future RunAsync(std::function func) + { + auto keepalive = this->shared_from_this(); + return std::async(std::launch::async, [keepalive, this, func]() + { + SPX_THROW_ON_FAIL(func(m_hconversation)); + }); + } + + /*! \endcond */ + +public: + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + +}; + +}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcriber.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcriber.h new file mode 100644 index 0000000..9cfaa52 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcriber.h @@ -0,0 +1,509 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_conversation_transcriber.h: Public API declarations for ConversationTranscriber C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +class Session; + +/// +/// Class for ConversationTranscribers. +/// +class ConversationTranscriber final : public Recognizer +{ +public: + /// + /// Create a conversation transcriber from a speech config + /// + /// Speech configuration. + /// A smart pointer wrapped conversation transcriber pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_conversation_transcriber_from_config( + &hreco, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a conversation transcriber from a speech config and audio config. + /// + /// Speech configuration. + /// Audio configuration. + /// A smart pointer wrapped conversation transcriber pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_conversation_transcriber_from_config( + &hreco, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a conversation transcriber from a speech config, auto detection source language config and audio config + /// + /// Speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped conversation trasncriber pointer. 
+ static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_conversation_transcriber_from_auto_detect_source_lang_config( + &hreco, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(autoDetectSourceLangConfig), + Utils::HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a conversation transcriber from a speech config, source language config and audio config + /// + /// Speech configuration. + /// Source language config. + /// Audio configuration. + /// A smart pointer wrapped conversation transcriber pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr sourceLanguageConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_conversation_transcriber_from_source_lang_config( + &hreco, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(sourceLanguageConfig), + Utils::HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a conversation transcriber from a speech config, source language and audio config + /// + /// Speech configuration. + /// Source language. + /// Audio configuration. + /// A smart pointer wrapped conversation transcriber pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + const SPXSTRING& sourceLanguage, + std::shared_ptr audioInput = nullptr) + { + return FromConfig(speechconfig, SourceLanguageConfig::FromLanguage(sourceLanguage), audioInput); + } + + /// + /// Asynchronously starts a conversation transcribing. + /// + /// An empty future. 
+ std::future StartTranscribingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStartContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async(m_hreco, &m_hasyncStartContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async_wait_for(m_hasyncStartContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStartContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Asynchronously stops a conversation transcribing. + /// + /// An empty future. + std::future StopTranscribingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStopContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async(m_hreco, &m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async_wait_for(m_hasyncStopContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStopContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStopContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. 
+ explicit ConversationTranscriber(SPXRECOHANDLE hreco) throw() : + Recognizer(hreco), + SessionStarted(GetSessionEventConnectionsChangedCallback()), + SessionStopped(GetSessionEventConnectionsChangedCallback()), + SpeechStartDetected(GetRecognitionEventConnectionsChangedCallback()), + SpeechEndDetected(GetRecognitionEventConnectionsChangedCallback()), + Transcribing(GetRecoEventConnectionsChangedCallback()), + Transcribed(GetRecoEventConnectionsChangedCallback()), + Canceled(GetRecoCanceledEventConnectionsChangedCallback()), + m_hasyncStartContinuous(SPXHANDLE_INVALID), + m_hasyncStopContinuous(SPXHANDLE_INVALID), + m_properties(hreco), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~ConversationTranscriber() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Signal for events indicating the start of a recognition session (operation). + /// + EventSignal SessionStarted; + + /// + /// Signal for events indicating the end of a recognition session (operation). + /// + EventSignal SessionStopped; + + /// + /// Signal for events indicating the start of speech. + /// + EventSignal SpeechStartDetected; + + /// + /// Signal for events indicating the end of speech. + /// + EventSignal SpeechEndDetected; + + /// + /// Signal for events containing intermediate recognition results. + /// + EventSignal Transcribing; + + /// + /// Signal for events containing final recognition results. + /// (indicating a successful recognition attempt). + /// + EventSignal Transcribed; + + /// + /// Signal for events containing canceled recognition results + /// (indicating a recognition attempt that was canceled as a result or a direct cancellation request + /// or, alternatively, a transport or protocol failure). + /// + EventSignal Canceled; + + /// + /// Sets the authorization token that will be used for connecting the server. + /// + /// The authorization token. 
+ void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + protected: + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + virtual void TermRecognizer() override + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + Canceled.DisconnectAll(); + Transcribed.DisconnectAll(); + Transcribing.DisconnectAll(); + SpeechEndDetected.DisconnectAll(); + SpeechStartDetected.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + + // Close the async handles we have open for Recognize, StartContinuous, and StopContinuous + for (auto handle : { &m_hasyncStartContinuous, &m_hasyncStopContinuous }) + { + if (*handle != SPXHANDLE_INVALID && ::recognizer_async_handle_is_valid(*handle)) + { + ::recognizer_async_handle_release(*handle); + *handle = SPXHANDLE_INVALID; + } + } + + // Ask the base to term + Recognizer::TermRecognizer(); + } + + void RecoEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Transcribing) + { + recognizer_recognizing_set_callback(m_hreco, Transcribing.IsConnected() ? FireEvent_Transcribing : nullptr, this); + } + else if (&recoEvent == &Transcribed) + { + recognizer_recognized_set_callback(m_hreco, Transcribed.IsConnected() ? 
FireEvent_Transcribed : nullptr, this); + } + } + } + + static void FireEvent_Transcribing(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new ConversationTranscriptionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Transcribing.Signal(*recoEvent.get()); + } + + static void FireEvent_Transcribed(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new ConversationTranscriptionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Transcribed.Signal(*recoEvent.get()); + } + + void RecoCanceledEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Canceled) + { + recognizer_canceled_set_callback(m_hreco, Canceled.IsConnected() ? FireEvent_Canceled : nullptr, this); + } + } + } + + static void FireEvent_Canceled(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + + auto ptr = new ConversationTranscriptionCanceledEventArgs(hevent); + std::shared_ptr recoEvent(ptr); + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Canceled.Signal(*ptr); + } + + void SessionEventConnectionsChanged(const EventSignal& sessionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&sessionEvent == &SessionStarted) + { + recognizer_session_started_set_callback(m_hreco, SessionStarted.IsConnected() ? 
FireEvent_SessionStarted : nullptr, this); + } + else if (&sessionEvent == &SessionStopped) + { + recognizer_session_stopped_set_callback(m_hreco, SessionStopped.IsConnected() ? FireEvent_SessionStopped : nullptr, this); + } + } + } + + static void FireEvent_SessionStarted(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent{ new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStarted.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SessionStopped(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent{ new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStopped.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + void RecognitionEventConnectionsChanged(const EventSignal& recognitionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recognitionEvent == &SpeechStartDetected) + { + recognizer_speech_start_detected_set_callback(m_hreco, SpeechStartDetected.IsConnected() ? FireEvent_SpeechStartDetected : nullptr, this); + } + else if (&recognitionEvent == &SpeechEndDetected) + { + recognizer_speech_end_detected_set_callback(m_hreco, SpeechEndDetected.IsConnected() ? 
FireEvent_SpeechEndDetected : nullptr, this); + } + } + } + + static void FireEvent_SpeechStartDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechStartDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SpeechEndDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechEndDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + /*! 
\endcond */
+
+private:
+
+    SPXASYNCHANDLE m_hasyncStartContinuous;
+    SPXASYNCHANDLE m_hasyncStopContinuous;
+
+    DISABLE_DEFAULT_CTORS(ConversationTranscriber);
+    friend class Microsoft::CognitiveServices::Speech::Session;
+
+    class PrivatePropertyCollection : public PropertyCollection
+    {
+    public:
+        PrivatePropertyCollection(SPXRECOHANDLE hreco) :
+            PropertyCollection(
+                [=]() {
+                SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID;
+                recognizer_get_property_bag(hreco, &hpropbag);
+                return hpropbag;
+            }())
+        {
+        }
+    };
+
+    PrivatePropertyCollection m_properties;
+
+    inline std::function<void(const EventSignal<SessionEventArgs&>&)> GetSessionEventConnectionsChangedCallback()
+    {
+        return [=](const EventSignal<SessionEventArgs&>& sessionEvent) { this->SessionEventConnectionsChanged(sessionEvent); };
+    }
+
+    inline std::function<void(const EventSignal<ConversationTranscriptionEventArgs&>&)> GetRecoEventConnectionsChangedCallback()
+    {
+        return [=](const EventSignal<ConversationTranscriptionEventArgs&>& recoEvent) { this->RecoEventConnectionsChanged(recoEvent); };
+    }
+
+    inline std::function<void(const EventSignal<ConversationTranscriptionCanceledEventArgs&>&)> GetRecoCanceledEventConnectionsChangedCallback()
+    {
+        return [=](const EventSignal<ConversationTranscriptionCanceledEventArgs&>& recoEvent) { this->RecoCanceledEventConnectionsChanged(recoEvent); };
+    }
+
+    inline std::function<void(const EventSignal<RecognitionEventArgs&>&)> GetRecognitionEventConnectionsChangedCallback()
+    {
+        return [=](const EventSignal<RecognitionEventArgs&>& recoEvent) { this->RecognitionEventConnectionsChanged(recoEvent); };
+    }
+
+public:
+    /// <summary>
+    /// A collection of properties and their values defined for this <see cref="ConversationTranscriber"/>.
+    /// </summary>
+    PropertyCollection& Properties;
+};
+
+
+} } } } // Microsoft::CognitiveServices::Speech::Transcription
diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcription_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcription_eventargs.h
new file mode 100644
index 0000000..dd03343
--- /dev/null
+++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcription_eventargs.h
@@ -0,0 +1,165 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation_transcription_eventargs.h: Public API declarations for ConversationTranscriptionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Class for conversation transcriber event arguments. +/// +class ConversationTranscriptionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit ConversationTranscriptionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~ConversationTranscriptionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Conversation transcriber result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Conversation transcriber result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(ConversationTranscriptionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for conversation transcriber canceled event arguments. 
+/// +class ConversationTranscriptionCanceledEventArgs : public ConversationTranscriptionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit ConversationTranscriptionCanceledEventArgs(SPXEVENTHANDLE hevent) : + ConversationTranscriptionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~ConversationTranscriptionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. 
+ /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(ConversationTranscriptionCanceledEventArgs); +}; +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcription_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcription_result.h new file mode 100644 index 0000000..c655c8f --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_transcription_result.h @@ -0,0 +1,72 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation_transcription_result.h: Public API declarations for ConversationTranscription C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Represents the result of a conversation transcriber. +/// +class ConversationTranscriptionResult final : public RecognitionResult +{ +public: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Result handle. + explicit ConversationTranscriptionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult), + SpeakerId(m_speakerId) + { + PopulateSpeakerFields(hresult, &m_speakerId); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s, speakerid=%s, utteranceid=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str(), Utils::ToUTF8(SpeakerId).c_str()); + } + + /// + /// Destructor. + /// + ~ConversationTranscriptionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + /// + /// Unique Speaker id. 
+ /// + const SPXSTRING& SpeakerId; + +private: + DISABLE_DEFAULT_CTORS(ConversationTranscriptionResult); + + void PopulateSpeakerFields(SPXRESULTHANDLE hresult, SPXSTRING* pspeakerId) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1] = {}; + + if (pspeakerId != nullptr && recognizer_result_handle_is_valid(hresult)) + { + SPX_THROW_ON_FAIL(hr = conversation_transcription_result_get_speaker_id(hresult, sz, maxCharCount)); + *pspeakerId = Utils::ToSPXString(sz); + } + } + + SPXSTRING m_speakerId; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_translator.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_translator.h new file mode 100644 index 0000000..d23b53b --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_translator.h @@ -0,0 +1,448 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation_translator.h: Public API declarations for ConversationTranslator C++ class +// + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +// Forward decl: facilitates friend use of Connection::FromConversationTranslator +class Connection; + +namespace Transcription { + + /// + /// A conversation translator that enables a connected experience where participants can use their + /// own devices to see everyone else's recognitions and IMs in their own languages. Participants + /// can also speak and send IMs to others. + /// Added in 1.9.0 + /// + class ConversationTranslator : public std::enable_shared_from_this + { + private: + /*! 
\cond PRIVATE */ + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXCONVERSATIONHANDLE hconvtrans) : + PropertyCollection([hconvtrans]() + { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + conversation_translator_get_property_bag(hconvtrans, &hpropbag); + return hpropbag; + }()) + {} + }; + + SPXCONVERSATIONTRANSLATORHANDLE m_handle; + PrivatePropertyCollection m_properties; + /*! \endcond */ + + public: + /// + /// Creates a conversation translator from an audio config + /// + /// Audio configuration. + /// Smart pointer to conversation translator instance. + static std::shared_ptr FromConfig(std::shared_ptr audioConfig = nullptr) + { + SPXCONVERSATIONTRANSLATORHANDLE handle; + SPX_THROW_ON_FAIL(::conversation_translator_create_from_config( + &handle, + Utils::HandleOrInvalid(audioConfig) + )); + return std::shared_ptr(new ConversationTranslator(handle)); + } + + /// + /// Destructor + /// + virtual ~ConversationTranslator() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // disconnect callbacks in reverse order + TextMessageReceived.DisconnectAll(); + Transcribed.DisconnectAll(); + Transcribing.DisconnectAll(); + ConversationExpiration.DisconnectAll(); + ParticipantsChanged.DisconnectAll(); + Canceled.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + + ::conversation_translator_handle_release(m_handle); + m_handle = SPXHANDLE_INVALID; + } + + /// + /// Signal for events indicating the start of a transcription session (operation). + /// + EventSignal SessionStarted; + + /// + /// Signal for events indicating the end of a transcription session (operation). + /// + EventSignal SessionStopped; + + /// + /// Signal for events containing canceled recognition results + /// (indicating a recognition attempt that was canceled as a result or a direct cancellation request + /// or, alternatively, a transport or protocol failure). 
+ /// + EventSignal Canceled; + + /// + /// Signal for events indicating the conversation participants have changed. + /// + EventSignal ParticipantsChanged; + + /// + /// Signal for event indicating how many minutes are left until a conversation expires. + /// + EventSignal ConversationExpiration; + + /// + /// Signal for events containing intermediate translated conversation transcription results. + /// + EventSignal Transcribing; + + /// + /// Signal for events containing final translated conversation transcription results. + /// (indicating a successful recognition attempt). + /// + EventSignal Transcribed; + + /// + /// Raised when a text message is received from the conversation. + /// + EventSignal TextMessageReceived; + + /// + /// Joins a conversation. After you call this, you will start receiving events. + /// + /// The conversation instance to use. This instance can be used by the + /// host to manage the conversation. + /// The display name to use for the current participant in the conversation. + /// An asynchronous operation. + std::future JoinConversationAsync(std::shared_ptr conversation, const SPXSTRING& nickname) + { + return RunAsync([conversation, nickname](auto handle) + { + return ::conversation_translator_join( + handle, + Utils::HandleOrInvalid(conversation), + Utils::ToUTF8(nickname).c_str()); + }); + } + + /// + /// Joins a conversation. After you call this, you will start receiving events. + /// + /// The identifier of the conversation you want to join. + /// The display name of the current participant in the conversation. + /// The language the participant is using. + /// An asynchronous operation. 
+ std::future JoinConversationAsync(const SPXSTRING& conversationId, const SPXSTRING& nickname, const SPXSTRING& language) + { + return RunAsync([conversationId, nickname, language](auto handle) + { + return ::conversation_translator_join_with_id( + handle, + Utils::ToUTF8(conversationId).c_str(), + Utils::ToUTF8(nickname).c_str(), + Utils::ToUTF8(language).c_str()); + }); + } + + /// + /// Starts sending audio to the conversation service for speech recognition. + /// + /// An asynchronous operation. + std::future StartTranscribingAsync() + { + return RunAsync(::conversation_translator_start_transcribing); + } + + /// + /// Stops sending audio to the conversation service. + /// + /// An asynchronous operation. + std::future StopTranscribingAsync() + { + return RunAsync(::conversation_translator_stop_transcribing); + } + + /// + /// Sends an instant message to all participants in the conversation. This instant message + /// will be translated into each participant's text language. + /// + /// The message to send. + /// An asynchronous operation. + std::future SendTextMessageAsync(const SPXSTRING& message) + { + return RunAsync([message](auto handle) + { + return ::conversation_translator_send_text_message( + handle, + Utils::ToUTF8(message).c_str()); + }); + } + + /// + /// Leaves the current conversation. After this is called, you will no longer receive any events. + /// + /// An asynchronous operation. + std::future LeaveConversationAsync() + { + return RunAsync(::conversation_translator_leave); + } + + /// + /// Sets the Cognitive Speech authorization token that will be used for connecting to the server. + /// + /// The authorization token. + /// The Azure region for this token. + void SetAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region) + { + SPX_THROW_ON_FAIL(::conversation_translator_set_authorization_token( + m_handle, + Utils::ToUTF8(authToken).c_str(), + Utils::ToUTF8(region).c_str())); + } + + /// + /// Gets the authorization token. 
+ /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return m_properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token); + } + + /// + /// Gets your participant identifier + /// + /// Participant ID + SPXSTRING GetParticipantId() + { + return m_properties.GetProperty(PropertyId::Conversation_ParticipantId); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + protected: + explicit ConversationTranslator(SPXCONVERSATIONTRANSLATORHANDLE handle) : + m_handle(handle), + m_properties(handle), + SessionStarted(BindHandler(&ConversationTranslator::OnSessionEventChanged)), + SessionStopped(BindHandler(&ConversationTranslator::OnSessionEventChanged)), + Canceled(BindHandler(&ConversationTranslator::OnCanceledEventChanged)), + ParticipantsChanged(BindHandler(&ConversationTranslator::OnParticipantsEventChanged)), + ConversationExpiration(BindHandler(&ConversationTranslator::OnExpirationEventChanged)), + Transcribing(BindHandler(&ConversationTranslator::OnTranscriptionEventChanged)), + Transcribed(BindHandler(&ConversationTranslator::OnTranscriptionEventChanged)), + TextMessageReceived(BindHandler(&ConversationTranslator::OnTextMessageEventChanged)), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + static inline bool ValidateHandle(SPXCONVERSATIONTRANSLATORHANDLE handle, const char* function) + { + UNUSED(function); // not used in release builds + SPX_DBG_TRACE_VERBOSE("%s: handle=0x%8p", function, (void*)handle); + bool valid = ::conversation_translator_handle_is_valid(handle); + SPX_DBG_TRACE_VERBOSE_IF(!valid, "%s: handle is INVALID!!!", function); + return valid; + } + + void OnSessionEventChanged(const EventSignal& evt) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + + if (&evt == &SessionStarted) + { + if (SessionStarted.IsConnected()) + { + callback = [](auto, auto b, 
auto c) { FireEvent(b, c, &ConversationTranslator::SessionStarted); }; + } + + conversation_translator_session_started_set_callback(m_handle, callback, this); + } + else if (&evt == &SessionStopped) + { + if (SessionStopped.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::SessionStopped); }; + } + + conversation_translator_session_stopped_set_callback(m_handle, callback, this); + } + } + + void OnCanceledEventChanged(const EventSignal&) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (Canceled.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::Canceled); }; + } + + conversation_translator_canceled_set_callback(m_handle, callback, this); + } + + void OnParticipantsEventChanged(const EventSignal&) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (ParticipantsChanged.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::ParticipantsChanged); }; + } + + conversation_translator_participants_changed_set_callback(m_handle, callback, this); + } + + void OnExpirationEventChanged(const EventSignal&) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (ConversationExpiration.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::ConversationExpiration); }; + } + + conversation_translator_conversation_expiration_set_callback(m_handle, callback, this); + } + + void OnTranscriptionEventChanged(const EventSignal& evt) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (&evt == &Transcribing) + { + if (Transcribing.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::Transcribing); }; + } + + 
conversation_translator_transcribing_set_callback(m_handle, callback, this); + } + else + { + if (Transcribed.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::Transcribed); }; + } + + conversation_translator_transcribed_set_callback(m_handle, callback, this); + } + } + + void OnTextMessageEventChanged(const EventSignal&) + { + if (!ValidateHandle(m_handle, __FUNCTION__)) return; + + PCONV_TRANS_CALLBACK callback = nullptr; + if (TextMessageReceived.IsConnected()) + { + callback = [](auto, auto b, auto c) { FireEvent(b, c, &ConversationTranslator::TextMessageReceived); }; + } + + conversation_translator_text_message_recevied_set_callback(m_handle, callback, this); + } + + private: + /*! \cond PRIVATE */ + + friend class Microsoft::CognitiveServices::Speech::Connection; + + DISABLE_DEFAULT_CTORS(ConversationTranslator); + + inline std::future RunAsync(std::function func) + { + auto keepalive = this->shared_from_this(); + return std::async(std::launch::async, [keepalive, this, func]() + { + SPX_THROW_ON_FAIL(func(m_handle)); + }); + } + + template + inline std::function BindHandler(void (ConversationTranslator::*func)(TArg)) + { + return [this, func](TArg arg) + { + (this->*func)(arg); + }; + } + + static inline void FreeEventHandle(SPXEVENTHANDLE hEvt) + { + if (::conversation_translator_event_handle_is_valid(hEvt)) + { + ::conversation_translator_event_handle_release(hEvt); + } + } + + template + static inline void FireEvent(SPXEVENTHANDLE hEvt, void* pCtxt, EventSignal ConversationTranslator::*pEvent) + { + try + { + auto pThis = static_cast(pCtxt); + SPX_DBG_ASSERT(pThis != nullptr); + auto keepAlive = pThis->shared_from_this(); + + T eventArgs(hEvt); + (pThis->*pEvent).Signal(eventArgs); + + // event classes don't properly release the handles so do that here + FreeEventHandle(hEvt); + } + catch (std::exception& ex) + { + UNUSED(ex); + FreeEventHandle(hEvt); + throw; + } + catch (...) 
+ { + FreeEventHandle(hEvt); + throw; + } + } + + /*! \endcond */ + }; + +}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_translator_events.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_translator_events.h new file mode 100644 index 0000000..0bc817c --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversation_translator_events.h @@ -0,0 +1,262 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_conversation_translator_events.h: Public C++ class API declarations for ConversationTranslator related events +// + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + + /// + /// Helper class with additional methods + /// Added in 1.9.0 + /// + class EventHelper + { + protected: + template + static TVal GetValue(THandle hevent, SPXHR(SPXAPI_CALLTYPE * func)(THandle hevent, TVal* ptr)) + { + TVal value; + SPX_THROW_ON_FAIL(func(hevent, &value)); + return value; + } + + template + static SPXSTRING GetStringValue(THandle hevent, SPXHR(SPXAPI_CALLTYPE * func)(THandle hevent, char * psz, uint32_t cch)) + { + const uint32_t maxCharCount = 1024; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(func(hevent, sz, maxCharCount)); + return Utils::ToSPXString(sz); + } + + template + static SPXSTRING GetStringValue(THandle hevent, SPXHR(SPXAPI_CALLTYPE* func)(THandle hevent, char* psz, uint32_t* pcch)) + { + // query the string length + uint32_t length = 0; + SPX_THROW_ON_FAIL(func(hevent, nullptr, &length)); + + // retrieve the string + std::unique_ptr buffer(new char[length]); + SPX_THROW_ON_FAIL(func(hevent, buffer.get(), &length)); + return Utils::ToSPXString(buffer.get()); + } + }; + + /// + /// Represents 
the result of a conversation translator recognition, or text message. + /// Added in 1.9.0 + /// + class ConversationTranslationResult : public Translation::TranslationRecognitionResult, public EventHelper + { + private: + SPXSTRING m_participantId; + SPXSTRING m_originalLang; + + public: + explicit ConversationTranslationResult(SPXRESULTHANDLE resultHandle) : + Translation::TranslationRecognitionResult(resultHandle), + m_participantId(GetStringValue(resultHandle, conversation_translator_result_get_user_id)), + m_originalLang(GetStringValue(resultHandle, conversation_translator_result_get_original_lang)), + ParticipantId(m_participantId), + OriginalLanguage(m_originalLang) + { + } + + /// + /// The unique participant identifier + /// + const SPXSTRING& ParticipantId; + + /// + /// Gets the language that the original recognition or text message is in + /// + const SPXSTRING& OriginalLanguage; + + private: + DISABLE_COPY_AND_MOVE(ConversationTranslationResult); + }; + + /// + /// Event arguments for the ConversationExpiration event. + /// Added in 1.9.0 + /// + class ConversationExpirationEventArgs : public SessionEventArgs, public EventHelper + { + private: + std::chrono::minutes m_expirationTime; + + public: + /// + /// Creates a new instance. + /// + /// The event handle. + explicit ConversationExpirationEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + m_expirationTime(std::chrono::minutes(GetValue(hevent, conversation_translator_event_get_expiration_time))), + ExpirationTime(m_expirationTime) + { + } + + /// + /// How many minutes are left until the conversation expires + /// + const std::chrono::minutes& ExpirationTime; + + private: + DISABLE_COPY_AND_MOVE(ConversationExpirationEventArgs); + }; + + /// + /// Event arguments for the ParticipantsChanged event. 
+ /// Added in 1.9.0 + /// + class ConversationParticipantsChangedEventArgs : public SessionEventArgs, public EventHelper + { + private: + ParticipantChangedReason m_reason; + std::vector> m_participants; + + public: + /// + /// Creates a new instance. + /// + /// The event handle. + explicit ConversationParticipantsChangedEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + m_reason(GetValue(hevent, conversation_translator_event_get_participant_changed_reason)), + m_participants(GetParticipants(hevent)), + Reason(m_reason), + Participants(m_participants) + { + } + + /// + /// Why the participant changed event was raised (e.g. a participant joined) + /// + const ParticipantChangedReason& Reason; + + /// + /// The participant(s) that joined, left, or were updated + /// + const std::vector>& Participants; + + protected: + /*! \cond PROTECTED */ + + std::vector> GetParticipants(SPXEVENTHANDLE hevent) + { + std::vector> list; + + SPXPARTICIPANTHANDLE hparticipant = nullptr; + for (int i = 0; hparticipant != SPXHANDLE_INVALID; i++) + { + SPX_THROW_ON_FAIL(conversation_translator_event_get_participant_changed_at_index(hevent, i, &hparticipant)); + if (hparticipant != SPXHANDLE_INVALID) + { + list.push_back(std::make_shared(hparticipant)); + + // the Participant object correctly frees the handle so we don't need to do anything + // special here + } + } + + return list; + } + + /*! \endcond */ + + private: + DISABLE_COPY_AND_MOVE(ConversationParticipantsChangedEventArgs); + }; + + /// + /// Event arguments for the ConversationTranslator , + /// , or + /// events. + /// Added in 1.9.0 + /// + class ConversationTranslationEventArgs : public RecognitionEventArgs, public EventHelper + { + private: + std::shared_ptr m_result; + + public: + /// + /// Creates a new instance. + /// + /// The event handle returned by the C-API. 
+ explicit ConversationTranslationEventArgs(SPXEVENTHANDLE hevent) + : RecognitionEventArgs(hevent), + m_result(std::make_shared(GetValue(hevent, recognizer_recognition_event_get_result))), + Result(m_result) + { + } + +#if defined(BINDING_OBJECTIVE_C) + private: +#endif + /// + /// Contains the conversation translation result. This could be for a canceled event, + /// a speech recognition, or a received text message. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) + public: +#else + protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Contains the conversation translation result. This could be for a canceled event, + /// a speech recognition, or a received text message. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + + private: + DISABLE_COPY_AND_MOVE(ConversationTranslationEventArgs); + }; + + + /// + /// Event arguments for the conversation translator canceled event. + /// Added in 1.9.0 + /// + class ConversationTranslationCanceledEventArgs : public ConversationTranscriptionCanceledEventArgs + { + public: + /// + /// Creates a new instance. + /// + /// The event handle. + explicit ConversationTranslationCanceledEventArgs(SPXEVENTHANDLE hevent) : + ConversationTranscriptionCanceledEventArgs(hevent) + { } + }; + +}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversational_language_understanding_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversational_language_understanding_model.h new file mode 100644 index 0000000..1a2eb54 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_conversational_language_understanding_model.h @@ -0,0 +1,89 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license201809 for the full license information. 
+// +// speechapi_cxx_conversational_language_understanding_model.h: Public API declarations for PatternMatchingModel C++ class +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + + /// + /// Represents a Conversational Language Understanding used for intent recognition. + /// + class ConversationalLanguageUnderstandingModel : public LanguageUnderstandingModel + { + public: + + /// + /// Creates a Conversational Language Understanding (CLU) model using the specified model ID. + /// + /// The Azure Language resource key. + /// The Azure Language resource endpoint. + /// The Conversational Language Understanding project name. + /// The Conversational Language Understanding deployment name. + /// A shared pointer to the Conversational Language Understanding model. + static std::shared_ptr FromResource(const SPXSTRING& languageResourceKey, const SPXSTRING& endpoint, const SPXSTRING& projectName, const SPXSTRING& deploymentName) + { + return std::shared_ptr { + new ConversationalLanguageUnderstandingModel(languageResourceKey, endpoint, projectName, deploymentName) + }; + } + + /// + /// Returns id for this model. Defaults to projectName-deploymentName. + /// + /// A string representing the id of this model. + SPXSTRING GetModelId() const { return m_modelId; } + + /// + /// Sets the id for this model. Defaults to projectName-deploymentName. + /// + /// A string representing the id of this model. + void SetModelId(SPXSTRING value) { m_modelId = value; } + + /// + /// This is the Azure language resource key to be used with this model. + /// + SPXSTRING languageResourceKey; + + /// + /// Conversational Language Understanding deployment endpoint to contact. + /// + SPXSTRING endpoint; + + /// + /// Conversational Language Understanding project name. + /// + SPXSTRING projectName; + + /// + /// Conversational Language Understanding deployment name. 
+ /// + SPXSTRING deploymentName; + + private: + DISABLE_COPY_AND_MOVE(ConversationalLanguageUnderstandingModel); + + ConversationalLanguageUnderstandingModel(const SPXSTRING& languageResourceKey, const SPXSTRING& endpoint, const SPXSTRING& projectName, const SPXSTRING& deploymentName) : + LanguageUnderstandingModel(LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel), + languageResourceKey(languageResourceKey), + endpoint(endpoint), + projectName(projectName), + deploymentName(deploymentName) + { + m_modelId = projectName + "-" + deploymentName; + } + + SPXSTRING m_modelId; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_config.h new file mode 100644 index 0000000..641daaf --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_config.h @@ -0,0 +1,268 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +#pragma once + +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Dialog { + +/// +/// Class that defines base configurations for the dialog service connector object. +/// +class DialogServiceConfig +{ +protected: + /*! \cond PROTECTED */ + inline explicit DialogServiceConfig(SPXSPEECHCONFIGHANDLE h_config) : m_config{ h_config } + { + } + SpeechConfig m_config; + /*! \endcond */ + +public: + /// + /// Default destructor. + /// + virtual ~DialogServiceConfig() = default; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXSPEECHCONFIGHANDLE() const { return static_cast(m_config); } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. 
+ void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + m_config.SetProperty(name, value); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + m_config.SetProperty(id, value); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + return m_config.GetProperty(name); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + return m_config.GetProperty(id); + } + + /// + /// Sets a property value that will be passed to service using the specified channel. + /// + /// The property name. + /// The property value. + /// The channel used to pass the specified property to service. + void SetServiceProperty(const SPXSTRING& name, const SPXSTRING& value, ServicePropertyChannel channel) + { + m_config.SetServiceProperty(name, value, channel); + } + + + /// + /// Sets proxy configuration + /// + /// Note: Proxy functionality is not available on macOS. This function will have no effect on this platform. + /// + /// The host name of the proxy server, without the protocol scheme (`http://`) + /// The port number of the proxy server + /// The user name of the proxy server + /// The password of the proxy server + void SetProxy(const SPXSTRING& proxyHostName, uint32_t proxyPort, const SPXSTRING& proxyUserName = SPXSTRING(), const SPXSTRING& proxyPassword = SPXSTRING()) + { + m_config.SetProxy(proxyHostName, proxyPort, proxyUserName, proxyPassword); + } + + /// + /// Set the input language to the connector. + /// + /// Specifies the name of spoken language to be recognized in BCP-47 format. 
+ void SetLanguage(const SPXSTRING& lang) + { + SetProperty(PropertyId::SpeechServiceConnection_RecoLanguage, lang); + } + + /// + /// Gets the input language to the connector. + /// The language is specified in BCP-47 format. + /// + /// The connetor language. + SPXSTRING GetLanguage() const + { + return GetProperty(PropertyId::SpeechServiceConnection_RecoLanguage); + } + +}; + +/// +/// Class that defines configurations for the dialog service connector object for using a Bot Framework backend. +/// +class BotFrameworkConfig final : public DialogServiceConfig +{ +public: + /// + /// Creates a bot framework service config instance with the specified subscription key and region. + /// + /// Subscription key associated with the bot + /// The region name (see the region page). + /// A shared pointer to the new bot framework config. + inline static std::shared_ptr FromSubscription(const SPXSTRING& subscription, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(bot_framework_config_from_subscription(&h_config, Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str(), nullptr)); + return std::shared_ptr{ new BotFrameworkConfig(h_config) }; + } + + /// + /// Creates a bot framework service config instance with the specified subscription key and region. + /// + /// Subscription key associated with the bot + /// The region name (see the region page). + /// Identifier used to select a bot associated with this subscription. + /// A shared pointer to the new bot framework config. 
+ inline static std::shared_ptr FromSubscription(const SPXSTRING& subscription, const SPXSTRING& region, const SPXSTRING& bot_Id) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(bot_framework_config_from_subscription(&h_config, Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str(), Utils::ToUTF8(bot_Id).c_str())); + return std::shared_ptr{ new BotFrameworkConfig(h_config) }; + } + + /// + /// Creates a bot framework service config instance with the specified authorization token and region. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new connector, the new token value will not apply to connectors that have already been created. + /// For connectors that have been created before, you need to set authorization token of the corresponding connector + /// to refresh the token. Otherwise, the connectors will encounter errors during operation. + /// + /// The authorization token. + /// The region name (see the region page). + /// A shared pointer to the new bot framework config. + inline static std::shared_ptr FromAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(bot_framework_config_from_authorization_token(&h_config, Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str(), nullptr)); + return std::shared_ptr{ new BotFrameworkConfig(h_config) }; + } + + /// + /// Creates a bot framework service config instance with the specified authorization token and region. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. 
+ /// As configuration values are copied when creating a new connector, the new token value will not apply to connectors that have already been created. + /// For connectors that have been created before, you need to set authorization token of the corresponding connector + /// to refresh the token. Otherwise, the connectors will encounter errors during operation. + /// + /// The authorization token. + /// The region name (see the region page). + /// Identifier used to select a bot associated with this subscription. + /// A shared pointer to the new bot framework config. + inline static std::shared_ptr FromAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region, const SPXSTRING& bot_Id) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(bot_framework_config_from_authorization_token(&h_config, Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str(), Utils::ToUTF8(bot_Id).c_str())); + return std::shared_ptr{ new BotFrameworkConfig(h_config) }; + } +private: + inline explicit BotFrameworkConfig(SPXSPEECHCONFIGHANDLE h_config): DialogServiceConfig{ h_config } + { + } +}; + +/// +/// Class that defines configurations for the dialog service connector object for using a CustomCommands backend. +/// +class CustomCommandsConfig: public DialogServiceConfig +{ +public: + /// + /// Creates a Custom Commands config instance with the specified application id, subscription key and region. + /// + /// Custom Commands application id. + /// Subscription key associated with the bot + /// The region name (see the region page). + /// A shared pointer to the new bot framework config. 
+ inline static std::shared_ptr FromSubscription(const SPXSTRING& appId, const SPXSTRING& subscription, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(custom_commands_config_from_subscription(&h_config, Utils::ToUTF8(appId).c_str(), Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str())); + return std::shared_ptr{ new CustomCommandsConfig(h_config) }; + } + + /// + /// Creates a Custom Commands config instance with the specified application id authorization token and region. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new connector, the new token value will not apply to connectors that have already been created. + /// For connectors that have been created before, you need to set authorization token of the corresponding connector + /// to refresh the token. Otherwise, the connectors will encounter errors during operation. + /// + /// Custom Commands application id. + /// The authorization token. + /// The region name (see the region page). + /// A shared pointer to the new bot framework config. + inline static std::shared_ptr FromAuthorizationToken(const SPXSTRING& appId, const SPXSTRING& authToken, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE h_config = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(custom_commands_config_from_authorization_token(&h_config, Utils::ToUTF8(appId).c_str(), Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str())); + return std::shared_ptr{ new CustomCommandsConfig(h_config) }; + } + + /// + /// Sets the corresponding backend application identifier. + /// + /// Application identifier. 
+ inline void SetApplicationId(const SPXSTRING& applicationId) + { + SetProperty(PropertyId::Conversation_ApplicationId, applicationId); + } + + /// + /// Gets the application identifier. + /// + /// Speech Channel Secret Key. + inline SPXSTRING GetApplicationId() const + { + return GetProperty(PropertyId::Conversation_ApplicationId); + } + +private: + inline explicit CustomCommandsConfig(SPXSPEECHCONFIGHANDLE h_config): DialogServiceConfig{ h_config } + { + } +}; + +} } } } diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_connector.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_connector.h new file mode 100644 index 0000000..ebb505a --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_connector.h @@ -0,0 +1,547 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_dialog_service_connector.h: Public API declarations for DialogServiceConnector C++ base class +// + +#pragma once +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +// Forward decl: facilities friend use use of Connection::FromDialogServiceConnector +class Connection; + +namespace Dialog { + +/// +/// Object used to connect DirectLineSpeech or CustomCommands. +/// +/// +/// Objects of this type are created via the factory method. +/// +class DialogServiceConnector : public std::enable_shared_from_this, public Utils::NonCopyable, public Utils::NonMovable +{ +public: + /// + /// Destroys the instance. 
+ /// + virtual ~DialogServiceConnector() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + TurnStatusReceived.DisconnectAll(); + ActivityReceived.DisconnectAll(); + Canceled.DisconnectAll(); + SpeechEndDetected.DisconnectAll(); + SpeechStartDetected.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + Recognizing.DisconnectAll(); + Recognized.DisconnectAll(); + + if (m_handle != SPXHANDLE_INVALID) + { + ::dialog_service_connector_handle_release(m_handle); + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + m_handle = SPXHANDLE_INVALID; + } + } + + /// + /// Creates a dialog service connector from a and an . + /// Users should use this function to create a dialog service connector. + /// + /// Dialog service config. + /// Audio config. + /// The shared smart pointer of the created dialog service connector. + /// + /// + /// auto audioConfig = Audio::AudioConfig::FromDefaultMicrophoneInput(); + /// auto config = CustomCommandsConfig::FromAuthorizationToken("my_app_id","my_auth_token", "my_region"); + /// auto connector = DialogServiceConnector::FromConfig(config, audioConfig); + /// + /// + /// + /// When speaking of we are referring to one of the classes that inherit from it. + /// The specific class to be used depends on the dialog backend being used: + ///
    + ///
  • for DirectLineSpeech
  • + ///
  • for CustomCommands
  • + ///
+ ///
+ static std::shared_ptr FromConfig(std::shared_ptr connectorConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE h_connector; + SPX_THROW_ON_FAIL(::dialog_service_connector_create_dialog_service_connector_from_config( + &h_connector, + Utils::HandleOrInvalid(connectorConfig), + Utils::HandleOrInvalid(audioConfig) + )); + return std::shared_ptr { new DialogServiceConnector(h_connector) }; + } + + /// + /// Connects with the back end. + /// + /// An asynchronous operation that starts the connection. + std::future ConnectAsync() + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, this]() + { + SPX_THROW_ON_FAIL(::dialog_service_connector_connect(m_handle)); + }); + } + + /// + /// Disconnects from the back end. + /// + /// An asynchronous operation that starts the disconnection. + std::future DisconnectAsync() + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, this]() + { + SPX_THROW_ON_FAIL(::dialog_service_connector_disconnect(m_handle)); + }); + } + + /// + /// Sends an activity to the backing dialog. + /// + /// Activity to send + /// An asynchronous operation that starts the operation. + std::future SendActivityAsync(const std::string& activity) + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, activity, this]() + { + std::array buffer; + SPX_THROW_ON_FAIL(::dialog_service_connector_send_activity(m_handle, activity.c_str(), buffer.data())); + return std::string{ buffer.data() }; + }); + } + + /// + /// Initiates keyword recognition. + /// + /// Specifies the keyword model to be used. + /// An asynchronous operation that starts the operation. 
+ std::future StartKeywordRecognitionAsync(std::shared_ptr model) + { + auto keep_alive = this->shared_from_this(); + auto h_model = Utils::HandleOrInvalid(model); + return std::async(std::launch::async, [keep_alive, h_model, this]() + { + SPX_THROW_ON_FAIL(dialog_service_connector_start_keyword_recognition(m_handle, h_model)); + }); + } + + /// + /// Stop keyword recognition. + /// + /// An asynchronous operation that starts the operation. + std::future StopKeywordRecognitionAsync() + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, this]() + { + SPX_THROW_ON_FAIL(dialog_service_connector_stop_keyword_recognition(m_handle)); + }); + } + + /// + /// Starts a listening session that will terminate after the first utterance. + /// + /// An asynchronous operation that starts the operation. + std::future> ListenOnceAsync() + { + auto keep_alive = this->shared_from_this(); + return std::async(std::launch::async, [keep_alive, this]() + { + SPX_INIT_HR(hr); + + SPXRECOHANDLE h_result = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(dialog_service_connector_listen_once(m_handle, &h_result)); + + return std::make_shared(h_result); + }); + } + + /// + /// Requests that an active listening operation immediately finish, interrupting any ongoing + /// speaking, and provide a result reflecting whatever audio data has been captured so far. + /// + /// A task representing the asynchronous operation that stops an active listening session. 
+ std::future StopListeningAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + // close any unfinished previous attempt + SPX_THROW_ON_FAIL(hr = speechapi_async_handle_release(m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = dialog_service_connector_stop_listening_async(m_handle, &m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = speechapi_async_wait_for(m_hasyncStopContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = speechapi_async_handle_release(m_hasyncStopContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStopContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the connector will encounter errors during its operation. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Sets a JSON template that will be provided to the speech service for the next conversation. The service will + /// attempt to merge this template into all activities sent to the dialog backend, whether originated by the + /// client with SendActivityAsync or generated by the service, as is the case with speech-to-text results. + /// + /// + /// The activity payload, as a JSON string, to be merged into all applicable activity messages. 
+ /// + void SetSpeechActivityTemplate(const SPXSTRING& activityTemplate) + { + Properties.SetProperty(PropertyId::Conversation_Speech_Activity_Template, activityTemplate); + } + + /// + /// Gets the JSON template that will be provided to the speech service for the next conversation. The service will + /// attempt to merge this template into all activities sent to the dialog backend, whether originated by the + /// client with SendActivityAsync or generated by the service, as is the case with speech-to-text results. + /// + /// The JSON activity template currently set that will be used on subsequent requests. + SPXSTRING GetSpeechActivityTemplate() + { + return Properties.GetProperty(PropertyId::Conversation_Speech_Activity_Template, SPXSTRING()); + } + + /// + /// Signal for events containing speech recognition results. + /// + EventSignal Recognized; + + /// + /// Signal for events containing intermediate recognition results. + /// + EventSignal Recognizing; + + /// + /// Signals that indicates the start of a listening session. + /// + EventSignal SessionStarted; + + /// + /// Signal that indicates the end of a listening session. + /// + EventSignal SessionStopped; + + /// + /// Signal that indicates the first detection of speech data in the current phrase. + /// + EventSignal SpeechStartDetected; + + /// + /// Signal that indicates the detected end of the current phrase's speech data. + /// + EventSignal SpeechEndDetected; + + /// + /// Signal for events relating to the cancellation of an interaction. The event indicates if the reason is a direct cancellation or an error. + /// + EventSignal Canceled; + + /// + /// Signals that an activity was received from the backend + /// + EventSignal ActivityReceived; + + /// + /// Signals that a turn status update was received from the backend + /// + EventSignal TurnStatusReceived; + +private: + /*! 
\cond PROTECTED */ + template + std::function&)> Callback(F f) + { + return [=](const EventSignal& evt) + { + (this->*f)(evt); + }; + } + + static void FireEvent_Recognized(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SpeechRecognitionEventArgs event{ h_event }; + keep_alive->Recognized.Signal(event); + /* Not releasing the handle as SpeechRecognitionEventArgs manages it */ + } + + static void FireEvent_Recognizing(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SpeechRecognitionEventArgs event{ h_event }; + keep_alive->Recognizing.Signal(event); + /* Not releasing the handle as SpeechRecognitionEventArgs manages it */ + } + + void RecognizerEventConnectionChanged(const EventSignal& reco_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&reco_event == &Recognizing) + { + ::dialog_service_connector_recognizing_set_callback(m_handle, Recognizing.IsConnected() ? DialogServiceConnector::FireEvent_Recognizing : nullptr, this); + } + else if (&reco_event == &Recognized) + { + ::dialog_service_connector_recognized_set_callback(m_handle, Recognized.IsConnected() ? 
DialogServiceConnector::FireEvent_Recognized : nullptr, this); + } + } + } + + static void FireEvent_SessionStarted(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SessionEventArgs event{ h_event }; + keep_alive->SessionStarted.Signal(event); + + SPX_DBG_ASSERT(::recognizer_event_handle_is_valid(h_event)); + /* Releasing the event handle as SessionEventArgs doesn't keep the handle */ + ::recognizer_event_handle_release(h_event); + } + + static void FireEvent_SessionStopped(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SessionEventArgs event{ h_event }; + keep_alive->SessionStopped.Signal(event); + + SPX_DBG_ASSERT(::recognizer_event_handle_is_valid(h_event)); + /* Releasing the event handle as SessionEventArgs doesn't keep the handle */ + ::recognizer_event_handle_release(h_event); + } + + void SessionEventConnectionChanged(const EventSignal& session_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&session_event == &SessionStarted) + { + ::dialog_service_connector_session_started_set_callback(m_handle, SessionStarted.IsConnected() ? DialogServiceConnector::FireEvent_SessionStarted : nullptr, this); + } + else if (&session_event == &SessionStopped) + { + ::dialog_service_connector_session_stopped_set_callback(m_handle, SessionStopped.IsConnected() ? 
DialogServiceConnector::FireEvent_SessionStopped : nullptr, this); + } + } + } + + static void FireEvent_SpeechStartDetected(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + RecognitionEventArgs event{ h_event }; + keep_alive->SpeechStartDetected.Signal(event); + + SPX_DBG_ASSERT(::recognizer_event_handle_is_valid(h_event)); + /* Releasing the event handle as RecognitionEventArgs doesn't manage handle lifetime */ + ::recognizer_event_handle_release(h_event); + } + + static void FireEvent_SpeechEndDetected(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + RecognitionEventArgs event{ h_event }; + keep_alive->SpeechEndDetected.Signal(event); + + SPX_DBG_ASSERT(::recognizer_event_handle_is_valid(h_event)); + /* Releasing the event handle as RecognitionEventArgs doesn't manage handle lifetime */ + ::recognizer_event_handle_release(h_event); + } + + void SpeechDetectionEventConnectionChanged(const EventSignal& speech_detection_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&speech_detection_event == &SpeechStartDetected) + { + ::dialog_service_connector_speech_start_detected_set_callback(m_handle, SpeechStartDetected.IsConnected() ? DialogServiceConnector::FireEvent_SpeechStartDetected : nullptr, this); + } + else if (&speech_detection_event == &SpeechEndDetected) + { + ::dialog_service_connector_speech_end_detected_set_callback(m_handle, SpeechEndDetected.IsConnected() ? 
DialogServiceConnector::FireEvent_SpeechEndDetected : nullptr, this); + } + } + } + + static void FireEvent_Canceled(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SpeechRecognitionCanceledEventArgs event{ h_event }; + keep_alive->Canceled.Signal(event); + /* Not releasing the handle as SpeechRecognitionCanceledEventArgs manages it */ + } + + void CanceledEventConnectionChanged(const EventSignal& canceled_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&canceled_event == &Canceled) + { + ::dialog_service_connector_canceled_set_callback(m_handle, Canceled.IsConnected() ? DialogServiceConnector::FireEvent_Canceled : nullptr, this); + } + } + } + + static void FireEvent_ActivityReceived(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + ActivityReceivedEventArgs event{ h_event }; + keep_alive->ActivityReceived.Signal(event); + /* Not releasing the handle as ActivityReceivedEventArgs manages it */ + } + + void ActivityReceivedConnectionChanged(const EventSignal& activity_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&activity_event == &ActivityReceived) + { + ::dialog_service_connector_activity_received_set_callback(m_handle, ActivityReceived.IsConnected() ? 
DialogServiceConnector::FireEvent_ActivityReceived : nullptr, this); + } + } + } + + static void FireEvent_TurnStatusReceived(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + TurnStatusReceivedEventArgs event{ h_event }; + keep_alive->TurnStatusReceived.Signal(event); + /* Not releasing the handle as TurnStatusReceivedEventArgs manages it */ + } + + void TurnStatusReceivedConnectionChanged(const EventSignal& turn_status_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::dialog_service_connector_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&turn_status_event == &TurnStatusReceived) + { + ::dialog_service_connector_turn_status_received_set_callback(m_handle, TurnStatusReceived.IsConnected() ? DialogServiceConnector::FireEvent_TurnStatusReceived : nullptr, this); + } + } + } + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRECOHANDLE h_connector) : + PropertyCollection( + [=](){ + SPXPROPERTYBAGHANDLE h_prop_bag = SPXHANDLE_INVALID; + dialog_service_connector_get_property_bag(h_connector, &h_prop_bag); + return h_prop_bag; + }()) + { + } + }; + + inline explicit DialogServiceConnector(SPXRECOHANDLE handle) : + Recognized{ Callback(&DialogServiceConnector::RecognizerEventConnectionChanged) }, + Recognizing{ Callback(&DialogServiceConnector::RecognizerEventConnectionChanged) }, + SessionStarted{ Callback(&DialogServiceConnector::SessionEventConnectionChanged) }, + SessionStopped{ Callback(&DialogServiceConnector::SessionEventConnectionChanged) }, + SpeechStartDetected{ Callback(&DialogServiceConnector::SpeechDetectionEventConnectionChanged) }, + SpeechEndDetected{ Callback(&DialogServiceConnector::SpeechDetectionEventConnectionChanged) }, + Canceled{ 
Callback(&DialogServiceConnector::CanceledEventConnectionChanged) }, + ActivityReceived{ Callback(&DialogServiceConnector::ActivityReceivedConnectionChanged) }, + TurnStatusReceived{ Callback(&DialogServiceConnector::TurnStatusReceivedConnectionChanged) }, + m_handle{ handle }, + m_properties{ handle }, + Properties{ m_properties } + { + } + +private: + friend class Microsoft::CognitiveServices::Speech::Connection; + SPXRECOHANDLE m_handle; + SPXASYNCHANDLE m_hasyncStopContinuous; + + PrivatePropertyCollection m_properties; + /*! \endcond */ +public: + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; +}; + +} } } } diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_connector_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_connector_eventargs.h new file mode 100644 index 0000000..f55f611 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_dialog_service_connector_eventargs.h @@ -0,0 +1,148 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +#include +#include + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Dialog { + +// Forward declarations +class DialogServiceConnector; + +/// +/// Class for activity received event arguments. +/// +class ActivityReceivedEventArgs: public std::enable_shared_from_this +{ +public: + friend DialogServiceConnector; + /// + /// Releases the event. + /// + inline ~ActivityReceivedEventArgs() + { + SPX_THROW_ON_FAIL(::dialog_service_connector_activity_received_event_release(m_handle)); + } + + /// + /// Gets the activity associated with the event. + /// + /// The serialized activity activity. 
+ inline std::string GetActivity() const + { + size_t size; + SPX_THROW_ON_FAIL(::dialog_service_connector_activity_received_event_get_activity_size(m_handle, &size)); + auto ptr = std::make_unique(size + 1); + SPX_THROW_ON_FAIL(::dialog_service_connector_activity_received_event_get_activity(m_handle, ptr.get(), size + 1)); + return std::string{ ptr.get() }; + } + + /// + /// Gets the audio associated with the event. + /// + /// The audio. + inline std::shared_ptr GetAudio() const + { + SPXAUDIOSTREAMHANDLE h_audio{ SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::dialog_service_connector_activity_received_event_get_audio(m_handle, &h_audio)); + if (h_audio == SPXHANDLE_INVALID) + { + return nullptr; + } + return std::shared_ptr(new Audio::PullAudioOutputStream(h_audio) ); + } + + /// + /// Checks if the event contains audio. + /// + /// True if the event contains audio, false otherwise. + inline bool HasAudio() const + { + return ::dialog_service_connector_activity_received_event_has_audio(m_handle); + } +private: + /*! \cond PROTECTED */ + inline ActivityReceivedEventArgs(SPXEVENTHANDLE h_event) : m_handle{ h_event } + { + } + + SPXEVENTHANDLE m_handle; + /*! \endcond */ +}; + +/// +/// Class for turn status event arguments. +/// +class TurnStatusReceivedEventArgs : public std::enable_shared_from_this +{ +public: + friend DialogServiceConnector; + /// + /// Releases the event. + /// + inline ~TurnStatusReceivedEventArgs() + { + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_release(m_handle)); + } + + /// + /// Retrieves the interaction ID associated with this turn status event. Interaction generally correspond + /// to a single input signal (e.g. voice utterance) or data/activity transaction and will correlate to + /// 'replyToId' fields in Bot Framework activities. + /// + /// The interaction ID associated with the turn status. 
+ inline std::string GetInteractionId() const + { + size_t size = 0; + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_interaction_id_size(m_handle, &size)); + auto ptr = std::make_unique(size + 1); + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_interaction_id(m_handle, ptr.get(), size + 1)); + return std::string{ ptr.get() }; + } + + /// + /// Retrieves the conversation ID associated with this turn status event. Conversations may span multiple + /// interactions and are the unit which a client may request resume/retry upon. + /// + /// The conversation ID associated with the turn status. + inline std::string GetConversationId() const + { + size_t size = 0; + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_conversation_id_size(m_handle, &size)); + auto ptr = std::make_unique(size + 1); + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_conversation_id(m_handle, ptr.get(), size + 1)); + return std::string{ ptr.get() }; + } + + /// + /// Retrieves the numeric status code associated with this turn status event. These generally correspond to + /// standard HTTP status codes such as 200 (OK), 400 (Failure/Bad Request), and 429 (Timeout/Throttled). + /// + /// The status code associated with this event, analolgous to standard HTTP codes. + inline int GetStatusCode() const + { + int cApiStatus = 404; + SPX_THROW_ON_FAIL(::dialog_service_connector_turn_status_received_get_status(m_handle, &cApiStatus)); + return cApiStatus; + } + +private: + /*! \cond PROTECTED */ + inline TurnStatusReceivedEventArgs(SPXEVENTHANDLE h_event) : m_handle{ h_event } + { + } + + SPXEVENTHANDLE m_handle; + /*! 
\endcond */ +}; + +} } } } diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_embedded_speech_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_embedded_speech_config.h new file mode 100644 index 0000000..61454a2 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_embedded_speech_config.h @@ -0,0 +1,324 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_embedded_speech_config.h: Public API declarations for EmbeddedSpeechConfig C++ class +// + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines embedded (offline) speech configuration. +/// +class EmbeddedSpeechConfig +{ +protected: + /*! \cond PROTECTED */ + + SpeechConfig m_config; + + /*! \endcond */ + +public: + /// + /// Internal operator used to get the underlying handle value. + /// + /// A handle. + explicit operator SPXSPEECHCONFIGHANDLE() const + { + return static_cast(m_config); + } + + /// + /// Creates an instance of the embedded speech config with a specified offline model path. + /// + /// The folder path to search for offline models. + /// This can be a root path under which several models are located in subfolders, + /// or a direct path to a specific model folder. + /// + /// A shared pointer to the new embedded speech config instance. 
+ static std::shared_ptr FromPath(const SPXSTRING& path) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, path.empty()); + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(embedded_speech_config_create(&hconfig)); + SPX_THROW_ON_FAIL(embedded_speech_config_add_path(hconfig, Utils::ToUTF8(path).c_str())); + + auto ptr = new EmbeddedSpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the embedded speech config with specified offline model paths. + /// + /// The folder paths to search for offline models. + /// These can be root paths under which several models are located in subfolders, + /// or direct paths to specific model folders. + /// + /// A shared pointer to the new embedded speech config instance. + static std::shared_ptr FromPaths(const std::vector& paths) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, paths.empty()); + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(embedded_speech_config_create(&hconfig)); + for (const SPXSTRING& path : paths) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, path.empty()); + SPX_THROW_ON_FAIL(embedded_speech_config_add_path(hconfig, Utils::ToUTF8(path).c_str())); + } + + auto ptr = new EmbeddedSpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Gets a list of available speech recognition models. + /// + /// Speech recognition model info. + std::vector> GetSpeechRecognitionModels() + { + std::vector> models; + + uint32_t numModels = 0; + SPX_THROW_ON_FAIL(embedded_speech_config_get_num_speech_reco_models(static_cast(m_config), &numModels)); + + for (uint32_t i = 0; i < numModels; i++) + { + SPXSPEECHRECOMODELHANDLE hmodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(embedded_speech_config_get_speech_reco_model(static_cast(m_config), i, &hmodel)); + + auto model = std::make_shared(hmodel); + models.push_back(model); + } + + return models; + } + + /// + /// Sets the model for speech recognition. + /// + /// The model name. 
+ /// The license text. + void SetSpeechRecognitionModel(const SPXSTRING& name, const SPXSTRING& license) + { + SPX_THROW_ON_FAIL(embedded_speech_config_set_speech_recognition_model( + static_cast(m_config), Utils::ToUTF8(name).c_str(), Utils::ToUTF8(license).c_str())); + } + + /// + /// Gets the model name for speech recognition. + /// + /// The speech recognition model name. + SPXSTRING GetSpeechRecognitionModelName() const + { + return GetProperty(PropertyId::SpeechServiceConnection_RecoModelName); + } + + /// + /// Sets the speech recognition output format. + /// + /// Speech recognition output format (simple or detailed). + void SetSpeechRecognitionOutputFormat(OutputFormat format) + { + m_config.SetOutputFormat(format); + } + + /// + /// Gets the speech recognition output format. + /// + /// Speech recognition output format (simple or detailed). + OutputFormat GetSpeechRecognitionOutputFormat() const + { + return m_config.GetOutputFormat(); + } + + /// + /// Sets the profanity option. This can be used to remove profane words or mask them. + /// + /// Profanity option value. + void SetProfanity(ProfanityOption profanity) + { + m_config.SetProfanity(profanity); + } + + /// + /// Sets the voice for embedded speech synthesis. + /// + /// The voice name of the embedded speech synthesis. + /// The license text. + void SetSpeechSynthesisVoice(const SPXSTRING& name, const SPXSTRING& license) + { + SPX_THROW_ON_FAIL(embedded_speech_config_set_speech_synthesis_voice( + static_cast(m_config), Utils::ToUTF8(name).c_str(), Utils::ToUTF8(license).c_str())); + } + + /// + /// Gets the voice name for embedded speech synthesis. + /// + /// The speech synthesis model name, i.e. the voice name. + SPXSTRING GetSpeechSynthesisVoiceName() const + { + return GetProperty(PropertyId::SpeechServiceConnection_SynthOfflineVoice); + } + + /// + /// Sets the speech synthesis output format (e.g. Riff16Khz16BitMonoPcm). 
+ /// + /// Specifies the output format ID + void SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat formatId) + { + m_config.SetSpeechSynthesisOutputFormat(formatId); + } + + /// + /// Gets the speech synthesis output format. + /// + /// The speech synthesis output format. + SPXSTRING GetSpeechSynthesisOutputFormat() const + { + return m_config.GetSpeechSynthesisOutputFormat(); + } + + /// + /// Gets a list of available speech translation models. + /// + /// Speech translation model info. + std::vector> GetSpeechTranslationModels() + { + std::vector> models; + + uint32_t numModels = 0; + SPX_THROW_ON_FAIL(embedded_speech_config_get_num_speech_translation_models(static_cast(m_config), &numModels)); + + for (uint32_t i = 0; i < numModels; i++) + { + SPXSPEECHRECOMODELHANDLE hmodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(embedded_speech_config_get_speech_translation_model(static_cast(m_config), i, &hmodel)); + + auto model = std::make_shared(hmodel); + models.push_back(model); + } + + return models; + } + + /// + /// Sets the model for speech translation. + /// + /// Model name. + /// License text. + void SetSpeechTranslationModel(const SPXSTRING& name, const SPXSTRING& license) + { + SPX_THROW_ON_FAIL(embedded_speech_config_set_speech_translation_model( + static_cast(m_config), Utils::ToUTF8(name).c_str(), Utils::ToUTF8(license).c_str())); + } + + /// + /// Gets the model name for speech translation. + /// + /// The speech translation model name. + SPXSTRING GetSpeechTranslationModelName() const + { + return GetProperty(PropertyId::SpeechTranslation_ModelName); + } + + /// + /// Sets the model for keyword recognition. + /// This is for customer specific models that are tailored for detecting + /// wake words and direct commands. + /// + /// Model name. + /// License text. 
+ void SetKeywordRecognitionModel(const SPXSTRING& name, const SPXSTRING& license) + { + SPX_THROW_ON_FAIL(embedded_speech_config_set_keyword_recognition_model( + static_cast(m_config), Utils::ToUTF8(name).c_str(), Utils::ToUTF8(license).c_str())); + } + + /// + /// Gets the model name for keyword recognition. + /// + /// The keyword recognition model name. + SPXSTRING GetKeywordRecognitionModelName() const + { + return GetProperty(PropertyId::KeywordRecognition_ModelName); + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + m_config.SetProperty(name, value); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + m_config.SetProperty(id, value); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + return m_config.GetProperty(name); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + return m_config.GetProperty(id); + } + + /// + /// Destructs the object. + /// + virtual ~EmbeddedSpeechConfig() = default; + +protected: + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + inline explicit EmbeddedSpeechConfig(SPXSPEECHCONFIGHANDLE hconfig) : m_config(hconfig) + { + } + + /*! 
\endcond */ + +private: + DISABLE_COPY_AND_MOVE(EmbeddedSpeechConfig); + + }; + +}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_enums.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_enums.h new file mode 100644 index 0000000..e55c5a6 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_enums.h @@ -0,0 +1,1757 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_enums.h: Public API declarations for C++ enumerations +// + +#pragma once + +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +constexpr const char* TrueString = "true"; +constexpr const char* FalseString = "false"; +constexpr const char CommaDelim = ','; + +/// +/// Defines speech property ids. +/// Changed in version 1.4.0. +/// +enum class PropertyId +{ + /// + /// The Cognitive Services Speech Service subscription key. If you are using an intent recognizer, you need + /// to specify the LUIS endpoint key for your particular LUIS app. Under normal circumstances, you shouldn't + /// have to use this property directly. + /// Instead, use . + /// + SpeechServiceConnection_Key = 1000, + + /// + /// The Cognitive Services Speech Service endpoint (url). Under normal circumstances, you shouldn't + /// have to use this property directly. + /// Instead, use . + /// NOTE: This endpoint is not the same as the endpoint used to obtain an access token. + /// + SpeechServiceConnection_Endpoint = 1001, + + /// + /// The Cognitive Services Speech Service region. Under normal circumstances, you shouldn't have to + /// use this property directly. + /// Instead, use , , + /// , . + /// + SpeechServiceConnection_Region = 1002, + + /// + /// The Cognitive Services Speech Service authorization token (aka access token). Under normal circumstances, + /// you shouldn't have to use this property directly. 
+ /// Instead, use , + /// , , + /// . + /// + SpeechServiceAuthorization_Token = 1003, + + /// + /// The Cognitive Services Speech Service authorization type. Currently unused. + /// + SpeechServiceAuthorization_Type = 1004, + + /// + /// The Cognitive Services Custom Speech or Custom Voice Service endpoint id. Under normal circumstances, you shouldn't + /// have to use this property directly. + /// Instead use . + /// NOTE: The endpoint id is available in the Custom Speech Portal, listed under Endpoint Details. + /// + SpeechServiceConnection_EndpointId = 1005, + + /// + /// The Cognitive Services Speech Service host (url). Under normal circumstances, you shouldn't + /// have to use this property directly. + /// Instead, use . + /// + SpeechServiceConnection_Host = 1006, + + /// + /// The host name of the proxy server used to connect to the Cognitive Services Speech Service. Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.1.0. + /// + SpeechServiceConnection_ProxyHostName = 1100, + + /// + /// The port of the proxy server used to connect to the Cognitive Services Speech Service. Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.1.0. + /// + SpeechServiceConnection_ProxyPort = 1101, + + /// + /// The user name of the proxy server used to connect to the Cognitive Services Speech Service. Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.1.0. + /// + SpeechServiceConnection_ProxyUserName = 1102, + + /// + /// The password of the proxy server used to connect to the Cognitive Services Speech Service. Under normal circumstances, + /// you shouldn't have to use this property directly. + /// Instead, use . 
+ /// NOTE: This property id was added in version 1.1.0. + /// + SpeechServiceConnection_ProxyPassword = 1103, + + /// + /// The URL string built from speech configuration. + /// This property is intended to be read-only. The SDK is using it internally. + /// NOTE: Added in version 1.5.0. + /// + SpeechServiceConnection_Url = 1104, + + /// + /// Specifies the list of hosts for which proxies should not be used. This setting overrides all other configurations. + /// Hostnames are separated by commas and are matched in a case-insensitive manner. Wildcards are not supported. + /// + SpeechServiceConnection_ProxyHostBypass = 1105, + + /// + /// The list of comma separated languages used as target translation languages. Under normal circumstances, + /// you shouldn't have to use this property directly. Instead use + /// and . + /// + SpeechServiceConnection_TranslationToLanguages = 2000, + + /// + /// The name of the Cognitive Service Text to Speech Service voice. Under normal circumstances, you shouldn't have to use this + /// property directly. Instead use . + /// NOTE: Valid voice names can be found here. + /// + SpeechServiceConnection_TranslationVoice = 2001, + + /// + /// Translation features. For internal use. + /// + SpeechServiceConnection_TranslationFeatures = 2002, + + /// + /// The Language Understanding Service region. Under normal circumstances, you shouldn't have to use this property directly. + /// Instead use . + /// + SpeechServiceConnection_IntentRegion = 2003, + + /// + /// The Cognitive Services Speech Service recognition mode. Can be "INTERACTIVE", "CONVERSATION", "DICTATION". + /// This property is intended to be read-only. The SDK is using it internally. + /// + SpeechServiceConnection_RecoMode = 3000, + + /// + /// The spoken language to be recognized (in BCP-47 format). Under normal circumstances, you shouldn't have to use this property + /// directly. + /// Instead, use . 
+ /// + SpeechServiceConnection_RecoLanguage = 3001, + + /// + /// The session id. This id is a universally unique identifier (aka UUID) representing a specific binding of an audio input stream + /// and the underlying speech recognition instance to which it is bound. Under normal circumstances, you shouldn't have to use this + /// property directly. + /// Instead use . + /// + Speech_SessionId = 3002, + + /// + /// The query parameters provided by users. They will be passed to service as URL query parameters. + /// Added in version 1.5.0 + /// + SpeechServiceConnection_UserDefinedQueryParameters = 3003, + + /// + /// The string to specify the backend to be used for speech recognition; + /// allowed options are online and offline. + /// Under normal circumstances, you shouldn't use this property directly. + /// Currently the offline option is only valid when EmbeddedSpeechConfig is used. + /// Added in version 1.19.0 + /// + SpeechServiceConnection_RecoBackend = 3004, + + /// + /// The name of the model to be used for speech recognition. + /// Under normal circumstances, you shouldn't use this property directly. + /// Currently this is only valid when EmbeddedSpeechConfig is used. + /// Added in version 1.19.0 + /// + SpeechServiceConnection_RecoModelName = 3005, + + /// + /// This property is deprecated. + /// + SpeechServiceConnection_RecoModelKey = 3006, + + /// + /// The path to the ini file of the model to be used for speech recognition. + /// Under normal circumstances, you shouldn't use this property directly. + /// Currently this is only valid when EmbeddedSpeechConfig is used. + /// Added in version 1.19.0 + /// + SpeechServiceConnection_RecoModelIniFile = 3007, + + /// + /// The spoken language to be synthesized (e.g. 
en-US) + /// Added in version 1.4.0 + /// + SpeechServiceConnection_SynthLanguage = 3100, + + /// + /// The name of the TTS voice to be used for speech synthesis + /// Added in version 1.4.0 + /// + SpeechServiceConnection_SynthVoice = 3101, + + /// + /// The string to specify TTS output audio format + /// Added in version 1.4.0 + /// + SpeechServiceConnection_SynthOutputFormat = 3102, + + /// + /// Indicates if use compressed audio format for speech synthesis audio transmission. + /// This property only affects when SpeechServiceConnection_SynthOutputFormat is set to a pcm format. + /// If this property is not set and GStreamer is available, SDK will use compressed format for synthesized audio transmission, + /// and decode it. You can set this property to "false" to use raw pcm format for transmission on wire. + /// Added in version 1.16.0 + /// + SpeechServiceConnection_SynthEnableCompressedAudioTransmission = 3103, + + /// + /// The string to specify TTS backend; valid options are online and offline. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use or + /// to set the synthesis backend to offline. + /// Added in version 1.19.0 + /// + SpeechServiceConnection_SynthBackend = 3110, + + /// + /// The data file path(s) for offline synthesis engine; only valid when synthesis backend is offline. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use or . + /// Added in version 1.19.0 + /// + SpeechServiceConnection_SynthOfflineDataPath = 3112, + + /// + /// The name of the offline TTS voice to be used for speech synthesis + /// Under normal circumstances, you shouldn't use this property directly. + /// Instead, use and . + /// Added in version 1.19.0 + /// + SpeechServiceConnection_SynthOfflineVoice = 3113, + + /// + /// This property is deprecated. 
+ /// + SpeechServiceConnection_SynthModelKey = 3114, + + /// + /// The Cognitive Services Speech Service voices list api endpoint (url). Under normal circumstances, + /// you don't need to specify this property, SDK will construct it based on the region/host/endpoint of . + /// Added in version 1.16.0 + /// + SpeechServiceConnection_VoicesListEndpoint = 3130, + + /// + /// The initial silence timeout value (in milliseconds) used by the service. + /// Added in version 1.5.0 + /// + SpeechServiceConnection_InitialSilenceTimeoutMs = 3200, + + /// + /// The end silence timeout value (in milliseconds) used by the service. + /// Added in version 1.5.0 + /// + SpeechServiceConnection_EndSilenceTimeoutMs = 3201, + + /// + /// A boolean value specifying whether audio logging is enabled in the service or not. + /// Audio and content logs are stored either in Microsoft-owned storage, or in your own storage account linked + /// to your Cognitive Services subscription (Bring Your Own Storage (BYOS) enabled Speech resource). + /// Added in version 1.5.0. + /// + SpeechServiceConnection_EnableAudioLogging = 3202, + + /// + /// The speech service connection language identifier mode. + /// Can be "AtStart" (the default), or "Continuous". See [Language + /// Identification](https://aka.ms/speech/lid?pivots=programming-language-cpp) document. + /// Added in 1.25.0 + /// + SpeechServiceConnection_LanguageIdMode = 3205, + + /// + /// The speech service connection translation categoryId. + /// + SpeechServiceConnection_TranslationCategoryId = 3206, + + /// + /// The auto detect source languages + /// Added in version 1.8.0 + /// + SpeechServiceConnection_AutoDetectSourceLanguages = 3300, + + /// + /// The auto detect source language result + /// Added in version 1.8.0 + /// + SpeechServiceConnection_AutoDetectSourceLanguageResult = 3301, + + /// + /// The requested Cognitive Services Speech Service response output format (simple or detailed). 
Under normal circumstances, you shouldn't have + /// to use this property directly. + /// Instead use . + /// + SpeechServiceResponse_RequestDetailedResultTrueFalse = 4000, + + /// + /// The requested Cognitive Services Speech Service response output profanity level. Currently unused. + /// + SpeechServiceResponse_RequestProfanityFilterTrueFalse = 4001, + + /// + /// The requested Cognitive Services Speech Service response output profanity setting. + /// Allowed values are "masked", "removed", and "raw". + /// Added in version 1.5.0. + /// + SpeechServiceResponse_ProfanityOption = 4002, + + /// + /// A string value specifying which post processing option should be used by service. + /// Allowed values are "TrueText". + /// Added in version 1.5.0 + /// + SpeechServiceResponse_PostProcessingOption = 4003, + + /// + /// A boolean value specifying whether to include word-level timestamps in the response result. + /// Added in version 1.5.0 + /// + SpeechServiceResponse_RequestWordLevelTimestamps = 4004, + + /// + /// The number of times a word has to be in partial results to be returned. + /// Added in version 1.5.0 + /// + SpeechServiceResponse_StablePartialResultThreshold = 4005, + + /// + /// A string value specifying the output format option in the response result. Internal use only. + /// Added in version 1.5.0. + /// + SpeechServiceResponse_OutputFormatOption = 4006, + + /// + /// A boolean value specifying whether to include SNR (signal to noise ratio) in the response result. + /// Added in version 1.18.0 + /// + SpeechServiceResponse_RequestSnr = 4007, + + /// + /// A boolean value to request for stabilizing translation partial results by omitting words in the end. + /// Added in version 1.5.0. + /// + SpeechServiceResponse_TranslationRequestStablePartialResult = 4100, + + /// + /// A boolean value specifying whether to request WordBoundary events. + /// Added in version 1.21.0. 
+ /// + SpeechServiceResponse_RequestWordBoundary = 4200, + + /// + /// A boolean value specifying whether to request punctuation boundary in WordBoundary Events. Default is true. + /// Added in version 1.21.0. + /// + SpeechServiceResponse_RequestPunctuationBoundary = 4201, + + /// + /// A boolean value specifying whether to request sentence boundary in WordBoundary Events. Default is false. + /// Added in version 1.21.0. + /// + SpeechServiceResponse_RequestSentenceBoundary = 4202, + + /// + /// A boolean value specifying whether the SDK should synchronize synthesis metadata events, + /// (e.g. word boundary, viseme, etc.) to the audio playback. This only takes effect when the audio is played through the SDK. + /// Default is true. + /// If set to false, the SDK will fire the events as they come from the service, which may be out of sync with the audio playback. + /// Added in version 1.31.0. + /// + SpeechServiceResponse_SynthesisEventsSyncToAudio = 4210, + + /// + /// The Cognitive Services Speech Service response output (in JSON format). This property is available on recognition result objects only. + /// + SpeechServiceResponse_JsonResult = 5000, + + /// + /// The Cognitive Services Speech Service error details (in JSON format). Under normal circumstances, you shouldn't have to + /// use this property directly. + /// Instead, use . + /// + SpeechServiceResponse_JsonErrorDetails = 5001, + + /// + /// The recognition latency in milliseconds. Read-only, available on final speech/translation/intent results. + /// This measures the latency between when an audio input is received by the SDK, and the moment the final result is received from the service. + /// The SDK computes the time difference between the last audio fragment from the audio input that is contributing to the final result, and the time the final result is received from the speech service. + /// Added in version 1.3.0. 
+ /// + SpeechServiceResponse_RecognitionLatencyMs = 5002, + + /// + /// The recognition backend. Read-only, available on speech recognition results. + /// This indicates whether cloud (online) or embedded (offline) recognition was used to produce the result. + /// + SpeechServiceResponse_RecognitionBackend = 5003, + + /// + /// The speech synthesis first byte latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the latency between when the synthesis is started to be processed, and the moment the first byte audio is available. + /// Added in version 1.17.0. + /// + SpeechServiceResponse_SynthesisFirstByteLatencyMs = 5010, + + /// + /// The speech synthesis all bytes latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the latency between when the synthesis is started to be processed, and the moment the whole audio is synthesized. + /// Added in version 1.17.0. + /// + SpeechServiceResponse_SynthesisFinishLatencyMs = 5011, + + /// + /// The underrun time for speech synthesis in milliseconds. Read-only, available on results in SynthesisCompleted events. + /// This measures the total underrun time from is filled to synthesis completed. + /// Added in version 1.17.0. + /// + SpeechServiceResponse_SynthesisUnderrunTimeMs = 5012, + + /// + /// The speech synthesis connection latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the latency between when the synthesis is started to be processed, and the moment the HTTP/WebSocket connection is established. + /// Added in version 1.26.0. + /// + SpeechServiceResponse_SynthesisConnectionLatencyMs = 5013, + + /// + /// The speech synthesis network latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the network round trip time. + /// Added in version 1.26.0. 
+ /// + SpeechServiceResponse_SynthesisNetworkLatencyMs = 5014, + + /// + /// The speech synthesis service latency in milliseconds. Read-only, available on final speech synthesis results. + /// This measures the service processing time to synthesize the first byte of audio. + /// Added in version 1.26.0. + /// + SpeechServiceResponse_SynthesisServiceLatencyMs = 5015, + + /// + /// Indicates which backend the synthesis is finished by. Read-only, available on speech synthesis results, except for the result in SynthesisStarted event + /// Added in version 1.17.0. + /// + SpeechServiceResponse_SynthesisBackend = 5020, + + /// + /// Determines if intermediate results contain speaker identification. + /// + /// + /// + /// Allowed values are "true" or "false". If set to "true", the intermediate results will contain speaker identification. + /// The default value if unset or set to an invalid value is "false". + /// + /// + /// This is currently only supported for scenarios using the + /// + /// + /// Adding in version 1.40. + /// + /// + SpeechServiceResponse_DiarizeIntermediateResults = 5025, + + /// + /// The cancellation reason. Currently unused. + /// + CancellationDetails_Reason = 6000, + + /// + /// The cancellation text. Currently unused. + /// + CancellationDetails_ReasonText = 6001, + + /// + /// The cancellation detailed text. Currently unused. + /// + CancellationDetails_ReasonDetailedText = 6002, + + /// + /// The Language Understanding Service response output (in JSON format). Available via . + /// + LanguageUnderstandingServiceResponse_JsonResult = 7000, + + /// + /// The device name for audio capture. Under normal circumstances, you shouldn't have to + /// use this property directly. + /// Instead, use . + /// NOTE: This property id was added in version 1.3.0. + /// + AudioConfig_DeviceNameForCapture = 8000, + + /// + /// The number of channels for audio capture. Internal use only. + /// NOTE: This property id was added in version 1.3.0. 
+ /// + AudioConfig_NumberOfChannelsForCapture = 8001, + + /// + /// The sample rate (in Hz) for audio capture. Internal use only. + /// NOTE: This property id was added in version 1.3.0. + /// + AudioConfig_SampleRateForCapture = 8002, + + /// + /// The number of bits of each sample for audio capture. Internal use only. + /// NOTE: This property id was added in version 1.3.0. + /// + AudioConfig_BitsPerSampleForCapture = 8003, + + /// + /// The audio source. Allowed values are "Microphones", "File", and "Stream". + /// Added in version 1.3.0. + /// + AudioConfig_AudioSource = 8004, + + /// + /// The device name for audio render. Under normal circumstances, you shouldn't have to + /// use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + AudioConfig_DeviceNameForRender = 8005, + + /// + /// Playback buffer length in milliseconds, default is 50 milliseconds. + /// + AudioConfig_PlaybackBufferLengthInMs = 8006, + + /// + /// Audio processing options in JSON format. + /// + AudioConfig_AudioProcessingOptions = 8007, + + /// + /// The file name to write logs. + /// Added in version 1.4.0. + /// + Speech_LogFilename = 9001, + + /// + /// A duration of detected silence, measured in milliseconds, after which speech-to-text will determine a spoken + /// phrase has ended and generate a final Recognized result. Configuring this timeout may be helpful in situations + /// where spoken input is significantly faster or slower than usual and default segmentation behavior consistently + /// yields results that are too long or too short. Segmentation timeout values that are inappropriately high or low + /// can negatively affect speech-to-text accuracy; this property should be carefully configured and the resulting + /// behavior should be thoroughly validated as intended. + /// The value must be in the range **[100, 5000]** milliseconds. 
+ /// + /// For more information about timeout configuration that includes discussion of default behaviors, please visit + /// https://aka.ms/csspeech/timeouts. + /// + Speech_SegmentationSilenceTimeoutMs = 9002, + + /// + /// The maximum length of a spoken phrase when using the "Time" segmentation strategy. + /// As the length of a spoken phrase approaches this value, the will begin being reduced until either the phrase silence timeout is hit or the phrase reaches the maximum length. + /// The value must be in the range **[20000, 70000]** milliseconds. + /// + Speech_SegmentationMaximumTimeMs = 9003, + + /// + /// The strategy used to determine when a spoken phrase has ended and a final Recognized result should be generated. + /// Allowed values are "Default", "Time", and "Semantic". + /// + /// + /// Valid values are: + /// + /// + /// Default + /// Use the default strategy and settings as determined by the Speech Service. Use in most situations. + /// + /// + /// Time + /// Uses a time based strategy where the amount of silence between speech is used to determine when to generate a final result. + /// + /// + /// Semantic + /// Uses an AI model to determine the end of a spoken phrase based on the content of the phrase. + /// + /// + /// + /// When using the time strategy, the property can be used to adjust the amount of silence needed to determine the end of a spoken phrase, + /// and the property can be used to adjust the maximum length of a spoken phrase. + /// + /// + /// The semantic strategy has no control properties available. + /// + /// + Speech_SegmentationStrategy = 9004, + + /// + /// Identifier used to connect to the backend service. + /// Added in version 1.5.0. + /// + Conversation_ApplicationId = 10000, + + /// + /// Type of dialog backend to connect to. + /// Added in version 1.7.0. + /// + Conversation_DialogType = 10001, + + /// + /// Silence timeout for listening + /// Added in version 1.5.0. 
+ /// + Conversation_Initial_Silence_Timeout = 10002, + + /// + /// From id to be used on speech recognition activities + /// Added in version 1.5.0. + /// + Conversation_From_Id = 10003, + + /// + /// ConversationId for the session. + /// Added in version 1.8.0. + /// + Conversation_Conversation_Id = 10004, + + /// + /// Comma separated list of custom voice deployment ids. + /// Added in version 1.8.0. + /// + Conversation_Custom_Voice_Deployment_Ids = 10005, + + /// + /// Speech activity template, stamp properties in the template on the activity generated by the service for speech. + /// Added in version 1.10.0. + /// + Conversation_Speech_Activity_Template = 10006, + + /// + /// Your participant identifier in the current conversation. + /// Added in version 1.13.0 + /// + Conversation_ParticipantId = 10007, + + // If specified as true, request that the service send MessageStatus payloads via the ActivityReceived event + // handler. These messages communicate the outcome of ITurnContext resolution from the dialog system. + // Added in version 1.14.0. + Conversation_Request_Bot_Status_Messages = 10008, + + // Additional identifying information, such as a Direct Line token, used to authenticate with the backend service. + // Added in version 1.16.0. + Conversation_Connection_Id = 10009, + + /// + /// The time stamp associated to data buffer written by client when using Pull/Push audio input streams. + /// The time stamp is a 64-bit value with a resolution of 90 kHz. It is the same as the presentation timestamp in an MPEG transport stream. See https://en.wikipedia.org/wiki/Presentation_timestamp + /// Added in version 1.5.0. + /// + DataBuffer_TimeStamp = 11001, + + /// + /// The user id associated to data buffer written by client when using Pull/Push audio input streams. + /// Added in version 1.5.0. + /// + DataBuffer_UserId = 11002, + + /// + /// The reference text of the audio for pronunciation evaluation. 
+ /// For this and the following pronunciation assessment parameters, see the table + /// [Pronunciation assessment parameters](/azure/cognitive-services/speech-service/rest-speech-to-text-short#pronunciation-assessment-parameters). + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use or . + /// Added in version 1.14.0 + /// + PronunciationAssessment_ReferenceText = 12001, + + /// + /// The point system for pronunciation score calibration (FivePoint or HundredMark). + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + PronunciationAssessment_GradingSystem = 12002, + + /// + /// The pronunciation evaluation granularity (Phoneme, Word, or FullText). + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + PronunciationAssessment_Granularity = 12003, + + /// + /// Defines if enable miscue calculation. + /// With this enabled, the pronounced words will be compared to the reference text, + /// and will be marked with omission/insertion based on the comparison. The default setting is False. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + PronunciationAssessment_EnableMiscue = 12005, + + /// + /// The pronunciation evaluation phoneme alphabet. The valid values are "SAPI" (default) and "IPA" + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.20.0 + /// + PronunciationAssessment_PhonemeAlphabet = 12006, + + /// + /// The pronunciation evaluation nbest phoneme count. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . 
+ /// Added in version 1.20.0 + /// + PronunciationAssessment_NBestPhonemeCount = 12007, + + /// + /// Whether to enable prosody assessment. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.33.0 + /// + PronunciationAssessment_EnableProsodyAssessment = 12008, + + /// + /// The json string of pronunciation assessment parameters + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.14.0 + /// + PronunciationAssessment_Json = 12009, + + /// + /// Pronunciation assessment parameters. + /// This property is intended to be read-only. The SDK is using it internally. + /// Added in version 1.14.0 + /// + PronunciationAssessment_Params = 12010, + + /// + /// The content topic of the pronunciation assessment. + /// Under normal circumstances, you shouldn't have to use this property directly. + /// Instead, use . + /// Added in version 1.33.0 + /// + PronunciationAssessment_ContentTopic = 12020, + + /// + /// Speaker Recognition backend API version. + /// This property is added to allow testing and use of previous versions of Speaker Recognition APIs, where applicable. + /// Added in version 1.18.0 + /// + SpeakerRecognition_Api_Version = 13001, + + /// + /// The name of a model to be used for speech translation. + /// Do not use this property directly. + /// Currently this is only valid when EmbeddedSpeechConfig is used. + /// + SpeechTranslation_ModelName = 13100, + + /// + /// This property is deprecated. + /// + SpeechTranslation_ModelKey = 13101, + + /// + /// The name of a model to be used for keyword recognition. + /// Do not use this property directly. + /// Currently this is only valid when EmbeddedSpeechConfig is used. + /// + KeywordRecognition_ModelName = 13200, + + /// + /// This property is deprecated. 
+ /// + KeywordRecognition_ModelKey = 13201, + + /// + /// Enable the collection of embedded speech performance metrics which can + /// be used to evaluate the capability of a device to use embedded speech. + /// The collected data is included in results from specific scenarios like + /// speech recognition. + /// The default setting is "false". Note that metrics may not be available + /// from all embedded speech scenarios. + /// + EmbeddedSpeech_EnablePerformanceMetrics = 13300, + + /// + /// The pitch of the synthesized speech. + /// + SpeechSynthesisRequest_Pitch = 14001, + + /// + /// The rate of the synthesized speech. + /// + SpeechSynthesisRequest_Rate = 14002, + + /// + /// The volume of the synthesized speech. + /// + SpeechSynthesisRequest_Volume = 14003, + + /// + /// The style of the synthesized speech. + /// + SpeechSynthesisRequest_Style = 14004, + + /// + /// The temperature of the synthesized speech. + /// The temperature parameter only takes effect when the voice is a HD voice. + /// + SpeechSynthesisRequest_Temperature = 14005, + + /// + /// The custom lexicon URL for the synthesized speech. + /// This provides a URL to a custom pronunciation lexicon to be used during synthesis. + /// + SpeechSynthesisRequest_CustomLexiconUrl = 14006, + + /// + /// The preferred locales for the synthesized speech. + /// Comma-separated list of locale names in order of preference. + /// + SpeechSynthesisRequest_PreferLocales = 14007, + + /// + /// The timeout interval in milliseconds between synthesized speech audio frames. + /// The greater of this and 10 seconds is used as a hard frame timeout. + /// A speech synthesis timeout occurs if + /// a) the time passed since the latest frame exceeds this timeout interval and the Real-Time Factor (RTF) exceeds its maximum value, or + /// b) the time passed since the latest frame exceeds the hard frame timeout. 
+ /// + SpeechSynthesis_FrameTimeoutInterval = 14101, + + /// + /// The maximum Real-Time Factor (RTF) for speech synthesis. The RTF is calculated as + /// RTF = f(d)/d + /// where f(d) is the time taken to synthesize speech audio of duration d. + /// + SpeechSynthesis_RtfTimeoutThreshold = 14102, +}; + +/// +/// Output format. +/// +enum class OutputFormat +{ + Simple = 0, + Detailed = 1 +}; + +/// +/// Removes profanity (swearing), or replaces letters of profane words with stars. +/// Added in version 1.5.0. +/// +enum class ProfanityOption +{ + /// + /// Replaces letters in profane words with star characters. + /// + Masked = 0, + /// + /// Removes profane words. + /// + Removed = 1, + /// + /// Does nothing to profane words. + /// + Raw = 2 +}; + +/// +/// Specifies the possible reasons a recognition result might be generated. +/// +enum class ResultReason +{ + /// + /// Indicates speech could not be recognized. More details can be found in the NoMatchDetails object. + /// + NoMatch = 0, + + /// + /// Indicates that the recognition was canceled. More details can be found using the CancellationDetails object. + /// + Canceled = 1, + + /// + /// Indicates the speech result contains hypothesis text. + /// + RecognizingSpeech = 2, + + /// + /// Indicates the speech result contains final text that has been recognized. + /// Speech Recognition is now complete for this phrase. + /// + RecognizedSpeech = 3, + + /// + /// Indicates the intent result contains hypothesis text and intent. + /// + RecognizingIntent = 4, + + /// + /// Indicates the intent result contains final text and intent. + /// Speech Recognition and Intent determination are now complete for this phrase. + /// + RecognizedIntent = 5, + + /// + /// Indicates the translation result contains hypothesis text and its translation(s). + /// + TranslatingSpeech = 6, + + /// + /// Indicates the translation result contains final text and corresponding translation(s). 
+ /// Speech Recognition and Translation are now complete for this phrase. + /// + TranslatedSpeech = 7, + + /// + /// Indicates the synthesized audio result contains a non-zero amount of audio data + /// + SynthesizingAudio = 8, + + /// + /// Indicates the synthesized audio is now complete for this phrase. + /// + SynthesizingAudioCompleted = 9, + + /// + /// Indicates the speech result contains (unverified) keyword text. + /// Added in version 1.3.0 + /// + RecognizingKeyword = 10, + + /// + /// Indicates that keyword recognition completed recognizing the given keyword. + /// Added in version 1.3.0 + /// + RecognizedKeyword = 11, + + /// + /// Indicates the speech synthesis is now started + /// Added in version 1.4.0 + /// + SynthesizingAudioStarted = 12, + + /// + /// Indicates the transcription result contains hypothesis text and its translation(s) for + /// other participants in the conversation. + /// Added in version 1.8.0 + /// + TranslatingParticipantSpeech = 13, + + /// + /// Indicates the transcription result contains final text and corresponding translation(s) + /// for other participants in the conversation. Speech Recognition and Translation are now + /// complete for this phrase. + /// Added in version 1.8.0 + /// + TranslatedParticipantSpeech = 14, + + /// + /// Indicates the transcription result contains the instant message and corresponding + /// translation(s). + /// Added in version 1.8.0 + /// + TranslatedInstantMessage = 15, + + /// + /// Indicates the transcription result contains the instant message for other participants + /// in the conversation and corresponding translation(s). + /// Added in version 1.8.0 + /// + TranslatedParticipantInstantMessage = 16, + + /// + /// Indicates the voice profile is being enrolling and customers need to send more audio to create a voice profile. + /// Added in version 1.12.0 + /// + EnrollingVoiceProfile = 17, + + /// + /// The voice profile has been enrolled. 
+ /// Added in version 1.12.0 + /// + EnrolledVoiceProfile = 18, + + /// + /// Indicates successful identification of some speakers. + /// Added in version 1.12.0 + /// + RecognizedSpeakers = 19, + + /// + /// Indicates successfully verified one speaker. + /// Added in version 1.12.0 + /// + RecognizedSpeaker = 20, + + /// + /// Indicates a voice profile has been reset successfully. + /// Added in version 1.12.0 + /// + ResetVoiceProfile = 21, + + /// + /// Indicates a voice profile has been deleted successfully. + /// Added in version 1.12.0 + /// + DeletedVoiceProfile = 22, + + /// + /// Indicates the voices list has been retrieved successfully. + /// Added in version 1.16.0 + /// + VoicesListRetrieved = 23 +}; + +/// +/// Defines the possible reasons a recognition result might be canceled. +/// +enum class CancellationReason +{ + /// + /// Indicates that an error occurred during speech recognition. + /// + Error = 1, + + /// + /// Indicates that the end of the audio stream was reached. + /// + EndOfStream = 2, + + /// + /// Indicates that request was cancelled by the user. + /// Added in version 1.14.0 + /// + CancelledByUser = 3, +}; + +/// +/// Defines error code in case that CancellationReason is Error. +/// Added in version 1.1.0. +/// +enum class CancellationErrorCode +{ + /// + /// No error. + /// If CancellationReason is EndOfStream, CancellationErrorCode + /// is set to NoError. + /// + NoError = 0, + + /// + /// Indicates an authentication error. + /// An authentication error occurs if subscription key or authorization token is invalid, expired, + /// or does not match the region being used. + /// + AuthenticationFailure = 1, + + /// + /// Indicates that one or more recognition parameters are invalid or the audio format is not supported. + /// + BadRequest = 2, + + /// + /// Indicates that the number of parallel requests exceeded the number of allowed concurrent transcriptions for the subscription. 
+ /// + TooManyRequests = 3, + + /// + /// Indicates that the free subscription used by the request ran out of quota. + /// + Forbidden = 4, + + /// + /// Indicates a connection error. + /// + ConnectionFailure = 5, + + /// + /// Indicates a time-out error when waiting for response from service. + /// + ServiceTimeout = 6, + + /// + /// Indicates that an error is returned by the service. + /// + ServiceError = 7, + + /// + /// Indicates that the service is currently unavailable. + /// + ServiceUnavailable = 8, + + /// + /// Indicates an unexpected runtime error. + /// + RuntimeError = 9, + + /// + /// Indicates the Speech Service is temporarily requesting a reconnect to a different endpoint. + /// + /// Used internally + ServiceRedirectTemporary = 10, + + /// + /// Indicates the Speech Service is permanently requesting a reconnect to a different endpoint. + /// + /// Used internally + ServiceRedirectPermanent = 11, + + /// + /// Indicates the embedded speech (SR or TTS) model is not available or corrupted. + /// + EmbeddedModelError = 12, +}; + +/// +/// Defines the possible reasons a recognition result might not be recognized. +/// +enum class NoMatchReason +{ + /// + /// Indicates that speech was detected, but not recognized. + /// + NotRecognized = 1, + + /// + /// Indicates that the start of the audio stream contained only silence, and the service timed out waiting for speech. + /// + InitialSilenceTimeout = 2, + + /// + /// Indicates that the start of the audio stream contained only noise, and the service timed out waiting for speech. + /// + InitialBabbleTimeout = 3, + + /// + /// Indicates that the spotted keyword has been rejected by the keyword verification service. + /// Added in version 1.5.0. + /// + KeywordNotRecognized = 4, + + /// + /// Indicates that the audio stream contained only silence after the last recognized phrase. + /// + EndSilenceTimeout = 5 +}; + +/// +/// Defines the possible types for an activity json value. 
+/// Added in version 1.5.0 +/// +enum class ActivityJSONType : int +{ + Null = 0, + Object = 1, + Array = 2, + String = 3, + Double = 4, + UInt = 5, + Int = 6, + Boolean = 7 +}; + + +/// +/// Defines the possible speech synthesis output audio formats. +/// Updated in version 1.19.0 +/// +enum class SpeechSynthesisOutputFormat +{ + /// + /// raw-8khz-8bit-mono-mulaw + /// + Raw8Khz8BitMonoMULaw = 1, + + /// + /// riff-16khz-16kbps-mono-siren + /// Unsupported by the service. Do not use this value. + /// + Riff16Khz16KbpsMonoSiren = 2, + + /// + /// audio-16khz-16kbps-mono-siren + /// Unsupported by the service. Do not use this value. + /// + Audio16Khz16KbpsMonoSiren = 3, + + /// + /// audio-16khz-32kbitrate-mono-mp3 + /// + Audio16Khz32KBitRateMonoMp3 = 4, + + /// + /// audio-16khz-128kbitrate-mono-mp3 + /// + Audio16Khz128KBitRateMonoMp3 = 5, + + /// + /// audio-16khz-64kbitrate-mono-mp3 + /// + Audio16Khz64KBitRateMonoMp3 = 6, + + /// + /// audio-24khz-48kbitrate-mono-mp3 + /// + Audio24Khz48KBitRateMonoMp3 =7, + + /// + /// audio-24khz-96kbitrate-mono-mp3 + /// + Audio24Khz96KBitRateMonoMp3 = 8, + + /// + /// audio-24khz-160kbitrate-mono-mp3 + /// + Audio24Khz160KBitRateMonoMp3 = 9, + + /// + /// raw-16khz-16bit-mono-truesilk + /// + Raw16Khz16BitMonoTrueSilk = 10, + + /// + /// riff-16khz-16bit-mono-pcm + /// + Riff16Khz16BitMonoPcm = 11, + + /// + /// riff-8khz-16bit-mono-pcm + /// + Riff8Khz16BitMonoPcm = 12, + + /// + /// riff-24khz-16bit-mono-pcm + /// + Riff24Khz16BitMonoPcm = 13, + + /// + /// riff-8khz-8bit-mono-mulaw + /// + Riff8Khz8BitMonoMULaw = 14, + + /// + /// raw-16khz-16bit-mono-pcm + /// + Raw16Khz16BitMonoPcm = 15, + + /// + /// raw-24khz-16bit-mono-pcm + /// + Raw24Khz16BitMonoPcm = 16, + + /// + /// raw-8khz-16bit-mono-pcm + /// + Raw8Khz16BitMonoPcm = 17, + + /// + /// ogg-16khz-16bit-mono-opus + /// + Ogg16Khz16BitMonoOpus = 18, + + /// + /// ogg-24khz-16bit-mono-opus + /// + Ogg24Khz16BitMonoOpus = 19, + + /// + /// 
raw-48khz-16bit-mono-pcm + /// + Raw48Khz16BitMonoPcm = 20, + + /// + /// riff-48khz-16bit-mono-pcm + /// + Riff48Khz16BitMonoPcm = 21, + + /// + /// audio-48khz-96kbitrate-mono-mp3 + /// + Audio48Khz96KBitRateMonoMp3 = 22, + + /// + /// audio-48khz-192kbitrate-mono-mp3 + /// + Audio48Khz192KBitRateMonoMp3 = 23, + + /// + /// ogg-48khz-16bit-mono-opus + /// Added in version 1.16.0 + /// + Ogg48Khz16BitMonoOpus = 24, + + /// + /// webm-16khz-16bit-mono-opus + /// Added in version 1.16.0 + /// + Webm16Khz16BitMonoOpus = 25, + + /// + /// webm-24khz-16bit-mono-opus + /// Added in version 1.16.0 + /// + Webm24Khz16BitMonoOpus = 26, + + /// + /// raw-24khz-16bit-mono-truesilk + /// Added in version 1.17.0 + /// + Raw24Khz16BitMonoTrueSilk = 27, + + /// + /// raw-8khz-8bit-mono-alaw + /// Added in version 1.17.0 + /// + Raw8Khz8BitMonoALaw = 28, + + /// + /// riff-8khz-8bit-mono-alaw + /// Added in version 1.17.0 + /// + Riff8Khz8BitMonoALaw = 29, + + /// + /// webm-24khz-16bit-24kbps-mono-opus + /// Audio compressed by OPUS codec in a WebM container, with bitrate of 24kbps, optimized for IoT scenario. + /// (Added in 1.19.0) + /// + Webm24Khz16Bit24KbpsMonoOpus = 30, + + /// + /// audio-16khz-16bit-32kbps-mono-opus + /// Audio compressed by OPUS codec without container, with bitrate of 32kbps. + /// (Added in 1.20.0) + /// + Audio16Khz16Bit32KbpsMonoOpus = 31, + + /// + /// audio-24khz-16bit-48kbps-mono-opus + /// Audio compressed by OPUS codec without container, with bitrate of 48kbps. + /// (Added in 1.20.0) + /// + Audio24Khz16Bit48KbpsMonoOpus = 32, + + /// + /// audio-24khz-16bit-24kbps-mono-opus + /// Audio compressed by OPUS codec without container, with bitrate of 24kbps. + /// (Added in 1.20.0) + /// + Audio24Khz16Bit24KbpsMonoOpus = 33, + + /// + /// raw-22050hz-16bit-mono-pcm + /// Raw PCM audio at 22050Hz sampling rate and 16-bit depth. 
+ /// (Added in 1.22.0) + /// + Raw22050Hz16BitMonoPcm = 34, + + /// + /// riff-22050hz-16bit-mono-pcm + /// PCM audio at 22050Hz sampling rate and 16-bit depth, with RIFF header. + /// (Added in 1.22.0) + /// + Riff22050Hz16BitMonoPcm = 35, + + /// + /// raw-44100hz-16bit-mono-pcm + /// Raw PCM audio at 44100Hz sampling rate and 16-bit depth. + /// (Added in 1.22.0) + /// + Raw44100Hz16BitMonoPcm = 36, + + /// + /// riff-44100hz-16bit-mono-pcm + /// PCM audio at 44100Hz sampling rate and 16-bit depth, with RIFF header. + /// (Added in 1.22.0) + /// + Riff44100Hz16BitMonoPcm = 37, + + /// + /// amr-wb-16000hz + /// AMR-WB audio at 16kHz sampling rate. + /// (Added in 1.24.0) + /// + AmrWb16000Hz = 38, + + /// + /// g722-16khz-64kbps + /// G.722 audio at 16kHz sampling rate and 64kbps bitrate. + /// (Added in 1.38.0) + /// + G72216Khz64Kbps = 39 +}; + +/// +/// Defines the possible status of audio data stream. +/// Added in version 1.4.0 +/// +enum class StreamStatus +{ + /// + /// The audio data stream status is unknown + /// + Unknown = 0, + + /// + /// The audio data stream contains no data + /// + NoData = 1, + + /// + /// The audio data stream contains partial data of a speak request + /// + PartialData = 2, + + /// + /// The audio data stream contains all data of a speak request + /// + AllData = 3, + + /// + /// The audio data stream was canceled + /// + Canceled = 4 +}; + +/// +/// Defines channels used to pass property settings to service. +/// Added in version 1.5.0. +/// +enum class ServicePropertyChannel +{ + /// + /// Uses URI query parameter to pass property settings to service. + /// + UriQueryParameter = 0, + + /// + /// Uses HttpHeader to set a key/value in a HTTP header. 
+ /// + HttpHeader = 1 +}; + +namespace Transcription +{ + /// + /// Why the participant changed event was raised + /// Added in version 1.8.0 + /// + enum class ParticipantChangedReason + { + /// + /// Participant has joined the conversation + /// + JoinedConversation = 0, + + /// + /// Participant has left the conversation. This could be voluntary, or involuntary + /// (e.g. they are experiencing networking issues) + /// + LeftConversation = 1, + + /// + /// The participants' state has changed (e.g. they became muted, changed their nickname) + /// + Updated = 2 + }; +} + +namespace Intent +{ + /// + /// Used to define the type of entity used for intent recognition. + /// + enum class EntityType + { + /// + /// This will match any text that fills the slot. + /// + Any = 0, + /// + /// This will match text that is contained within the list or any text if the mode is set to "fuzzy". + /// + List = 1, + /// + /// This will match cardinal and ordinal integers. + /// + PrebuiltInteger = 2 + }; + + /// + /// Used to define the type of entity used for intent recognition. + /// + enum class EntityMatchMode + { + /// + /// This is the basic or default mode of matching based on the EntityType + /// + Basic = 0, + /// + /// This will match only exact matches within the entities phrases. + /// + Strict = 1, + /// + /// This will match text within the slot the entity is in, but not require anything from that text. + /// + Fuzzy = 2 + }; + + /// + /// Used to define the greediness of the entity. + /// + enum class EntityGreed + { + /// + /// Lazy will match as little as possible. + /// + Lazy = 0, + /// + /// Greedy will match as much as possible. + /// + Greedy = 1, + }; +} +/// +/// Defines voice profile types +/// +enum class VoiceProfileType +{ + /// + /// Text independent speaker identification. + /// + TextIndependentIdentification = 1, + + /// + /// Text dependent speaker verification. + /// + TextDependentVerification = 2, + + /// + /// Text independent verification. 
+ /// + TextIndependentVerification = 3 +}; + +/// +/// Defines the scope that a Recognition Factor is applied to. +/// +enum class RecognitionFactorScope +{ + /// + /// A Recognition Factor will apply to grammars that can be referenced as individual partial phrases. + /// + /// + /// Currently only applies to PhraseListGrammars + /// + PartialPhrase = 1, +}; + +/// +/// Defines the point system for pronunciation score calibration; default value is FivePoint. +/// Added in version 1.14.0 +/// +enum class PronunciationAssessmentGradingSystem +{ + /// + /// Five point calibration + /// + FivePoint = 1, + + /// + /// Hundred mark + /// + HundredMark = 2 +}; + +/// +/// Defines the pronunciation evaluation granularity; default value is Phoneme. +/// Added in version 1.14.0 +/// +enum class PronunciationAssessmentGranularity +{ + /// + /// Shows the score on the full text, word and phoneme level + /// + Phoneme = 1, + + /// + /// Shows the score on the full text and word level + /// + Word = 2, + + /// + /// Shows the score on the full text level only + /// + FullText = 3 +}; + +/// +/// Defines the type of synthesis voices +/// Added in version 1.16.0 +/// +enum class SynthesisVoiceType +{ + /// + /// Online neural voice + /// + OnlineNeural = 1, + + /// + /// Online standard voice + /// + OnlineStandard = 2, + + /// + /// Offline neural voice + /// + OfflineNeural = 3, + + /// + /// Offline standard voice + /// + OfflineStandard = 4 +}; + +/// +/// Defines the gender of synthesis voices +/// Added in version 1.17.0 +/// +enum class SynthesisVoiceGender +{ + /// + /// Gender unknown. + /// + Unknown = 0, + + /// + /// Female voice + /// + Female = 1, + + /// + /// Male voice + /// + Male = 2, + + /// + /// Neutral voice + /// + Neutral = 3 +}; + +/// +/// Defines the status of synthesis voices +/// +enum class SynthesisVoiceStatus +{ + /// + /// Voice status unknown. + /// + Unknown = 0, + + /// + /// Voice is generally available. 
+ /// + GeneralAvailability = 1, + + /// + /// Voice is in preview. + /// + Preview = 2, + + /// + /// Voice is deprecated, do not use. + /// + Deprecated = 3, +}; + +/// +/// Defines the boundary type of speech synthesis boundary event +/// Added in version 1.21.0 +/// +enum class SpeechSynthesisBoundaryType +{ + /// + /// Word boundary + /// + Word = 0, + + /// + /// Punctuation boundary + /// + Punctuation = 1, + + /// + /// Sentence boundary + /// + Sentence = 2 +}; + +/// +/// The strategy used to determine when a spoken phrase has ended and a final Recognized result should be generated. +/// Allowed values are "Default", "Time", and "Semantic". +/// +enum class SegmentationStrategy +{ + /// + /// Use the default strategy and settings as determined by the Speech Service. Use in most situations. + /// + Default = 0, + + /// + /// Uses a time based strategy where the amount of silence between speech is used to determine when to generate a final result. + /// + /// + /// When using the time strategy, the property can be used to adjust the amount of silence needed to determine the end of a spoken phrase, + /// and the property can be used to adjust the maximum length of a spoken phrase. + /// + Time = 1, + + /// + /// Uses an AI model to determine the end of a spoken phrase based on the content of the phrase. + /// + /// + /// The semantic strategy has no control properties available. + /// + Semantic = 2 +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_event_logger.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_event_logger.h new file mode 100644 index 0000000..4e67cb1 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_event_logger.h @@ -0,0 +1,108 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Diagnostics { +namespace Logging { + +/// +/// Class with static methods to control callback-based SDK logging. +/// Turning on logging while running your Speech SDK scenario provides +/// detailed information from the SDK's core native components. If you +/// report an issue to Microsoft, you may be asked to provide logs to help +/// Microsoft diagnose the issue. Your application should not take dependency +/// on particular log strings, as they may change from one SDK release to another +/// without notice. +/// Use EventLogger when you want to get access to new log strings as soon +/// as they are available, and you need to further process them. For example, +/// integrating Speech SDK logs with your existing logging collection system. +/// Added in version 1.20.0 +/// +/// Event logging is a process wide construct. That means that if (for example) +/// you have multiple speech recognizer objects running in parallel, you can only register +/// one callback function to receive interleaved logs from all recognizers. You cannot register +/// a separate callback for each recognizer. +class EventLogger +{ +public: + using CallbackFunction_Type = ::std::function; + + /// + /// Register a callback function that will be invoked for each new log messages. + /// + /// callback function to call. Set a nullptr value + /// to stop the Event Logger. + /// You can only register one callback function. This call will happen on a working thread of the SDK, + /// so the log string should be copied somewhere for further processing by another thread, and the function should return immediately. + /// No heavy processing or network calls should be done in this callback function. 
+ static void SetCallback(CallbackFunction_Type callback = nullptr) + { + AZAC_THROW_ON_FAIL(diagnostics_logmessage_set_callback(nullptr == callback ? nullptr : LineLogged)); + + SetOrGet(true, callback); + } + + /// + /// Sets or clears filters for callbacks. + /// Once filters are set, the callback will be invoked only if the log string + /// contains at least one of the strings specified by the filters. The match is case sensitive. + /// + /// Optional. Filters to use, or an empty list to clear previously set filters + static void SetFilters(std::initializer_list filters = {}) + { + std::string str = ""; + + if (filters.size() > 0) + { + std::ostringstream filtersCollapsed; + std::copy(filters.begin(), filters.end(), std::ostream_iterator(filtersCollapsed, ";")); + str = filtersCollapsed.str(); + } + + AZAC_THROW_ON_FAIL(diagnostics_logmessage_set_filters(str.c_str())); + } + + /// + /// Sets the level of the messages to be captured by the logger + /// + /// Maximum level of detail to be captured by the logger. + static void SetLevel(Level level) + { + const auto levelStr = Details::LevelToString(level); + diagnostics_set_log_level("event", levelStr); + } + +private: + static CallbackFunction_Type SetOrGet(bool set, CallbackFunction_Type callback) + { + static CallbackFunction_Type staticCallback = nullptr; + if (set) + { + staticCallback = callback; + } + return staticCallback; + } + + static void LineLogged(const char* line) + { + auto callback = SetOrGet(false, nullptr); + if (nullptr != callback) + { + callback(line); + } + } +}; +}}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventargs.h new file mode 100644 index 0000000..8142268 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventargs.h @@ -0,0 +1,47 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_eventargs.h: Public API declarations for EventArgs C++ base class +// + +#pragma once +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Base class for event arguments. +/// +class EventArgs +{ +public: + + /// + /// Destructor. + /// + virtual ~EventArgs() {} + +protected: + + /*! \cond PROTECTED */ + + /// + /// Constructor. + /// + EventArgs() {}; + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(EventArgs); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventsignal.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventsignal.h new file mode 100644 index 0000000..a1e10c0 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventsignal.h @@ -0,0 +1,200 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_eventsignal.h: Public API declarations for the EventSignal class. This derives from +// EventSignalBase and uses runtime type information (RTTI) to facilitate management and disconnection of handlers +// without explicit callback token management. +// + +#pragma once +#include +#include +#include +#include + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Clients can connect to the event signal to receive events, or disconnect from the event signal to stop receiving events. +/// +/// +/// At construction time, connect and disconnect callbacks can be provided that are called when +/// the number of connected clients changes from zero to one or one to zero, respectively. +/// +// +template +class EventSignal : public EventSignalBase +{ +public: + /// + /// Callback type that is used for signalling the event to connected clients. 
+ /// + using CallbackFunction = std::function; + + /// + /// A monotonically increasing token used for registration, tracking, and unregistration of callbacks. + /// + using CallbackToken = uint32_t; + + /// + /// Type for callbacks used when any client connects to the signal (the number of connected clients changes from zero to one) or + /// the last client disconnects from the signal (the number of connected clients changes from one to zero). + /// + using NotifyCallback_Type = std::function&)>; + + /// + /// Constructs an event signal with empty register and disconnect callbacks. + /// + EventSignal() : EventSignal(nullptr) + { + } + + /// + /// Constructor. + /// + /// Callback to invoke if the number of connected clients changes from zero to one, or one to zero + EventSignal(NotifyCallback_Type connectedAndDisconnected) + : EventSignal(connectedAndDisconnected, connectedAndDisconnected) + { + } + + /// + /// Constructor. + /// + /// Callback to invoke if the number of connected clients changes from zero to one. + /// Callback to invoke if the number of connected clients changes from one to zero. + EventSignal(NotifyCallback_Type connected, NotifyCallback_Type disconnected) + : EventSignalBase() + , m_firstConnectedCallback(connected) + , m_lastDisconnectedCallback(disconnected) + { + } + + /// + /// Addition assignment operator overload. + /// Connects the provided callback to the event signal, see also . + /// + /// Callback to connect. + /// Event signal reference. + EventSignal& operator+=(CallbackFunction callback) + { + Connect(callback); + return *this; + } + + /// + /// Subtraction assignment operator overload. + /// Disconnects the provided callback from the event signal, see also . + /// + /// Callback to disconnect. + /// Event signal reference. 
+ EventSignal& operator-=(CallbackFunction callback) + { + Disconnect(callback); + return *this; + } + + /// + /// Connects given callback function to the event signal, to be invoked when the event is signalled. + /// + /// + /// When the number of connected clients changes from zero to one, the connect callback will be called, if provided. + /// + /// Callback to connect. + void Connect(CallbackFunction callback) + { + std::unique_lock lock(m_mutex); + + auto shouldFireFirstConnected = m_callbacks.empty() && m_firstConnectedCallback != nullptr; + + (void)EventSignalBase::RegisterCallback(callback); + + lock.unlock(); + + if (shouldFireFirstConnected) + { + m_firstConnectedCallback(*this); + } + } + +#ifndef AZAC_CONFIG_CXX_NO_RTTI + /// + /// Disconnects given callback. + /// + /// + /// When the number of connected clients changes from one to zero, the disconnect callback will be called, if provided. + /// + /// Callback function. + void Disconnect(CallbackFunction callback) + { + std::unique_lock lock(m_mutex); + + auto itMatchingCallback = std::find_if( + m_callbacks.begin(), + m_callbacks.end(), + [&](const std::pair& item) + { + return callback.target_type() == item.second.target_type(); + }); + + auto removeHappened = EventSignal::UnregisterCallback(itMatchingCallback->first); + lock.unlock(); + if (removeHappened && m_callbacks.empty() && m_lastDisconnectedCallback != nullptr) + { + m_lastDisconnectedCallback(*this); + } + } +#else + void Disconnect(CallbackFunction) + { + // Callback disconnection without a stored token requires runtime type information. + // To remove callbacks with RTTI disabled, use UnregisterCallback(token). + SPX_THROW_HR(SPXERR_NOT_IMPL); + } +#endif + + /// + /// Disconnects all registered callbacks. 
+ /// + void DisconnectAll() + { + std::unique_lock lock(m_mutex); + auto shouldFireLastDisconnected = !m_callbacks.empty() && m_lastDisconnectedCallback != nullptr; + + EventSignal::UnregisterAllCallbacks(); + + lock.unlock(); + + if (shouldFireLastDisconnected) + { + m_lastDisconnectedCallback(*this); + } + } + + /// + /// Signals the event with given arguments to all connected callbacks. + /// + /// Event arguments to signal. + void Signal(T t) + { + EventSignalBase::Signal(t); + } + +private: + using EventSignalBase::m_mutex; + using EventSignalBase::m_callbacks; + + NotifyCallback_Type m_firstConnectedCallback; + NotifyCallback_Type m_lastDisconnectedCallback; + + EventSignal(const EventSignal&) = delete; + EventSignal(const EventSignal&&) = delete; + EventSignal& operator=(const EventSignal&) = delete; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventsignalbase.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventsignalbase.h new file mode 100644 index 0000000..4450283 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_eventsignalbase.h @@ -0,0 +1,164 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_eventsignalbase.h: Public API declarations for EventSignalBase C++ template class +// + +#pragma once +#include +#include +#include +#include +#include + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Clients can connect to the event signal to receive events, or disconnect from the event signal to stop receiving events. +/// +/// +/// At construction time, connect and disconnect callbacks can be provided that are called when +/// the number of connected clients changes from zero to one or one to zero, respectively. 
+/// +// +template +class EventSignalBase +{ +public: + /// + /// Constructs an event signal with empty connect and disconnect actions. + /// + EventSignalBase() : + m_nextCallbackToken(0) + { + } + + /// + /// Destructor. + /// + virtual ~EventSignalBase() + { + UnregisterAllCallbacks(); + } + + /// + /// Callback type that is used for signalling the event to connected clients. + /// + using CallbackFunction = std::function; + + /// + /// The argument type for the callback event + /// + using CallbackArgument = T; + + /// + /// A monotonically increasing token used for registration, tracking, and unregistration of callbacks. + /// + using CallbackToken = uint32_t; + + /// + /// Registers a callback to this EventSignalBase and assigns it a unique token. + /// + /// The callback to register. + /// + /// The new token associated with this registration that can be used for subsequent unregistration. + /// + CallbackToken RegisterCallback(CallbackFunction callback) + { + std::unique_lock lock(m_mutex); + + auto token = m_nextCallbackToken; + m_nextCallbackToken++; + + m_callbacks.emplace(token, callback); + + return token; + } + + /// + /// If present, unregisters a callback from this EventSource associated with the provided token. Tokens are + /// returned from RegisterCallback at the time of registration. + /// + /// + /// The token associated with the callback to be removed. This token is provided by the return value of + /// RegisterCallback at the time of registration. + /// + /// A value indicating whether any callback was unregistered in response to this request. + bool UnregisterCallback(CallbackToken token) + { + std::unique_lock lock(m_mutex); + return (bool)m_callbacks.erase(token); + } + + /// + /// Function call operator. + /// Signals the event with given arguments to connected clients, see also . + /// + /// Event arguments to signal. + void operator()(T t) + { + Signal(t); + } + + /// + /// Unregisters all registered callbacks. 
+ /// + void UnregisterAllCallbacks() + { + std::unique_lock lock(m_mutex); + m_callbacks.clear(); + } + + /// + /// Signals the event with given arguments to all connected callbacks. + /// + /// Event arguments to signal. + void Signal(T t) + { + std::unique_lock lock(m_mutex); + + auto callbacksSnapshot = m_callbacks; + for (auto callbackCopyPair : callbacksSnapshot) + { + // now, while a callback is in progress, it can disconnect itself and any other connected + // callback. Check to see if the next one stored in the copy container is still connected. + bool stillConnected = (std::find_if(m_callbacks.begin(), m_callbacks.end(), + [&](const std::pair item) { + return callbackCopyPair.first == item.first; + }) != m_callbacks.end()); + + if (stillConnected) + { + callbackCopyPair.second(t); + } + } + } + + /// + /// Checks if a callback is connected. + /// + /// true if a callback is connected + bool IsConnected() const + { + std::unique_lock lock(m_mutex); + return !m_callbacks.empty(); + } + +protected: + std::map m_callbacks; + CallbackToken m_nextCallbackToken; + mutable std::recursive_mutex m_mutex; + +private: + EventSignalBase(const EventSignalBase&) = delete; + EventSignalBase(const EventSignalBase&&) = delete; + EventSignalBase& operator=(const EventSignalBase&) = delete; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_file_logger.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_file_logger.h new file mode 100644 index 0000000..7638d09 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_file_logger.h @@ -0,0 +1,115 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Diagnostics { +namespace Logging { + +/// +/// Class with static methods to control file-based SDK logging. +/// Turning on logging while running your Speech SDK scenario provides +/// detailed information from the SDK's core native components. If you +/// report an issue to Microsoft, you may be asked to provide logs to help +/// Microsoft diagnose the issue. Your application should not take dependency +/// on particular log strings, as they may change from one SDK release to another +/// without notice. +/// FileLogger is the simplest logging solution and suitable for diagnosing +/// most on-device issues when running Speech SDK. +/// Added in version 1.20.0 +/// +/// File logging is a process wide construct. That means that if (for example) +/// you have multiple speech recognizer objects running in parallel, there will be one +/// log file containing interleaved logs lines from all recognizers. You cannot get a +/// separate log file for each recognizer. +class FileLogger +{ +public: + /// + /// Starts logging to a file. + /// + /// Path to a log file on local disk + /// Optional. If true, appends to existing log file. If false, creates a new log file + /// Note that each write operation to the file is immediately followed by a flush to disk. + /// For typical usage (e.g. one Speech Recognizer and a Solid State Drive (SSD)) this should not + /// cause performace issues. You may however want to avoid file logging when running many Speech + /// SDK recognizers or other SDK objects simultaneously. Use MemoryLogger or EventLogger instead. 
+ static void Start(const SPXSTRING& filePath, bool append = false) + { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, filePath.empty()); + + SPX_THROW_ON_FAIL(property_bag_create(&hpropbag)); + SPX_THROW_ON_FAIL(property_bag_set_string(hpropbag, -1, "SPEECH-LogFilename", Utils::ToUTF8(filePath).c_str())); + SPX_THROW_ON_FAIL(property_bag_set_string(hpropbag, -1, "SPEECH-AppendToLogFile", append ? "1" : "0")); + SPX_THROW_ON_FAIL(diagnostics_log_start_logging(hpropbag, nullptr)); + SPX_THROW_ON_FAIL(property_bag_release(hpropbag)); + } + + /// + /// Stops logging to a file. + /// + /// This call is optional. If logging as been started, + /// the log file will be written when the process exists normally. + static void Stop() + { + SPX_THROW_ON_FAIL(diagnostics_log_stop_logging()); + } + + /// + /// Sets or clears the filters that apply to file logging. + /// Once filters are set, the callback will be invoked only if the log string + /// contains at least one of the strings specified by the filters. The match is case sensitive. + /// + /// Optional. Filters to use, or an empty list to remove previously set filters. + static void SetFilters(std::initializer_list filters = {}) + { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(property_bag_create(&hpropbag)); + + PropBagSetFilter(hpropbag, filters); + + SPX_THROW_ON_FAIL(diagnostics_log_apply_properties(hpropbag, nullptr)); + SPX_THROW_ON_FAIL(property_bag_release(hpropbag)); + } + + /// + /// Sets the level of the messages to be captured by the logger + /// + /// Maximum level of detail to be captured by the logger. 
+ static void SetLevel(Level level) + { + const auto levelStr = Details::LevelToString(level); + diagnostics_set_log_level("memory", levelStr); + } + +private: + static void PropBagSetFilter(AZAC_HANDLE hpropbag, std::initializer_list filters) + { + std::string str = ""; + + if (filters.size() > 0) + { + std::ostringstream filtersCollapsed; + std::copy(filters.begin(), filters.end(), std::ostream_iterator(filtersCollapsed, ";")); + str = filtersCollapsed.str(); + } + + SPX_THROW_ON_FAIL(property_bag_set_string(hpropbag, -1, "SPEECH-LogFileFilters", str.c_str())); + } +}; + +}}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar.h new file mode 100644 index 0000000..056e0a1 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar.h @@ -0,0 +1,70 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_grammar.h: Public API declarations for Grammar C++ class +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents base class grammar for customizing speech recognition. +/// Added in version 1.5.0. +/// +class Grammar +{ +public: + + /// + /// Creates a grammar from a storage ID. + /// Added in version 1.7.0. + /// + /// The persisted storage ID of the language model. + /// The grammar. + /// + /// Creating a grammar from a storage ID is only usable in specific scenarios and is not generally possible. + /// + static std::shared_ptr FromStorageId(const SPXSTRING& storageId) + { + SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(grammar_create_from_storage_id(&hgrammar, Utils::ToUTF8(storageId.c_str()))); + + return std::make_shared(hgrammar); + } + + /// + /// Internal constructor. 
Creates a new instance using the provided handle. + /// + /// Grammar handle. + explicit Grammar(SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID) : m_hgrammar(hgrammar) { } + + /// + /// Destructor, does nothing. + /// + virtual ~Grammar() { } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXGRAMMARHANDLE() { return m_hgrammar; } + +protected: + /*! \cond PROTECTED */ + DISABLE_COPY_AND_MOVE(Grammar); + + SmartHandle m_hgrammar; + /*! \endcond */ +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar_list.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar_list.h new file mode 100644 index 0000000..1118bca --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar_list.h @@ -0,0 +1,90 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_grammar_list.h: Public API declarations for GrammarList C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents a list of grammars for dynamic grammar scenarios. +/// Added in version 1.7.0. +/// +/// +/// GrammarLists are only usable in specific scenarios and are not generally available. +/// +class GrammarList : public Grammar +{ +public: + + /// + /// Creates a grammar lsit for the specified recognizer. + /// + /// The recognizer from which to obtain the grammar list. + /// The grammar list associated with the recognizer. + /// + /// Creating a grammar list from a recognizer is only usable in specific scenarios and is not generally available. + /// + template + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer) + { + SPXRECOHANDLE hreco = recognizer != nullptr + ? 
(SPXRECOHANDLE)(*recognizer.get()) + : SPXHANDLE_INVALID; + + SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(grammar_list_from_recognizer(&hgrammar, hreco)); + + return std::make_shared(hgrammar); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// GrammarList handle. + explicit GrammarList(SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID) : Grammar(hgrammar) { } + + /// + /// Adds a single grammar to the current grammar list + /// + /// The grammar to add + /// + /// Currently Class Language Models are the only support grammars to add. + /// + template + void Add(std::shared_ptr grammar) + { + SPX_THROW_ON_FAIL(grammar_list_add_grammar(m_hgrammar.get(), (SPXGRAMMARHANDLE)(*grammar.get()))); + } + + /// + /// Sets the Recognition Factor applied to all grammars in a recognizer's GrammarList + /// + /// The RecognitionFactor to apply + /// The scope for the Recognition Factor being set + /// + /// The Recognition Factor is a numerical value greater than 0 modifies the default weight applied to supplied grammars. + /// Setting the Recognition Factor to 0 will disable the supplied grammars. + /// The default Recognition Factor is 1. + /// + void SetRecognitionFactor(double factor, RecognitionFactorScope scope) + { + SPX_THROW_ON_FAIL(grammar_list_set_recognition_factor(m_hgrammar.get(), factor, (GrammarList_RecognitionFactorScope)scope)); + } + +private: + DISABLE_COPY_AND_MOVE(GrammarList); +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar_phrase.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar_phrase.h new file mode 100644 index 0000000..2c35b9d --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_grammar_phrase.h @@ -0,0 +1,64 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_phrase_grammar.h: Public API declarations for GrammarPhrase C++ class +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents a phrase that may be spoken by the user. +/// Added in version 1.5.0. +/// +class GrammarPhrase +{ +public: + + /// + /// Creates a grammar phrase using the specified phrase text. + /// + /// The text representing a phrase that may be spoken by the user. + /// A shared pointer to a grammar phrase. + static std::shared_ptr From(const SPXSTRING& text) + { + SPXPHRASEHANDLE hphrase = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(grammar_phrase_create_from_text(&hphrase, Utils::ToUTF8(text).c_str())); + return std::make_shared(hphrase); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Grammar phrase handle. + explicit GrammarPhrase(SPXPHRASEHANDLE hphrase) : m_hphrase(hphrase) { }; + + /// + /// Virtual destructor + /// + virtual ~GrammarPhrase() { } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXPHRASEHANDLE() { return m_hphrase; } + +private: + + DISABLE_DEFAULT_CTORS(GrammarPhrase); + + SmartHandle m_hphrase; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_hybrid_speech_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_hybrid_speech_config.h new file mode 100644 index 0000000..39ca52e --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_hybrid_speech_config.h @@ -0,0 +1,161 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_hybrid_speech_config.h: Public API declarations for HybridSpeechConfig C++ class +// +#pragma once + +#include + +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines hybrid (cloud and embedded) configurations for speech recognition or speech synthesis. +/// +class HybridSpeechConfig +{ +protected: + /*! \cond PROTECTED */ + + SpeechConfig m_config; + + /*! \endcond */ + +public: + /// + /// Internal operator used to get the underlying handle value. + /// + /// A handle. + explicit operator SPXSPEECHCONFIGHANDLE() const + { + return static_cast(m_config); + } + + /// + /// Creates an instance of the hybrid speech config with specified cloud and embedded speech configs. + /// + /// A shared smart pointer of a cloud speech config. + /// A shared smart pointer of an embedded speech config. + /// A shared pointer to the new hybrid speech config instance. + static std::shared_ptr FromConfigs( + std::shared_ptr cloudSpeechConfig, + std::shared_ptr embeddedSpeechConfig) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hybrid_speech_config_create( + &hconfig, + Utils::HandleOrInvalid(cloudSpeechConfig), + Utils::HandleOrInvalid(embeddedSpeechConfig))); + + auto ptr = new HybridSpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Sets the speech recognition output format. + /// + /// Speech recognition output format (simple or detailed). + void SetSpeechRecognitionOutputFormat(OutputFormat format) + { + m_config.SetOutputFormat(format); + } + + /// + /// Gets the speech recognition output format. + /// + /// Speech recognition output format (simple or detailed). + OutputFormat GetSpeechRecognitionOutputFormat() const + { + return m_config.GetOutputFormat(); + } + + /// + /// Sets the speech synthesis output format (e.g. Riff16Khz16BitMonoPcm). 
+ /// + /// Specifies the output format ID + void SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat formatId) + { + m_config.SetSpeechSynthesisOutputFormat(formatId); + } + + /// + /// Gets the speech synthesis output format. + /// + /// The speech synthesis output format. + SPXSTRING GetSpeechSynthesisOutputFormat() const + { + return m_config.GetSpeechSynthesisOutputFormat(); + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + m_config.SetProperty(name, value); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + m_config.SetProperty(id, value); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + return m_config.GetProperty(name); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + return m_config.GetProperty(id); + } + + /// + /// Destructs the object. + /// + virtual ~HybridSpeechConfig() = default; + +protected: + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + inline explicit HybridSpeechConfig(SPXSPEECHCONFIGHANDLE hconfig) : m_config(hconfig) + { + } + + /*! \endcond */ + +private: + DISABLE_COPY_AND_MOVE(HybridSpeechConfig); + + }; + +}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognition_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognition_eventargs.h new file mode 100644 index 0000000..dda06e8 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognition_eventargs.h @@ -0,0 +1,169 @@ +// +// Copyright (c) Microsoft. 
All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_intent_recognition_eventargs.h: Public API declarations for IntentRecognitionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + + +/// +/// Class for intent recognition event arguments. +/// +class IntentRecognitionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit IntentRecognitionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(IntentResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~IntentRecognitionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Intent recognition event result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Intent recognition event result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(IntentRecognitionEventArgs); + + SPXRESULTHANDLE IntentResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for intent recognition canceled event arguments. 
+/// +class IntentRecognitionCanceledEventArgs final : public IntentRecognitionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit IntentRecognitionCanceledEventArgs(SPXEVENTHANDLE hevent) : + IntentRecognitionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~IntentRecognitionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// Added in version 1.1.0. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. 
+ /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(IntentRecognitionCanceledEventArgs); +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognition_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognition_result.h new file mode 100644 index 0000000..360e846 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognition_result.h @@ -0,0 +1,119 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_intent_recognition_result.h: Public API declarations for IntentRecognitionResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "speechapi_c_json.h" + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents the result of an intent recognition. +/// +class IntentRecognitionResult final : public RecognitionResult +{ +public: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Result handle. + explicit IntentRecognitionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult), + IntentId(m_intentId) + { + PopulateIntentFields(hresult, &m_intentId); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str()); + } + + /// + /// A call to return a map of the entities found in the utterance. + /// + /// + /// A map with the entity name as a key and containing the value of the entity found in the utterance. + /// + /// + /// This currently does not report LUIS entities. 
+ /// + const std::map& GetEntities() const + { + return m_entities; + } + + /// + /// Destructor. + /// + ~IntentRecognitionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + /// + /// Unique intent id. + /// + const SPXSTRING& IntentId; + +private: + DISABLE_DEFAULT_CTORS(IntentRecognitionResult); + + void PopulateIntentFields(SPXRESULTHANDLE hresult, SPXSTRING* pintentId) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 1024; + char sz[maxCharCount+1] = {}; + + if (pintentId != nullptr && recognizer_result_handle_is_valid(hresult)) + { + SPX_THROW_ON_FAIL(hr = intent_result_get_intent_id(hresult, sz, maxCharCount)); + *pintentId = Utils::ToSPXString(sz); + } + + auto jsonSLE = Properties.GetProperty("LanguageUnderstandingSLE_JsonResult"); + SPXHANDLE parserHandle = SPXHANDLE_INVALID; + auto scopeGuard = Utils::MakeScopeGuard([&parserHandle]() + { + if (parserHandle != SPXHANDLE_INVALID) + { + ai_core_json_parser_handle_release(parserHandle); + } + }); + + auto root = ai_core_json_parser_create(&parserHandle, jsonSLE.c_str(), jsonSLE.size()); + int count = ai_core_json_item_count(parserHandle, root); + for (int i = 0; i < count; i++) + { + auto itemInt = ai_core_json_item_at(parserHandle, root, i, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, itemInt); + + // Need to use string copy here to force the ajv json parser to convert back to utf8. 
+ auto name = ai_core_json_value_as_string_copy(parserHandle, nameInt, ""); + auto value = ai_core_json_value_as_string_copy(parserHandle, itemInt, ""); + if (value != nullptr && name != nullptr) + { + m_entities[name] = value; + } + } + + } + + SPXSTRING m_intentId; + std::map m_entities; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Recognition::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognizer.h new file mode 100644 index 0000000..473053d --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_recognizer.h @@ -0,0 +1,513 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_intent_recognizer.h: Public API declarations for IntentRecognizer C++ class +// + +#pragma once +#include +#include +#include +#include "speechapi_c_json.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// In addition to performing speech-to-text recognition, the IntentRecognizer extracts structured information +/// about the intent of the speaker, which can be used to drive further actions using dedicated intent triggers +/// (see ). +/// + class IntentRecognizer : public AsyncRecognizer + { + public: + + using BaseType = AsyncRecognizer; + + /// + /// Creates an intent recognizer from a speech config and an audio config. + /// Users should use this function to create a new instance of an intent recognizer. + /// + /// Speech configuration. + /// Audio configuration. + /// Instance of intent recognizer. 
+ static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_intent_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Creates an intent recognizer from an embedded speech config and an audio config. + /// Users should use this function to create a new instance of an intent recognizer. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// Audio configuration. + /// Instance of intent recognizer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_intent_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit IntentRecognizer(SPXRECOHANDLE hreco) : BaseType(hreco), Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// destructor + /// + ~IntentRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Starts intent recognition, and returns after a single utterance is recognized. The end of a + /// single utterance is determined by listening for silence at the end or until a maximum of about 30 + /// seconds of audio is processed. The task returns the recognition text as result. + /// Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single + /// shot recognition like command or query. + /// For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead.. 
+ /// + /// Future containing result value (a shared pointer to IntentRecognitionResult) + /// of the asynchronous intent recognition. + /// + std::future> RecognizeOnceAsync() override + { + return BaseType::RecognizeOnceAsyncInternal(); + } + + /// + /// Starts intent recognition, and generates a result from the text passed in. This is useful for testing and other times when the speech input + /// is not tied to the IntentRecognizer. + /// Note: The Intent Service does not currently support this so it is only valid for offline pattern matching or exact matching intents. + /// + /// The text to be evaluated. + /// Future containing result value (a shared pointer to IntentRecognitionResult) + /// of the asynchronous intent recognition. + /// + std::future> RecognizeOnceAsync(SPXSTRING text) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, text]() -> std::shared_ptr { + SPX_INIT_HR(hr); + + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(hr = intent_recognizer_recognize_text_once(m_hreco, Utils::ToUTF8(text).c_str(), &hresult)); + + return std::make_shared(hresult); + }); + return future; + } + + /// + /// Asynchronously initiates continuous intent recognition operation. + /// + /// An empty future. + std::future StartContinuousRecognitionAsync() override + { + return BaseType::StartContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously terminates ongoing continuous intent recognition operation. + /// + /// An empty future. + std::future StopContinuousRecognitionAsync() override + { + return BaseType::StopContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously initiates keyword recognition operation. + /// + /// Specifies the keyword model to be used. + /// An empty future. 
+ std::future StartKeywordRecognitionAsync(std::shared_ptr model) override + { + return BaseType::StartKeywordRecognitionAsyncInternal(model); + } + + /// + /// Asynchronously terminates keyword recognition operation. + /// + /// An empty future. + std::future StopKeywordRecognitionAsync() override + { + return BaseType::StopKeywordRecognitionAsyncInternal(); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Adds a simple phrase that may be spoken by the user, indicating a specific user intent. + /// This simple phrase can be a pattern including and enitity surrounded by braces. Such as "click the {checkboxName} checkbox". + /// + /// + /// The phrase corresponding to the intent. + /// Once recognized, the IntentRecognitionResult's IntentId property will match the simplePhrase specified here. + /// If any entities are specified and matched, they will be available in the IntentResult->GetEntities() call. + /// + void AddIntent(const SPXSTRING& simplePhrase) + { + auto trigger = IntentTrigger::From(simplePhrase); + return AddIntent(trigger, simplePhrase); + } + + /// + /// Adds a simple phrase that may be spoken by the user, indicating a specific user intent. + /// This simple phrase can be a pattern including and enitity surrounded by braces. Such as "click the {checkboxName} checkbox". + /// + /// The phrase corresponding to the intent. + /// A custom id string to be returned in the IntentRecognitionResult's IntentId property. + /// Once recognized, the result's intent id will match the id supplied here. + /// If any entities are specified and matched, they will be available in the IntentResult->GetEntities() call. + /// + void AddIntent(const SPXSTRING& simplePhrase, const SPXSTRING& intentId) + { + auto trigger = IntentTrigger::From(simplePhrase); + return AddIntent(trigger, intentId); + } + + /// + /// Adds a single intent by name from the specified Language Understanding Model. 
+ /// For PatternMatchingModel and ConversationalLanguageUnderstandingModel types, this will clear + /// any existing models before enabling it. For these types, the intentName is ignored. + /// + /// The language understanding model containing the intent. + /// The name of the single intent to be included from the language understanding model. + /// Once recognized, the IntentRecognitionResult's IntentId property will contain the intentName specified here. + void AddIntent(std::shared_ptr model, const SPXSTRING& intentName) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + { + auto trigger = IntentTrigger::From(model, intentName); + AddIntent(trigger, intentName); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + intent_recognizer_clear_language_models(m_hreco); + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + intent_recognizer_clear_language_models(m_hreco); + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + } + + /// + /// Adds a single intent by name from the specified Language Understanding Model. + /// For PatternMatchingModel and ConversationalLanguageUnderstandingModel types, this will clear + /// any existing models before enabling it. For these types, the intentName and intentId are ignored. + /// + /// The language understanding model containing the intent. + /// The name of the single intent to be included from the language understanding model. + /// A custom id string to be returned in the IntentRecognitionResult's IntentId property. 
+ void AddIntent(std::shared_ptr model, const SPXSTRING& intentName, const SPXSTRING& intentId) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + { + auto trigger = IntentTrigger::From(model, intentName); + AddIntent(trigger, intentId); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + intent_recognizer_clear_language_models(m_hreco); + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + intent_recognizer_clear_language_models(m_hreco); + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + } + + /// + /// Adds all intents from the specified Language Understanding Model. + /// For PatternMatchingModel and ConversationalLanguageUnderstandingModel types, this will clear + /// any existing models before enabling it. + /// + /// The language understanding model containing the intents. + /// Once recognized, the IntentRecognitionResult's IntentId property will contain the name of the intent recognized. 
+ void AddAllIntents(std::shared_ptr model) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + { + auto trigger = IntentTrigger::From(model); + AddIntent(trigger, SPXSTRING_EMPTY); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + intent_recognizer_clear_language_models(m_hreco); + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + intent_recognizer_clear_language_models(m_hreco); + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + } + + /// + /// Adds all intents from the specified Language Understanding Model. + /// For PatternMatchingModel and ConversationalLanguageUnderstandingModel types, this will clear + /// any existing models before enabling it. + /// + /// The language understanding model containing the intents. + /// A custom string id to be returned in the IntentRecognitionResult's IntentId property. 
+ void AddAllIntents(std::shared_ptr model, const SPXSTRING& intentId) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + { + auto trigger = IntentTrigger::From(model); + AddIntent(trigger, intentId); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + intent_recognizer_clear_language_models(m_hreco); + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + intent_recognizer_clear_language_models(m_hreco); + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + } + + /// + /// Adds the IntentTrigger specified. + /// + /// The IntentTrigger corresponding to the intent. + /// A custom string id to be returned in the IntentRecognitionResult's IntentId property. + void AddIntent(std::shared_ptr trigger, const SPXSTRING& intentId) + { + SPX_THROW_ON_FAIL(intent_recognizer_add_intent(m_hreco, Utils::ToUTF8(intentId).c_str(), (SPXTRIGGERHANDLE)(*trigger.get()))); + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the recognizer will encounter errors during recognition. + /// + /// A string that represents the authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. 
+ /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Takes a collection of language understanding models, makes a copy of them, and applies them to the recognizer. This application + /// happens at different times depending on the language understanding model type. + /// Simple Language Models will become active almost immediately whereas + /// language understanding models utilizing LUIS will become active on the next Speech turn. + /// This replaces any previously applied models. + /// + /// A vector of shared pointers to LanguageUnderstandingModels. + /// True if the application of the models takes effect immediately. Otherwise false. + bool ApplyLanguageModels(const std::vector>& collection) + { + bool result = true; + SPXTRIGGERHANDLE htrigger = SPXHANDLE_INVALID; + + // Clear existing language models. + SPX_THROW_ON_FAIL(intent_recognizer_clear_language_models(m_hreco)); + + // Add the new ones. 
+ for (auto model : collection) + { + switch (model->GetModelType()) + { + case LanguageUnderstandingModel::LanguageUnderstandingModelType::LanguageUnderstandingModel: + SPX_THROW_ON_FAIL(intent_trigger_create_from_language_understanding_model(&htrigger, static_cast(*model), nullptr)); + intent_recognizer_add_intent(m_hreco, nullptr, htrigger); + result = false; + break; + case LanguageUnderstandingModel::LanguageUnderstandingModelType::PatternMatchingModel: + { + AddPatternMatchingModel(model); + break; + } + case LanguageUnderstandingModel::LanguageUnderstandingModelType::ConversationalLanguageUnderstandingModel: + { + auto cluModel = static_cast(model.get()); + intent_recognizer_add_conversational_language_understanding_model( + m_hreco, + cluModel->languageResourceKey.c_str(), + cluModel->endpoint.c_str(), + cluModel->projectName.c_str(), + cluModel->deploymentName.c_str()); + break; + } + default: + break; + } + + } + return result; + } + +private: + void AddPatternMatchingModel(const std::shared_ptr& luModel) const + { + auto model = static_cast(luModel.get()); + std::string modelId = model->GetModelId(); + + Utils::AbiHandle hModel(language_understanding_model__handle_release); + SPX_THROW_ON_FAIL(pattern_matching_model_create(&hModel, m_hreco, modelId.c_str())); + + PATTERN_MATCHING_MODEL_GET_STR_FROM_INDEX vectorGetter = [](void* context, size_t index, const char** phrase, size_t* phraseLen) -> AZACHR + { + try + { + SPX_RETURN_HR_IF(SPXERR_INVALID_ARG, context == nullptr || phrase == nullptr || phraseLen == nullptr); + + auto phrases = static_cast*>(context); + SPX_RETURN_HR_IF(SPXERR_OUT_OF_RANGE, index >= phrases->size()); + + *phrase = phrases->at(index).c_str(); + *phraseLen = phrases->at(index).length(); + return SPX_NOERROR; + } + catch (...) 
+ { + return SPXERR_UNHANDLED_EXCEPTION; + } + }; + + for (const auto& entity : model->Entities) + { + SPX_THROW_ON_FAIL(pattern_matching_model_add_entity( + hModel, + entity.Id.c_str(), + (int)entity.Type, + (int)entity.Mode, + entity.Phrases.size(), + (void*)&entity.Phrases, + vectorGetter)); + } + + for (const auto& intent : model->Intents) + { + SPX_THROW_ON_FAIL(pattern_matching_model_add_intent( + hModel, + intent.Id.c_str(), + 0, // no priority at the moment so set to 0 + intent.Phrases.size(), + (void*)&intent.Phrases, + vectorGetter)); + } + + Utils::AbiHandle hTrigger(intent_trigger_handle_release); + SPX_THROW_ON_FAIL(intent_trigger_create_from_language_understanding_model(&hTrigger, hModel, "")); + + SPX_THROW_ON_FAIL(intent_recognizer_add_intent_with_model_id(m_hreco, hTrigger, modelId.c_str())); + } + + DISABLE_COPY_AND_MOVE(IntentRecognizer); + + friend class Microsoft::CognitiveServices::Speech::Session; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_trigger.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_trigger.h new file mode 100644 index 0000000..b67babd --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_intent_trigger.h @@ -0,0 +1,87 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_intent_trigger.h: Public API declarations for IntentTrigger C++ class +// + +#pragma once +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents an intent trigger. +/// +class IntentTrigger +{ +public: + + /// + /// Creates an intent trigger using the specified phrase. + /// + /// The simple phrase to create an intent trigger for. + /// A shared pointer to an intent trigger. 
+ static std::shared_ptr From(const SPXSTRING& simplePhrase) + { + SPXTRIGGERHANDLE htrigger = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(intent_trigger_create_from_phrase(&htrigger, Utils::ToUTF8(simplePhrase).c_str())); + return std::make_shared(htrigger); + } + + /// + /// Creates an intent trigger using the specified LanguageUnderstandingModel. + /// + /// The LanguageUnderstandingModel to create an intent trigger for. + /// A shared pointer to an intent trigger. + static std::shared_ptr From(std::shared_ptr model) + { + SPXTRIGGERHANDLE htrigger = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(intent_trigger_create_from_language_understanding_model(&htrigger, (SPXLUMODELHANDLE)(*model.get()), nullptr)); + return std::make_shared(htrigger); + } + + /// + /// Creates an intent trigger using the specified LanguageUnderstandingModel and an intent name. + /// + /// The LanguageUnderstandingModel to create an intent trigger for. + /// The intent name to create an intent trigger for. + /// A shared pointer to an intent trigger. + static std::shared_ptr From(std::shared_ptr model, const SPXSTRING& intentName) + { + SPXTRIGGERHANDLE htrigger = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(intent_trigger_create_from_language_understanding_model(&htrigger, (SPXLUMODELHANDLE)(*model.get()), Utils::ToUTF8(intentName).c_str())); + return std::make_shared(htrigger); + } + + /// + /// Virtual destructor + /// + virtual ~IntentTrigger() { intent_trigger_handle_release(m_htrigger); m_htrigger = SPXHANDLE_INVALID; } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Trigger handle. + explicit IntentTrigger(SPXTRIGGERHANDLE htrigger) : m_htrigger(htrigger) { }; + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. 
+ explicit operator SPXTRIGGERHANDLE() { return m_htrigger; } + +private: + DISABLE_DEFAULT_CTORS(IntentTrigger); + + SPXTRIGGERHANDLE m_htrigger; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_eventargs.h new file mode 100644 index 0000000..24411d5 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_eventargs.h @@ -0,0 +1,86 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_keyword_recognition_eventargs.h: Public API declarations for KeywordRecognitionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for the events emmited by the . +/// +class KeywordRecognitionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit KeywordRecognitionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~KeywordRecognitionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Keyword recognition event result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! 
\cond PROTECTED */ + + /// + /// Speech recognition event result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(KeywordRecognitionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_model.h new file mode 100644 index 0000000..afa56cd --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_model.h @@ -0,0 +1,101 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_keyword_recognition_model.h: Public API declarations for KeywordRecognitionModel C++ class +// + +#pragma once +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Represents keyword recognition model used with StartKeywordRecognitionAsync methods. +/// +class KeywordRecognitionModel +{ +public: + + /// + /// Creates a keyword recognition model using the specified file. + /// + /// The file name of the keyword recognition model. + /// A shared pointer to keyword recognition model. + static std::shared_ptr FromFile(const SPXSTRING& fileName) + { + SPXKEYWORDHANDLE hkeyword = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(keyword_recognition_model_create_from_file(Utils::ToUTF8(fileName).c_str(), &hkeyword)); + return std::make_shared(hkeyword); + } + + /// + /// Creates a keyword recognition model using the specified embedded speech config. + /// + /// Embedded speech config. 
+ /// A shared pointer to keyword recognition model. + static std::shared_ptr FromConfig(std::shared_ptr embeddedSpeechConfig) + { + SPXKEYWORDHANDLE hkeyword = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(keyword_recognition_model_create_from_config( + Utils::HandleOrInvalid(embeddedSpeechConfig), &hkeyword)); + + return std::make_shared(hkeyword); + } + + /// + /// Creates a keyword recognition model using the specified embedded speech config + /// and user-defined wake words. + /// + /// Embedded speech config. + /// User-defined wake words. + /// A shared pointer to keyword recognition model. + static std::shared_ptr FromConfig( + std::shared_ptr embeddedSpeechConfig, const std::vector& userDefinedWakeWords) + { + SPXKEYWORDHANDLE hkeyword = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(keyword_recognition_model_create_from_config( + Utils::HandleOrInvalid(embeddedSpeechConfig), &hkeyword)); + + for (const SPXSTRING& wakeWord : userDefinedWakeWords) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, wakeWord.empty()); + SPX_THROW_ON_FAIL(keyword_recognition_model_add_user_defined_wake_word( + static_cast(hkeyword), Utils::ToUTF8(wakeWord).c_str())); + } + + return std::make_shared(hkeyword); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Keyword handle. + explicit KeywordRecognitionModel(SPXKEYWORDHANDLE hkeyword = SPXHANDLE_INVALID) : m_hkwmodel(hkeyword) { } + + /// + /// Virtual destructor. + /// + virtual ~KeywordRecognitionModel() { keyword_recognition_model_handle_release(m_hkwmodel); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. 
+ explicit operator SPXKEYWORDHANDLE() { return m_hkwmodel; } + +private: + + DISABLE_COPY_AND_MOVE(KeywordRecognitionModel); + + SPXKEYWORDHANDLE m_hkwmodel; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_result.h new file mode 100644 index 0000000..ddaa7ee --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognition_result.h @@ -0,0 +1,44 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_keyword_recognition_result.h: Public API declarations for the KeywordRecognitionResult C++ class +// + +#pragma once + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines the results emitted by the . +/// +class KeywordRecognitionResult : public RecognitionResult +{ +public: + + explicit KeywordRecognitionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str()); + } + + virtual ~KeywordRecognitionResult() = default; + +private: + DISABLE_DEFAULT_CTORS(KeywordRecognitionResult); +}; + +inline std::shared_ptr AudioDataStream::FromResult(std::shared_ptr result) +{ + auto resultHandle = result != nullptr ? 
static_cast(*result) : SPXHANDLE_INVALID; + auto streamHandle = Utils::CallFactoryMethodLeft(audio_data_stream_create_from_keyword_result, resultHandle); + return std::shared_ptr{ new AudioDataStream(streamHandle) }; +} + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognizer.h new file mode 100644 index 0000000..3807868 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_keyword_recognizer.h @@ -0,0 +1,213 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_keyword_recognizer.h: Public API declarations for KeywordRecognizer C++ class +// +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +namespace Utils { + template + std::function&)> Callback(U* callee, F f) + { + return [=](const EventSignal& evt) + { + (callee->*f)(evt); + }; + } +} + +/// +/// Recognizer type that is specialized to only handle keyword activation. +/// +/// +/// First, the object needs to be instantiated: +/// +/// auto audioConfig = AudioConfig::FromMicrophoneInput(); // Or an alternative input +/// auto recognizer = KeywordRecognizer::FromConfig(audioConfig); +/// +/// (optional) Then, the events need to be wired in order to receive notifications: +/// +/// recognizer->Recognized += [](const KeywordRecognitionEventArgs& event) +/// { +/// // Your logic here... +/// }; +/// +/// And finally, recognition needs to be started. +/// +/// auto keywordModel = KeywordRecognitionModel::FromFile(modelPath); +/// auto resultFuture = recognizer->RecognizeKeywordOnceAsync(keywordModel); +/// resultFuture.wait(); +/// auto result = resultFuture.get(); +/// +///
    +///
  • +///
  • +///
  • +///
  • +///
+///
+class KeywordRecognizer: public std::enable_shared_from_this +{ +public: + /// + /// Creates a KeywordRecognizer from an . The config is intended + /// to define the audio input to be used by the recognizer object. + /// + /// Defines the audio input to be used by the recognizer. + /// A new KeywordRecognizer that will consume audio from the specified input. + /// + /// If no audio config is provided, it will be equivalent to calling with a config constructed with + /// + /// + inline static std::shared_ptr FromConfig(std::shared_ptr audioConfig = nullptr) + { + auto hreco = Utils::CallFactoryMethodLeft( + ::recognizer_create_keyword_recognizer_from_audio_config, + Utils::HandleOrInvalid(audioConfig)); + return std::shared_ptr(new KeywordRecognizer(hreco)); + } + + /// + /// Destructor. + /// + ~KeywordRecognizer() + { + Canceled.DisconnectAll(); + Recognized.DisconnectAll(); + recognizer_handle_release(m_handle); + } + + /// + /// Starts a keyword recognition session. This session will last until the first keyword is recognized. When this happens, + /// a event will be raised and the session will end. To rearm the keyword, the method needs to be called + /// again after the event is emitted. + /// + /// The that describes the keyword we want to detect. + /// A future that resolves to a that resolves once a keyword is detected. + /// + /// Note that if no keyword is detected in the input, the task will never resolve (unless is called. + /// + inline std::future> RecognizeOnceAsync(std::shared_ptr model) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, model, this]() + { + auto modelHandle = static_cast(*model); + + SPXRESULTHANDLE result = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognize_keyword_once(m_handle, modelHandle, &result)); + + return std::make_shared(result); + }); + return future; + } + + /// + /// Stops a currently active keyword recognition session. 
+ /// + /// A future that resolves when the active session (if any) is stopped. + inline std::future StopRecognitionAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() + { + SPX_THROW_ON_FAIL(recognizer_stop_keyword_recognition(m_handle)); + }); + return future; + } + + /// + /// Signal for events related to the recognition of keywords. + /// + EventSignal Recognized; + + /// + /// Signal for events relating to the cancellation of an interaction. The event indicates if the reason is a direct cancellation or an error. + /// + EventSignal Canceled; + +private: + /*! \cond PROTECTED */ + + static void FireEvent_Recognized(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + KeywordRecognitionEventArgs event{ h_event }; + keep_alive->Recognized.Signal(event); + } + + static void FireEvent_Canceled(SPXRECOHANDLE, SPXEVENTHANDLE h_event, void* pv_context) + { + auto keep_alive = static_cast(pv_context)->shared_from_this(); + SpeechRecognitionCanceledEventArgs event{ h_event }; + keep_alive->Canceled.Signal(event); + } + + void RecognizerEventConnectionChanged(const EventSignal& reco_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&reco_event== &Recognized) + { + ::recognizer_recognized_set_callback(m_handle, Recognized.IsConnected() ? 
KeywordRecognizer::FireEvent_Recognized : nullptr, this); + } + } + } + + void CanceledEventConnectionChanged(const EventSignal& canceled_event) + { + if (m_handle != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_handle=0x%8p", __FUNCTION__, (void*)m_handle); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_handle), "%s: m_handle is INVALID!!!", __FUNCTION__); + + if (&canceled_event == &Canceled) + { + ::recognizer_canceled_set_callback(m_handle, Canceled.IsConnected() ? KeywordRecognizer::FireEvent_Canceled : nullptr, this); + } + } + } + + inline explicit KeywordRecognizer(SPXRECOHANDLE handle): + Recognized{ Utils::Callback(this, &KeywordRecognizer::RecognizerEventConnectionChanged) }, + Canceled{ Utils::Callback(this, &KeywordRecognizer::CanceledEventConnectionChanged) }, + m_properties{ Utils::CallFactoryMethodRight(recognizer_get_property_bag, handle) }, + m_handle{ handle }, + Properties { m_properties } + { + } + + PropertyCollection m_properties; + SPXRECOHANDLE m_handle; + /*! \endcond */ + +public: + /// + /// A collection of properties and their values defined for this . + /// + const PropertyCollection& Properties; +}; + + +} } } diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_language_understanding_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_language_understanding_model.h new file mode 100644 index 0000000..343a6b6 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_language_understanding_model.h @@ -0,0 +1,113 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_language_understanding_model.h: Public API declarations for LanguageUnderstandingModel C++ class +// + +#pragma once +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents language understanding model used for intent recognition. +/// +class LanguageUnderstandingModel +{ +public: + + enum class LanguageUnderstandingModelType + { + PatternMatchingModel, + LanguageUnderstandingModel, + ConversationalLanguageUnderstandingModel + }; + + /// + /// Creates a language understanding (LUIS) model using the specified endpoint url. + /// + /// The endpoint url of a language understanding model. + /// A shared pointer to language understanding model. + static std::shared_ptr FromEndpoint(const SPXSTRING& uri) + { + SPXLUMODELHANDLE hlumodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(language_understanding_model_create_from_uri(&hlumodel, Utils::ToUTF8(uri).c_str())); + return std::make_shared(hlumodel); + } + + /// + /// Creates a language understanding model using the specified app id. + /// + /// A string that represents the application id of Language Understanding service. + /// A shared pointer to language understanding model. + static std::shared_ptr FromAppId(const SPXSTRING& appId) + { + SPXLUMODELHANDLE hlumodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(language_understanding_model_create_from_app_id(&hlumodel, Utils::ToUTF8(appId).c_str())); + return std::make_shared(hlumodel); + } + + /// + /// Creates a language understanding model using the specified hostname, subscription key and application id. + /// + /// A string that represents the subscription key of Language Understanding service. + /// A string that represents the application id of Language Understanding service. + /// A String that represents the region of the Language Understanding service (see the region page). + /// A shared pointer to language understanding model. 
+ static std::shared_ptr FromSubscription(const SPXSTRING& subscriptionKey, const SPXSTRING& appId, const SPXSTRING& region) + { + SPXLUMODELHANDLE hlumodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(language_understanding_model_create_from_subscription(&hlumodel, Utils::ToUTF8(subscriptionKey).c_str(), Utils::ToUTF8(appId).c_str(), Utils::ToUTF8(region).c_str())); + return std::make_shared(hlumodel); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Language understanding model handle. + explicit LanguageUnderstandingModel(SPXLUMODELHANDLE hlumodel = SPXHANDLE_INVALID) : m_type(LanguageUnderstandingModelType::LanguageUnderstandingModel), m_hlumodel(hlumodel) { } + + /// + /// Virtual destructor. + /// + virtual ~LanguageUnderstandingModel() { language_understanding_model__handle_release(m_hlumodel); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXLUMODELHANDLE() const { return m_hlumodel; } + + /// + /// Returns id for this model. + /// + /// An string representing the id of this model. + virtual SPXSTRING GetModelId() const { return Utils::ToSPXString(language_understanding_model_get_model_id(m_hlumodel)); } + + /// + /// Gets the model type. + /// + /// An enum representing the type of the model. + LanguageUnderstandingModelType GetModelType() const { return m_type; } +protected: + /// + /// Protected constructor for base classes to set type. + /// + /// Language understanding model type. 
+ LanguageUnderstandingModel(LanguageUnderstandingModelType type) : m_type(type), m_hlumodel(SPXHANDLE_INVALID){} + + LanguageUnderstandingModelType m_type; +private: + DISABLE_COPY_AND_MOVE(LanguageUnderstandingModel); + + SPXLUMODELHANDLE m_hlumodel; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_log_level.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_log_level.h new file mode 100644 index 0000000..7da838b --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_log_level.h @@ -0,0 +1,66 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Diagnostics { +namespace Logging { + +/// +/// Defines the different available log levels. +/// +/// +/// This is used by different loggers to set the maximum level of detail they will output. +/// +/// +/// +/// +/// +/// +/// +enum class Level +{ + /// + /// Error logging level. Only errors will be logged. + /// + Error, + + /// + /// Warning logging level. Only errors and warnings will be logged. + /// + Warning, + + /// + /// Informational logging level. Only errors, warnings and informational log messages will be logged. + /// + Info, + + /// + /// Verbose logging level. All log messages will be logged. + /// + Verbose +}; + +/*! \cond INTERNAL */ +namespace Details +{ + inline const char * LevelToString(Level level) + { + switch (level) + { + case Level::Error: return "error"; + case Level::Warning: return "warning"; + case Level::Info: return "info"; + default: + case Level::Verbose: return "verbose"; + } + } +} +/*! 
\endcond */ + +}}}}} \ No newline at end of file diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting.h new file mode 100644 index 0000000..1111e26 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting.h @@ -0,0 +1,340 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_meeting.h: Public API declarations for Meeting C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Class for meeting. +/// +class Meeting : public std::enable_shared_from_this +{ +public: + + static constexpr size_t MAX_MEETING_ID_LEN = 1024; + + /// + /// Create a meeting using a speech config and a meeting id. + /// + /// A shared smart pointer of a speech config object. + /// meeting Id. + /// A shared smart pointer of the created meeting object. + static std::future> CreateMeetingAsync(std::shared_ptr speechConfig, const SPXSTRING& meetingId) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, meetingId.empty()); + auto future = std::async(std::launch::async, [meetingId, speechConfig]() -> std::shared_ptr { + SPXMEETINGHANDLE hmeeting; + SPX_THROW_ON_FAIL(meeting_create_from_config(&hmeeting, (SPXSPEECHCONFIGHANDLE)(*speechConfig), Utils::ToUTF8(meetingId).c_str())); + return std::make_shared(hmeeting); + }); + return future; + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. 
+ explicit Meeting(SPXMEETINGHANDLE hmeeting) : + m_hmeeting(hmeeting), + m_properties(hmeeting), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~Meeting() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + ::meeting_release_handle(m_hmeeting); + m_hmeeting = SPXHANDLE_INVALID; + } + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXMEETINGHANDLE () const { return m_hmeeting; } + + /// + /// Get the meeting id. + /// + /// Meeting id. + SPXSTRING GetMeetingId() + { + char id[MAX_MEETING_ID_LEN + 1]; + std::memset(id, 0, MAX_MEETING_ID_LEN); + SPX_THROW_ON_FAIL(meeting_get_meeting_id(m_hmeeting, id, MAX_MEETING_ID_LEN)); + return id; + } + + /// + /// Add a participant to a meeting using the user's id. + /// + /// Note: The returned participant can be used to remove. If the client changes the participant's attributes, + /// the changed attributes are passed on to the service only when the participant is added again. + /// + /// A user id. + /// a shared smart pointer of the participant. + std::future> AddParticipantAsync(const SPXSTRING& userId) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, userId]() -> std::shared_ptr { + const auto participant = Participant::From(userId); + SPX_THROW_ON_FAIL(meeting_update_participant(m_hmeeting, true, (SPXPARTICIPANTHANDLE)(*participant))); + return participant; + }); + return future; + } + + /// + /// Add a participant to a meeting using the User object. + /// + /// A shared smart pointer to a User object. + /// The passed in User object. 
+ std::future> AddParticipantAsync(const std::shared_ptr& user) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, user]() -> std::shared_ptr { + SPX_THROW_ON_FAIL(meeting_update_participant_by_user(m_hmeeting, true, (SPXUSERHANDLE)(*user))); + return user; + }); + return future; + } + + /// + /// Add a participant to a meeting using the participant object + /// + /// A shared smart pointer to a participant object. + /// The passed in participant object. + std::future> AddParticipantAsync(const std::shared_ptr& participant) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, participant]() -> std::shared_ptr { + SPX_THROW_ON_FAIL(meeting_update_participant(m_hmeeting, true, (SPXPARTICIPANTHANDLE)(*participant))); + return participant; + }); + return future; + } + + /// + /// Remove a participant from a meeting using the participant object + /// + /// A shared smart pointer of a participant object. + /// An empty future. + std::future RemoveParticipantAsync(const std::shared_ptr& participant) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, participant]() -> void { + SPX_THROW_ON_FAIL(meeting_update_participant(m_hmeeting, false, (SPXPARTICIPANTHANDLE)(*participant))); + }); + return future; + } + + /// + /// Remove a participant from a meeting using the User object + /// + /// A smart pointer of a User. + /// An empty future. + std::future RemoveParticipantAsync(const std::shared_ptr& user) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, user]() -> void { + SPX_THROW_ON_FAIL(meeting_update_participant_by_user(m_hmeeting, false, SPXUSERHANDLE(*user))); + }); + return future; + } + + /// + /// Remove a participant from a meeting using a user id string. + /// + /// A user id. + /// An empty future. 
+ std::future RemoveParticipantAsync(const SPXSTRING& userId) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, userId]() -> void { + SPX_THROW_ON_FAIL(meeting_update_participant_by_user_id(m_hmeeting, false, Utils::ToUTF8(userId.c_str()))); + }); + return future; + } + + /// + /// Ends the current meeting. + /// + /// An empty future. + std::future EndMeetingAsync() + { + return RunAsync(::meeting_end_meeting); + } + + /// + /// Sets the authorization token that will be used for connecting the server. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Start the meeting. + /// + /// An empty future. + std::future StartMeetingAsync() + { + return RunAsync(::meeting_start_meeting); + } + + /// + /// Deletes the meeting. Any participants that are still part of the meeting + /// will be ejected after this call. + /// + /// An empty future. + std::future DeleteMeetingAsync() + { + return RunAsync(::meeting_delete_meeting); + } + + /// + /// Locks the meeting. After this no new participants will be able to join. + /// + /// An empty future. + std::future LockMeetingAsync() + { + return RunAsync(::meeting_lock_meeting); + } + + /// + /// Unlocks the meeting. + /// + /// An empty future. + std::future UnlockMeetingAsync() + { + return RunAsync(::meeting_unlock_meeting); + } + + /// + /// Mutes all participants except for the host. This prevents others from generating + /// transcriptions, or sending text messages. + /// + /// An empty future. 
+ std::future MuteAllParticipantsAsync() + { + return RunAsync(::meeting_mute_all_participants); + } + + /// + /// Allows other participants to generate transcriptions, or send text messages. + /// + /// An empty future. + std::future UnmuteAllParticipantsAsync() + { + return RunAsync(::meeting_unmute_all_participants); + } + + /// + /// Mutes a particular participant. This will prevent them generating new transcriptions, + /// or sending text messages. + /// + /// The identifier for the participant. + /// An empty future. + std::future MuteParticipantAsync(const SPXSTRING& participantId) + { + return RunAsync([participantId = Utils::ToUTF8(participantId)](auto handle) + { + return ::meeting_mute_participant(handle, participantId.c_str()); + }); + } + + /// + /// Unmutes a particular participant. + /// + /// The identifier for the participant. + /// An empty future. + std::future UnmuteParticipantAsync(const SPXSTRING& participantId) + { + return RunAsync([participantId = Utils::ToUTF8(participantId)](auto handle) + { + return ::meeting_unmute_participant(handle, participantId.c_str()); + }); + } + +private: + + /*! \cond PRIVATE */ + + SPXMEETINGHANDLE m_hmeeting; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXMEETINGHANDLE hmeeting) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + meeting_get_property_bag(hmeeting, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + inline std::future RunAsync(std::function func) + { + auto keepalive = this->shared_from_this(); + return std::async(std::launch::async, [keepalive, this, func]() + { + SPX_THROW_ON_FAIL(func(m_hmeeting)); + }); + } + + /*! \endcond */ + +public: + /// + /// A collection of properties and their values defined for this . 
+ /// + PropertyCollection& Properties; + +}; + +}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcriber.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcriber.h new file mode 100644 index 0000000..9eb2612 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcriber.h @@ -0,0 +1,467 @@ + +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_meeting_transcriber.h: Public API declarations for MeetingTranscriber C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +class Session; + +/// +/// Class for meeting transcriber. +/// +class MeetingTranscriber : public Recognizer +{ +public: + + /// + /// Create a meeting transcriber from an audio config. + /// + /// Audio configuration. + /// A smart pointer wrapped meeting transcriber pointer. + static std::shared_ptr FromConfig(std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::recognizer_create_meeting_transcriber_from_config( &hreco, + Utils::HandleOrInvalid(audioInput))); + + return std::make_shared(hreco); + } + + /// + /// Join a meeting. + /// + /// A smart pointer of the meeting to be joined. + /// An empty future. + std::future JoinMeetingAsync(std::shared_ptr meeting) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this, meeting]() -> void { + SPX_THROW_ON_FAIL(::recognizer_join_meeting(Utils::HandleOrInvalid(meeting), m_hreco)); + }); + + return future; + } + + /// + /// Leave a meeting. 
+ /// + /// Note: After leaving a meeting, no transcribing or transcribed events will be sent to end users. End users need to join a meeting to get the events again. + /// + /// An empty future. + std::future LeaveMeetingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_THROW_ON_FAIL(::recognizer_leave_meeting(m_hreco)); + }); + + return future; + } + + /// + /// Asynchronously starts a meeting transcribing. + /// + /// An empty future. + std::future StartTranscribingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStartContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async(m_hreco, &m_hasyncStartContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async_wait_for(m_hasyncStartContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStartContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Asynchronously stops a meeting transcribing. + /// + /// An empty future. 
+ std::future StopTranscribingAsync() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + + SPX_THROW_ON_FAIL(::recognizer_leave_meeting(m_hreco)); + + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStopContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async(m_hreco, &m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async_wait_for(m_hasyncStopContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStopContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStopContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit MeetingTranscriber(SPXRECOHANDLE hreco) throw() : + Recognizer(hreco), + SessionStarted(GetSessionEventConnectionsChangedCallback()), + SessionStopped(GetSessionEventConnectionsChangedCallback()), + SpeechStartDetected(GetRecognitionEventConnectionsChangedCallback()), + SpeechEndDetected(GetRecognitionEventConnectionsChangedCallback()), + Transcribing(GetRecoEventConnectionsChangedCallback()), + Transcribed(GetRecoEventConnectionsChangedCallback()), + Canceled(GetRecoCanceledEventConnectionsChangedCallback()), + m_hasyncStartContinuous(SPXHANDLE_INVALID), + m_hasyncStopContinuous(SPXHANDLE_INVALID), + m_properties(hreco), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~MeetingTranscriber() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Signal for events indicating the start of a recognition session (operation). 
+ /// + EventSignal SessionStarted; + + /// + /// Signal for events indicating the end of a recognition session (operation). + /// + EventSignal SessionStopped; + + /// + /// Signal for events indicating the start of speech. + /// + EventSignal SpeechStartDetected; + + /// + /// Signal for events indicating the end of speech. + /// + EventSignal SpeechEndDetected; + + /// + /// Signal for events containing intermediate recognition results. + /// + EventSignal Transcribing; + + /// + /// Signal for events containing final recognition results. + /// (indicating a successful recognition attempt). + /// + EventSignal Transcribed; + + /// + /// Signal for events containing canceled recognition results + /// (indicating a recognition attempt that was canceled as a result or a direct cancellation request + /// or, alternatively, a transport or protocol failure). + /// + EventSignal Canceled; + + /// + /// Sets the authorization token that will be used for connecting the server. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. 
+ virtual void TermRecognizer() override + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + Canceled.DisconnectAll(); + Transcribed.DisconnectAll(); + Transcribing.DisconnectAll(); + SpeechEndDetected.DisconnectAll(); + SpeechStartDetected.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + + // Close the async handles we have open for Recognize, StartContinuous, and StopContinuous + for (auto handle : { &m_hasyncStartContinuous, &m_hasyncStopContinuous }) + { + if (*handle != SPXHANDLE_INVALID && ::recognizer_async_handle_is_valid(*handle)) + { + ::recognizer_async_handle_release(*handle); + *handle = SPXHANDLE_INVALID; + } + } + + // Ask the base to term + Recognizer::TermRecognizer(); + } + + void RecoEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Transcribing) + { + recognizer_recognizing_set_callback(m_hreco, Transcribing.IsConnected() ? FireEvent_Transcribing : nullptr, this); + } + else if (&recoEvent == &Transcribed) + { + recognizer_recognized_set_callback(m_hreco, Transcribed.IsConnected() ? 
FireEvent_Transcribed : nullptr, this); + } + } + } + + static void FireEvent_Transcribing(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new MeetingTranscriptionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Transcribing.Signal(*recoEvent.get()); + } + + static void FireEvent_Transcribed(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new MeetingTranscriptionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Transcribed.Signal(*recoEvent.get()); + } + + void RecoCanceledEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Canceled) + { + recognizer_canceled_set_callback(m_hreco, Canceled.IsConnected() ? FireEvent_Canceled : nullptr, this); + } + } + } + + static void FireEvent_Canceled(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + + auto ptr = new MeetingTranscriptionCanceledEventArgs(hevent); + std::shared_ptr recoEvent(ptr); + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Canceled.Signal(*ptr); + } + + void SessionEventConnectionsChanged(const EventSignal& sessionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&sessionEvent == &SessionStarted) + { + recognizer_session_started_set_callback(m_hreco, SessionStarted.IsConnected() ? 
FireEvent_SessionStarted : nullptr, this); + } + else if (&sessionEvent == &SessionStopped) + { + recognizer_session_stopped_set_callback(m_hreco, SessionStopped.IsConnected() ? FireEvent_SessionStopped : nullptr, this); + } + } + } + + static void FireEvent_SessionStarted(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent{ new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStarted.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SessionStopped(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent{ new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStopped.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + void RecognitionEventConnectionsChanged(const EventSignal& recognitionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recognitionEvent == &SpeechStartDetected) + { + recognizer_speech_start_detected_set_callback(m_hreco, SpeechStartDetected.IsConnected() ? FireEvent_SpeechStartDetected : nullptr, this); + } + else if (&recognitionEvent == &SpeechEndDetected) + { + recognizer_speech_end_detected_set_callback(m_hreco, SpeechEndDetected.IsConnected() ? 
FireEvent_SpeechEndDetected : nullptr, this); + } + } + } + + static void FireEvent_SpeechStartDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechStartDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SpeechEndDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechEndDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + /*! 
\endcond */ + +private: + + SPXASYNCHANDLE m_hasyncStartContinuous; + SPXASYNCHANDLE m_hasyncStopContinuous; + + DISABLE_DEFAULT_CTORS(MeetingTranscriber); + friend class Microsoft::CognitiveServices::Speech::Session; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRECOHANDLE hreco) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + recognizer_get_property_bag(hreco, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + inline std::function&)> GetSessionEventConnectionsChangedCallback() + { + return [=](const EventSignal& sessionEvent) { this->SessionEventConnectionsChanged(sessionEvent); }; + } + + inline std::function&)> GetRecoEventConnectionsChangedCallback() + { + return [=](const EventSignal& recoEvent) { this->RecoEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecoCanceledEventConnectionsChangedCallback() + { + return [=](const EventSignal& recoEvent) { this->RecoCanceledEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecognitionEventConnectionsChangedCallback() + { + return [=](const EventSignal& recoEvent) { this->RecognitionEventConnectionsChanged(recoEvent); }; + } + +public: + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcription_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcription_eventargs.h new file mode 100644 index 0000000..4ff1db1 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcription_eventargs.h @@ -0,0 +1,168 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_meeting_transcription_eventargs.h: Public API declarations for MeetingTranscriptionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Class for meeting transcriber event arguments. +/// +class MeetingTranscriptionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit MeetingTranscriptionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~MeetingTranscriptionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Meeting transcriber result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Meeting transcriber result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(MeetingTranscriptionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for meeting transcriber canceled event arguments. +/// Added in version 1.5.0. 
+/// +class MeetingTranscriptionCanceledEventArgs : public MeetingTranscriptionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit MeetingTranscriptionCanceledEventArgs(SPXEVENTHANDLE hevent) : + MeetingTranscriptionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~MeetingTranscriptionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. 
+ /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(MeetingTranscriptionCanceledEventArgs); +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcription_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcription_result.h new file mode 100644 index 0000000..9b54fb8 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_meeting_transcription_result.h @@ -0,0 +1,96 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_meeting_transcriber_result.h: Public API declarations for MeetingTranscription C++ class +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Represents the result of a meeting transcriber. +/// +class MeetingTranscriptionResult final : public RecognitionResult +{ +public: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Result handle. + explicit MeetingTranscriptionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult), + UserId(m_userId), + UtteranceId(m_utteranceId) + { + PopulateSpeakerFields(hresult, &m_userId); + PopulateUtteranceFields(hresult, &m_utteranceId); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s, userid=%s, utteranceid=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str(), Utils::ToUTF8(UserId).c_str(), Utils::ToUTF8(UtteranceId).c_str()); + } + + /// + /// Destructor. 
+ /// + ~MeetingTranscriptionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + /// + /// Unique Speaker id. + /// + const SPXSTRING& UserId; + + /// + /// Unique id that is consistent across all the intermediates and final speech recognition result from one user. + /// + const SPXSTRING& UtteranceId; + +private: + DISABLE_DEFAULT_CTORS(MeetingTranscriptionResult); + + void PopulateSpeakerFields(SPXRESULTHANDLE hresult, SPXSTRING* puserId) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1] = {}; + + if (puserId != nullptr && recognizer_result_handle_is_valid(hresult)) + { + SPX_THROW_ON_FAIL(hr = meeting_transcription_result_get_user_id(hresult, sz, maxCharCount)); + *puserId = Utils::ToSPXString(sz); + } + } + + void PopulateUtteranceFields(SPXRESULTHANDLE hresult, SPXSTRING* putteranceId) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1] = {}; + + if (putteranceId != nullptr && recognizer_result_handle_is_valid(hresult)) + { + SPX_THROW_ON_FAIL(hr = meeting_transcription_result_get_utterance_id(hresult, sz, maxCharCount)); + *putteranceId = Utils::ToSPXString(sz); + } + } + + SPXSTRING m_userId; + SPXSTRING m_utteranceId; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Transcription diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_memory_logger.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_memory_logger.h new file mode 100644 index 0000000..2e1644f --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_memory_logger.h @@ -0,0 +1,163 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Diagnostics { +namespace Logging { + +/// +/// Class with static methods to control SDK logging into an in-memory buffer. +/// Turning on logging while running your Speech SDK scenario provides +/// detailed information from the SDK's core native components. If you +/// report an issue to Microsoft, you may be asked to provide logs to help +/// Microsoft diagnose the issue. Your application should not take dependency +/// on particular log strings, as they may change from one SDK release to another +/// without notice. +/// MemoryLogger is designed for the case where you want to get access to logs +/// that were taken in the short duration before some unexpected event happens. +/// For example, if you are running a Speech Recognizer, you may want to dump the MemoryLogger +/// after getting an event indicating recognition was canceled due to some error. +/// The size of the memory buffer is fixed at 2MB and cannot be changed. This is +/// a "ring" buffer, that is, new log strings written replace the oldest ones +/// in the buffer. +/// Added in version 1.20.0 +/// +/// Memory logging is a process wide construct. That means that if (for example) +/// you have multiple speech recognizer objects running in parallel, there will be one +/// memory buffer containing interleaved logs from all recognizers. You cannot get a +/// separate logs for each recognizer. +class MemoryLogger +{ +public: + /// + /// Starts logging into the internal memory buffer. + /// + static void Start() + { + diagnostics_log_memory_start_logging(); + } + + /// + /// Stops logging into the internal memory buffer. + /// + static void Stop() + { + diagnostics_log_memory_stop_logging(); + } + + /// + /// Sets or clears filters for memory logging. 
+ /// Once filters are set, memory logger will only be updated with log strings + /// containing at least one of the strings specified by the filters. The match is case sensitive. + /// + /// Optional. Filters to use, or an empty list to remove previously set filters. + static void SetFilters(std::initializer_list filters = {}) + { + std::string collapsedFilters = MemoryLogger::CollapseFilters(filters); + + diagnostics_log_memory_set_filters(collapsedFilters.c_str()); + } + + /// + /// Writes the content of the whole memory buffer to the specified file. + /// It does not block other SDK threads from continuing to log into the buffer. + /// + /// Path to a log file on local disk. + /// This does not reset (clear) the memory buffer. + static void Dump(const SPXSTRING& filePath) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, filePath.empty()); + + SPX_THROW_ON_FAIL(diagnostics_log_memory_dump(Utils::ToUTF8(filePath).c_str(), nullptr, false, false)); + } + + /// + /// Writes the content of the whole memory buffer to an object that implements std::ostream. + /// For example, std::cout (for console output). + /// It does not block other SDK threads from continuing to log into the buffer. + /// + /// std::ostream object to write to. + /// This does not reset (clear) the memory buffer. + static void Dump(std::ostream& outStream) + { + auto start = diagnostics_log_memory_get_line_num_oldest(); + auto stop = diagnostics_log_memory_get_line_num_newest(); + for (auto i = start; + i < stop; + i++) + { + const char* line = diagnostics_log_memory_get_line(i); + if (line) + { + outStream << line; + } + } + } + + /// + /// Returns the content of the whole memory buffer as a vector of strings. + /// It does not block other SDK threads from continuing to log into the buffer. + /// + /// A vector with the contents of the memory buffer copied into it. + /// This does not reset (clear) the memory buffer. 
+ static std::vector Dump() + { + std::vector results; + + auto start = diagnostics_log_memory_get_line_num_oldest(); + auto stop = diagnostics_log_memory_get_line_num_newest(); + for (auto i = start; + i < stop; + i++) + { + const char* line = diagnostics_log_memory_get_line(i); + if (line) + { + results.push_back(line); + } + } + + return results; + } + + /// + /// Sets the level of the messages to be captured by the logger + /// + /// Maximum level of detail to be captured by the logger. + static void SetLevel(Level level) + { + const auto levelStr = Details::LevelToString(level); + diagnostics_set_log_level("memory", levelStr); + } + +private: + static std::string CollapseFilters(std::initializer_list filters) + { + std::string str = ""; + + if (filters.size() > 0) + { + std::ostringstream filtersCollapsed; + std::copy(filters.begin(), filters.end(), std::ostream_iterator(filtersCollapsed, ";")); + str = filtersCollapsed.str(); + } + + return str; + } +}; + +}}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_participant.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_participant.h new file mode 100644 index 0000000..b87ee40 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_participant.h @@ -0,0 +1,222 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_participant.h: Public API declarations for Participant C++ class +// + +#pragma once +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +/// +/// Represents a participant in a conversation. +/// Added in version 1.5.0. 
+/// +class Participant +{ +private: + SPXPARTICIPANTHANDLE m_hparticipant; + SPXSTRING m_avatar; + SPXSTRING m_id; + SPXSTRING m_displayName; + bool m_isTts; + bool m_isMuted; + bool m_isHost; + +public: + /// + /// Create a participant using user id, her/his preferred language and her/his voice signature. + /// If voice signature is empty then user will not be identified. + /// + /// A user ids. + /// The preferred languages of the user. It can be optional. + /// The voice signature of the user. It can be optional. + /// A smart pointer of Participant + static std::shared_ptr From(const SPXSTRING& userId, const SPXSTRING& preferredLanguage = {}, const SPXSTRING& voiceSignature = {}) + { + SPXPARTICIPANTHANDLE hparticipant = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(participant_create_handle(&hparticipant, Utils::ToUTF8(userId.c_str()), Utils::ToUTF8(preferredLanguage.c_str()), Utils::ToUTF8(voiceSignature.c_str()))); + return std::make_shared(hparticipant); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// participant handle. + explicit Participant(SPXPARTICIPANTHANDLE hparticipant = SPXHANDLE_INVALID) : + m_hparticipant(hparticipant), + m_avatar(), + m_id(), + m_displayName(), + m_isTts(false), + m_isMuted(false), + m_isHost(false), + Id(m_id), + Avatar(m_avatar), + DisplayName(m_displayName), + IsUsingTts(m_isTts), + IsMuted(m_isMuted), + IsHost(m_isHost), + m_properties(hparticipant), + Properties(m_properties) + { + LoadConversationParticipantProperties(hparticipant); + } + + /// + /// Virtual destructor. + /// + virtual ~Participant() { participant_release_handle(m_hparticipant); } + + /// + /// Get the identifier for the participant. + /// + const SPXSTRING& Id; + + /// + /// Gets the colour of the user's avatar as an HTML hex string (e.g. FF0000 for red). + /// + const SPXSTRING& Avatar; + + /// + /// The participant's display name. 
Please note that each participant within the same conversation must + /// have a different display name. Duplicate names within the same conversation are not allowed. You can + /// use the Id property as another way to refer to each participant. + /// + const SPXSTRING& DisplayName; + + /// + /// Gets whether or not the participant is using Text To Speech (TTS). + /// + const bool& IsUsingTts; + + /// + /// Gets whether or not this participant is muted. + /// + const bool& IsMuted; + + /// + /// Gets whether or not this participant is the host. + /// + const bool& IsHost; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXPARTICIPANTHANDLE() const { return m_hparticipant; } + + /// + /// Set preferred language. + /// + /// The preferred language, such as "en-us". + void SetPreferredLanguage(const std::string& preferredLanguage) + { + SPX_THROW_ON_FAIL(participant_set_preferred_langugage(m_hparticipant, Utils::ToUTF8(preferredLanguage.c_str()))); + } + + /// + /// Set voice signature. + /// If voice signature is empty then user will not be identified. + /// + /// The participant's voice signature." + void SetVoiceSignature(const std::string& voiceSignature) + { + SPX_THROW_ON_FAIL(participant_set_voice_signature(m_hparticipant, Utils::ToUTF8(voiceSignature.c_str()))); + } + +private: + + /*! 
\cond PRIVATE */ + + DISABLE_COPY_AND_MOVE(Participant); + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXPARTICIPANTHANDLE hparticipant) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + participant_get_property_bag(hparticipant, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + SPXSTRING TryLoadString(SPXEVENTHANDLE hevent, SPXHR(SPXAPI_CALLTYPE * func)(SPXEVENTHANDLE, char*, uint32_t *)) + { + std::unique_ptr psz; + try + { + // query the string length + uint32_t length = 0; + + // don't use SPX_THROW_ON_FAIL since that creates a handle for exceptions that will leak + // since we don't care about them. + SPXHR hr = func(hevent, nullptr, &length); + if (SPX_FAILED(hr) || length == 0) + { + return SPXSTRING{}; + } + + psz = std::unique_ptr(new char[length]); + hr = func(hevent, psz.get(), &length); + if (SPX_FAILED(hr)) + { + return SPXSTRING{}; + } + + return Utils::ToSPXString(psz.get()); + } + catch (...) + { + // ignore errors since not all participants have the properties we need + return SPXSTRING{}; + } + } + + void LoadConversationParticipantProperties(SPXPARTICIPANTHANDLE hParticipant) + { + m_id = TryLoadString(hParticipant, conversation_translator_participant_get_id); + m_avatar = TryLoadString(hParticipant, conversation_translator_participant_get_avatar); + m_displayName = TryLoadString(hParticipant, conversation_translator_participant_get_displayname); + + bool val; + if (SPX_SUCCEEDED(conversation_translator_participant_get_is_using_tts(hParticipant, &val))) + { + m_isTts = val; + } + + if (SPX_SUCCEEDED(conversation_translator_participant_get_is_muted(hParticipant, &val))) + { + m_isMuted = val; + } + + if (SPX_SUCCEEDED(conversation_translator_participant_get_is_host(hParticipant, &val))) + { + m_isHost = val; + } + } + + /*! 
\endcond */ + +public: + + /// + /// Collection of additional participant properties. + /// + PropertyCollection& Properties; +}; + +}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_entity.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_entity.h new file mode 100644 index 0000000..823518f --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_entity.h @@ -0,0 +1,46 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_pattern_matching_entity.h: Public API declarations for PatternMatchingEntity C++ struct +// + +#pragma once +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents a pattern matching entity used for intent recognition. +/// +struct PatternMatchingEntity +{ + /// + /// An Id used to define this Entity if it is matched. This id must appear in an intent phrase + /// or it will never be matched. + /// + SPXSTRING Id; + + /// + /// The Type of this Entity. + /// + EntityType Type; + + /// + /// The EntityMatchMode of this Entity. + /// + EntityMatchMode Mode; + + /// + /// If the Type is List these phrases will be used as the list. + /// + std::vector Phrases; + +}; + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_intent.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_intent.h new file mode 100644 index 0000000..0670291 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_intent.h @@ -0,0 +1,36 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_pattern_matching_intent.h: Public API declarations for PatternMatchingIntent C++ struct +// + +#pragma once +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + +/// +/// Represents a pattern matching intent used for intent recognition. +/// +struct PatternMatchingIntent +{ + /// + /// Phrases and patterns that will trigger this intent. At least one phrase must exist to be able to + /// apply this intent to an IntentRecognizer. + /// + std::vector Phrases; + + /// + /// An Id used to define this Intent if it is matched. If no Id is specified, then the first phrase in Phrases + /// will be used. + /// + SPXSTRING Id; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_model.h new file mode 100644 index 0000000..2cb8954 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pattern_matching_model.h @@ -0,0 +1,372 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_pattern_matching_model.h: Public API declarations for PatternMatchingModel C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Intent { + + /// + /// Represents a pattern matching model used for intent recognition. + /// + class PatternMatchingModel : public LanguageUnderstandingModel + { + public: + + /// + /// Creates a pattern matching model using the specified model ID. + /// + /// A string that represents a unique Id for this model. + /// A shared pointer to pattern matching model. 
+ static std::shared_ptr FromModelId(const SPXSTRING& modelId) + { + return std::shared_ptr { + new PatternMatchingModel(modelId) + }; + } + + /// + /// Creates a pattern matching model using the specified .json file. This should follow the Microsoft LUIS JSON export schema. + /// + /// A string that representing the path to a '.json' file. + /// A shared pointer to pattern matching model. + static std::shared_ptr FromJSONFile(const SPXSTRING& filepath) + { + FILE* fp; + int err; +#ifdef _MSC_VER + err = fopen_s(&fp, filepath.c_str(), "r"); +#else + fp = fopen(filepath.c_str(), "r"); + if (fp == NULL) + { + err = -1; + } + else + { + err = 0; + } +#endif + if (err == 0 && fp != NULL) + { + char buffer[1024] = {}; + size_t numread = 0; + std::string fileContents = ""; +#ifdef _MSC_VER + while ((numread = fread_s((void**)&buffer, sizeof(buffer), sizeof(char), sizeof(buffer), fp)) != 0) +#else + while ((numread = fread((void**)&buffer, sizeof(char), sizeof(buffer), fp)) != 0) +#endif + { + fileContents.append(buffer, numread); + } + fclose(fp); + return ParseJSONFile(fileContents); + } + else + { + SPX_TRACE_ERROR("Attempt to read %s failed.", SPXERR_FILE_OPEN_FAILED, filepath.c_str()); + return nullptr; + } + } + + /// + /// Creates a PatternMatchingModel using the specified istream pointing to an .json file in the LUIS json format. + /// This assumes the stream is already open and has permission to read. + /// + /// A stream that representing a '.json' file. + /// A shared pointer to pattern matching model. + static std::shared_ptr FromJSONFileStream(std::istream& iStream) + { + std::istreambuf_iterator iterator{iStream}; + std::string str(iterator, {}); + return ParseJSONFile(str); + } + + /// + /// Returns id for this model. + /// + /// A string representing the id of this model. + SPXSTRING GetModelId() const { return m_modelId; } + + /// + /// This container of Intents is used to define all the Intents this model will look for. 
+ /// + std::vector Intents; + + /// + /// This container of Intents is used to define all the Intents this model will look for. + /// + std::vector Entities; + + private: + DISABLE_COPY_AND_MOVE(PatternMatchingModel); + + PatternMatchingModel(const SPXSTRING& modelId) : LanguageUnderstandingModel(LanguageUnderstandingModelType::PatternMatchingModel), m_modelId(modelId) {} + + SPXSTRING m_modelId; + + static std::shared_ptr ParseJSONFile(const std::string& fileContents) + { + auto model = std::shared_ptr(new PatternMatchingModel("")); + AZAC_HANDLE parserHandle; + auto root = ai_core_json_parser_create(&parserHandle, fileContents.c_str(), fileContents.size()); + if (!ai_core_json_parser_handle_is_valid(parserHandle)) + { + SPX_TRACE_ERROR("Attempt to parse language understanding json file failed.", SPXERR_UNSUPPORTED_FORMAT); + return nullptr; + } + int count = ai_core_json_item_count(parserHandle, root); + for (int i = 0; i < count; i++) + { + auto itemInt = ai_core_json_item_at(parserHandle, root, i, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, itemInt); + size_t nameSize; + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + + size_t valueSize = 0; + auto value = ai_core_json_value_as_string_ptr(parserHandle, itemInt, &valueSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + if (nameStr == "luis_schema_version") + { + // We support any version that we are able to pull data out of. 
+ } + else if (nameStr == "prebuiltEntities") + { + int prebuiltcount = ai_core_json_item_count(parserHandle, itemInt); + for (int j = 0; j < prebuiltcount; j++) + { + ParsePrebuiltEntityJson(parserHandle, model, itemInt, j); + } + } + else if (nameStr == "name") + { + model->m_modelId = std::string(value, valueSize); + } + else if (nameStr == "patternAnyEntities" || nameStr == "entities") + { + int anyCount = ai_core_json_item_count(parserHandle, itemInt); + for (int j = 0; j < anyCount; j++) + { + ParseEntityJson(parserHandle, model, itemInt, j); + } + } + else if (nameStr == "patterns") + { + int patternCount = ai_core_json_item_count(parserHandle, itemInt); + for (int j = 0; j < patternCount; j++) + { + ParsePatternJson(parserHandle, model, itemInt, j); + } + } + else if (nameStr == "closedLists") + { + int listCount = ai_core_json_item_count(parserHandle, itemInt); + for (int j = 0; j < listCount; j++) + { + ParseListEntityJson(parserHandle, model, itemInt, j); + } + } + } + } + return model; + } + + static void ParsePrebuiltEntityJson(AZAC_HANDLE parserHandle, std::shared_ptr model, int itemInt, int index) + { + auto subItemInt = ai_core_json_item_at(parserHandle, itemInt, index, nullptr); + int subItemCount = ai_core_json_item_count(parserHandle, subItemInt); + size_t nameSize = 0; + size_t valueSize = 0; + for (int subItemIndex = 0; subItemIndex < subItemCount; subItemIndex++) + { + auto prebuiltPairInt = ai_core_json_item_at(parserHandle, subItemInt, subItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, prebuiltPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + auto value = ai_core_json_value_as_string_ptr(parserHandle, prebuiltPairInt, &valueSize); + if (nameStr == "name" && value != nullptr) + { + auto valueStr = std::string(value, valueSize); + if (valueStr == "number") + { + model->Entities.push_back({ "number", 
EntityType::PrebuiltInteger, EntityMatchMode::Basic, {} }); + } + // ignore any other prebuilt types as they are not supported. + } + } + } + } + + static void ParseEntityJson(AZAC_HANDLE parserHandle, std::shared_ptr model, int itemInt, int index) + { + auto subItemInt = ai_core_json_item_at(parserHandle, itemInt, index, nullptr); + int subItemCount = ai_core_json_item_count(parserHandle, subItemInt); + size_t nameSize = 0; + size_t valueSize = 0; + for (int subItemIndex = 0; subItemIndex < subItemCount; subItemIndex++) + { + auto entityPairInt = ai_core_json_item_at(parserHandle, subItemInt, subItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, entityPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + auto value = ai_core_json_value_as_string_ptr(parserHandle, entityPairInt, &valueSize); + if (nameStr == "name" && value != nullptr) + { + model->Entities.push_back({ std::string(value, valueSize), EntityType::Any, EntityMatchMode::Basic, {}}); + } + // ignore any other pairs since we only care about the name. + } + } + } + + static void ParseListEntityJson(AZAC_HANDLE parserHandle, std::shared_ptr model, int itemInt, int index) + { + auto subItemInt = ai_core_json_item_at(parserHandle, itemInt, index, nullptr); + int subItemCount = ai_core_json_item_count(parserHandle, subItemInt); + size_t nameSize = 0; + size_t valueSize = 0; + // Default to Strict matching. 
+ PatternMatchingEntity entity{ "", EntityType::List, EntityMatchMode::Strict, {} }; + for (int subItemIndex = 0; subItemIndex < subItemCount; subItemIndex++) + { + auto listPairInt = ai_core_json_item_at(parserHandle, subItemInt, subItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, listPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + if (nameStr == "name") + { + auto value = ai_core_json_value_as_string_ptr(parserHandle, listPairInt, &valueSize); + if (value != nullptr) + { + entity.Id = std::string(value, valueSize); + } + } + if (nameStr == "subLists") + { + ParseSubList(parserHandle, entity, listPairInt); + } + // ignore any other pairs since we only care about the name. + } + } + model->Entities.push_back(entity); + } + + static void ParseSubList(AZAC_HANDLE parserHandle, PatternMatchingEntity& entity, int listPairInt) + { + size_t nameSize = 0; + size_t valueSize = 0; + auto subListCount = ai_core_json_item_count(parserHandle, listPairInt); + for (int subListIndex = 0; subListIndex < subListCount; subListIndex++) + { + auto subListItemInt = ai_core_json_item_at(parserHandle, listPairInt, subListIndex, nullptr); + auto subListItemCount = ai_core_json_item_count(parserHandle, subListItemInt); + for (int subListItemIndex = 0; subListItemIndex < subListItemCount; subListItemIndex++) + { + auto subListPairInt = ai_core_json_item_at(parserHandle, subListItemInt, subListItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, subListPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + if (nameStr == "canonicalForm") + { + auto value = ai_core_json_value_as_string_ptr(parserHandle, subListPairInt, &valueSize); + if (value != nullptr) + { + entity.Phrases.push_back(std::string(value, valueSize)); + 
} + } + else if (nameStr == "list") + { + auto subListSynonymInt = ai_core_json_item_at(parserHandle, subListItemInt, subListItemIndex, nullptr); + auto subListSynonymItemCount = ai_core_json_item_count(parserHandle, subListSynonymInt); + for (int subListSynonymIndex = 0; subListSynonymIndex < subListSynonymItemCount; subListSynonymIndex++) + { + auto subListSynonymEntryInt = ai_core_json_item_at(parserHandle, subListSynonymInt, subListSynonymIndex, nullptr); + auto value = ai_core_json_value_as_string_ptr(parserHandle, subListSynonymEntryInt, &valueSize); + if (value != nullptr) + { + entity.Phrases.push_back(std::string(value, valueSize)); + } + } + } + } + } + } + } + + static void ParsePatternJson(AZAC_HANDLE parserHandle, std::shared_ptr model, int itemInt, int index) + { + auto subItemInt = ai_core_json_item_at(parserHandle, itemInt, index, nullptr); + int subItemCount = ai_core_json_item_count(parserHandle, subItemInt); + size_t nameSize = 0; + size_t valueSize = 0; + std::string patternStr, intentIdStr; + for (int subItemIndex = 0; subItemIndex < subItemCount; subItemIndex++) + { + auto entityPairInt = ai_core_json_item_at(parserHandle, subItemInt, subItemIndex, nullptr); + auto nameInt = ai_core_json_item_name(parserHandle, entityPairInt); + auto name = ai_core_json_value_as_string_ptr(parserHandle, nameInt, &nameSize); + if (name != nullptr) + { + auto nameStr = std::string(name, nameSize); + auto value = ai_core_json_value_as_string_ptr(parserHandle, entityPairInt, &valueSize); + if (value != nullptr) + { + if (nameStr == "pattern") + { + patternStr = std::string(value, valueSize); + } + else if (nameStr == "intent") + { + intentIdStr = std::string(value, valueSize); + } + } + // ignore any other pairs since we only care about the name. 
+ } + } + if (!patternStr.empty() && !intentIdStr.empty()) + { + bool added = false; + for (auto& intent : model->Intents) + { + if (intent.Id == intentIdStr) + { + intent.Phrases.push_back(patternStr); + added = true; + break; + } + } + if (!added) + { + model->Intents.push_back({ {patternStr}, intentIdStr}); + } + } + } + +}; + +} } } } // Microsoft::CognitiveServices::Speech::Intent diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_phrase_list_grammar.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_phrase_list_grammar.h new file mode 100644 index 0000000..dbcece2 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_phrase_list_grammar.h @@ -0,0 +1,92 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_phrase_list_grammar.h: Public API declarations for PhraseListGrammar C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Represents a phrase list grammar for dynamic grammar scenarios. +/// Added in version 1.5.0. +/// +class PhraseListGrammar : public Grammar +{ +public: + + /// + /// Creates a phrase list grammar for the specified recognizer. + /// + /// The recognizer from which to obtain the phrase list grammar. + /// A shared pointer to phrase list grammar. + template + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer) + { + return FromRecognizer(recognizer, Utils::ToSPXString(nullptr)); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Phrase list grammar handle. + explicit PhraseListGrammar(SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID) : Grammar(hgrammar) { } + + /// + /// Adds a simple phrase that may be spoken by the user. + /// + /// The phrase to be added. 
+ void AddPhrase(const SPXSTRING& text) + { + auto phrase = GrammarPhrase::From(text); + SPX_THROW_ON_FAIL(phrase_list_grammar_add_phrase(m_hgrammar.get(), (SPXPHRASEHANDLE)(*phrase.get()))); + } + + /// + /// Clears all phrases from the phrase list grammar. + /// + void Clear() + { + SPX_THROW_ON_FAIL(phrase_list_grammar_clear(m_hgrammar.get())); + } + +protected: + + /// + /// Internal. Creates a phrase list grammar for the specified recognizer, with the specified name. + /// + /// The recognizer from which to obtain the phrase list grammar. + /// The name of the phrase list grammar to create. + /// A shared pointer to phrase list grammar. + template + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer, const SPXSTRING& name) + { + SPXRECOHANDLE hreco = recognizer != nullptr + ? (SPXRECOHANDLE)(*recognizer.get()) + : SPXHANDLE_INVALID; + + SPXGRAMMARHANDLE hgrammar = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(phrase_list_grammar_from_recognizer_by_name(&hgrammar, hreco, Utils::ToUTF8(name.c_str()))); + + return std::make_shared(hgrammar); + } + + +private: + + DISABLE_COPY_AND_MOVE(PhraseListGrammar); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pronunciation_assessment_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pronunciation_assessment_config.h new file mode 100644 index 0000000..88ddc99 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pronunciation_assessment_config.h @@ -0,0 +1,222 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once + +#include + +#include "speechapi_cxx_properties.h" +#include "speechapi_cxx_string_helpers.h" +#include "speechapi_cxx_utils.h" +#include "speechapi_cxx_common.h" +#include "speechapi_cxx_enums.h" +#include +#include "speechapi_c_pronunciation_assessment_config.h" + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines pronunciation assessment configuration +/// Added in 1.14.0 +/// +class PronunciationAssessmentConfig +{ +public: + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE() const { return m_hconfig; } + + /// + /// Creates an instance of the PronunciationAssessmentConfig + /// For parameter details, see the table + /// [Pronunciation assessment parameters](/azure/cognitive-services/speech-service/rest-speech-to-text-short#pronunciation-assessment-parameters). + /// + /// The reference text + /// The point system for score calibration + /// The evaluation granularity + /// If enables miscue calculation. When true, the pronounced words are compared to the reference text, and are marked with omission/insertion based on the comparison; when false, the recognized text will always be reference text. + /// A shared pointer to the new PronunciationAssessmentConfig instance. 
+ static std::shared_ptr Create(const std::string& referenceText, + PronunciationAssessmentGradingSystem gradingSystem = + PronunciationAssessmentGradingSystem::FivePoint, + PronunciationAssessmentGranularity granularity = + PronunciationAssessmentGranularity::Phoneme, + bool enableMiscue = false) + { + SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL( + create_pronunciation_assessment_config(&hconfig, Utils::ToUTF8(referenceText).c_str(), + static_cast(gradingSystem), + static_cast(granularity), + enableMiscue)); + const auto ptr = new PronunciationAssessmentConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the PronunciationAssessmentConfig + /// For parameters details, see the table + /// [Pronunciation assessment parameters](/azure/cognitive-services/speech-service/rest-speech-to-text-short#pronunciation-assessment-parameters). + /// + /// The reference text + /// The point system for score calibration + /// The evaluation granularity + /// If enables miscue calculation + /// A shared pointer to the new PronunciationAssessmentConfig instance. + static std::shared_ptr Create(const std::wstring& referenceText, + PronunciationAssessmentGradingSystem gradingSystem = + PronunciationAssessmentGradingSystem::FivePoint, + PronunciationAssessmentGranularity granularity = + PronunciationAssessmentGranularity::Phoneme, + bool enableMiscue = false) + { + return Create(Utils::ToUTF8(referenceText), gradingSystem, granularity, enableMiscue); + } + + /// + /// Creates an instance of the PronunciationAssessmentConfig from json. See the table + /// [Pronunciation assessment parameters](/azure/cognitive-services/speech-service/rest-speech-to-text-short#pronunciation-assessment-parameters). + /// + /// The json string containing the pronunciation assessment parameters. + /// A shared pointer to the new PronunciationAssessmentConfig instance. 
+ static std::shared_ptr CreateFromJson(const SPXSTRING& json) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, json.empty()); + SPXAUTODETECTSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(create_pronunciation_assessment_config_from_json(&hconfig, Utils::ToUTF8(json).c_str())); + const auto ptr = new PronunciationAssessmentConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Gets to json string of pronunciation assessment parameters. + /// + /// json string of pronunciation assessment parameters. + SPXSTRING ToJson() const + { + const char* jsonCch = pronunciation_assessment_config_to_json(m_hconfig); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(jsonCch)); + } + + /// + /// Gets the reference text. + /// + /// The reference text. + SPXSTRING GetReferenceText() + { + const char* value = property_bag_get_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_ReferenceText), nullptr, ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Sets the reference text. + /// + /// The reference text. + void SetReferenceText(const std::string& referenceText) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_ReferenceText), nullptr, referenceText.c_str()); + } + + /// + /// Sets the reference text. + /// + /// The reference text. + void SetReferenceText(const std::wstring& referenceText) + { + SetReferenceText(Utils::ToUTF8(referenceText)); + } + + /// + /// Sets phoneme alphabet. Valid values are: "SAPI" (default) and "IPA". + /// + /// Added in version 1.20.0. + /// The phoneme alphabet. + void SetPhonemeAlphabet(const SPXSTRING& phonemeAlphabet) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_PhonemeAlphabet), nullptr, Utils::ToUTF8(phonemeAlphabet).c_str()); + } + + /// + /// Sets nbest phoneme count in the result. + /// + /// Added in version 1.20.0. + /// The nbest phoneme count. 
+ void SetNBestPhonemeCount(int count) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_NBestPhonemeCount), nullptr, std::to_string(count).c_str()); + } + + /// + /// Enables prosody assessment. + /// + /// Added in version 1.33.0. + void EnableProsodyAssessment() + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_EnableProsodyAssessment), nullptr, "true"); + } + + /// + /// Enables the content assessment with topic. + /// + /// Added in version 1.33.0. + /// The content topic. + void EnableContentAssessmentWithTopic(const SPXSTRING& contentTopic) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::PronunciationAssessment_ContentTopic), nullptr, Utils::ToUTF8(contentTopic).c_str()); + } + + /// + /// Applies the settings in this config to a Recognizer. + /// + /// The target Recognizer. + void ApplyTo(std::shared_ptr recognizer) const + { + SPX_INIT_HR(hr); + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, recognizer == nullptr); + + SPX_THROW_ON_FAIL(hr =::pronunciation_assessment_config_apply_to_recognizer(m_hconfig, recognizer->m_hreco)); + } + + /// + /// Destructs the object. + /// + virtual ~PronunciationAssessmentConfig() + { + pronunciation_assessment_config_release(m_hconfig); + property_bag_release(m_propertybag); + } + +private: + + /// + /// Internal constructor. Creates a new instance using the provided handle. 
+ /// + explicit PronunciationAssessmentConfig(SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE hconfig) + :m_hconfig(hconfig) + { + SPX_THROW_ON_FAIL(pronunciation_assessment_config_get_property_bag(hconfig, &m_propertybag)); + } + + /// + /// Internal member variable that holds the config + /// + SPXPRONUNCIATIONASSESSMENTCONFIGHANDLE m_hconfig; + + /// + /// Internal member variable that holds the properties of the speech config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + DISABLE_COPY_AND_MOVE(PronunciationAssessmentConfig); +}; + +}}} + diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pronunciation_assessment_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pronunciation_assessment_result.h new file mode 100644 index 0000000..aedbcb7 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_pronunciation_assessment_result.h @@ -0,0 +1,142 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_pronunciation_assessment_result.h: Public API declarations for PronunciationAssessmentResult C++ class +// + +#pragma once +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for content assessment results. +/// +class PronunciationContentAssessmentResult +{ +public: + /// + /// The score indicating the grammar of the given speech. + /// + const double GrammarScore; + + /// + /// The score indicating the vocabulary of the given speech. + /// + const double VocabularyScore; + + /// + /// The score indicating the topic of the given speech. + /// + const double TopicScore; + + /*! 
\cond INTERNAL */ + + PronunciationContentAssessmentResult(const PropertyCollection& properties) : + GrammarScore(std::stod(properties.GetProperty("ContentAssessment_GrammarScore", "-1"))), + VocabularyScore(std::stod(properties.GetProperty("ContentAssessment_VocabularyScore", "-1"))), + TopicScore(std::stod(properties.GetProperty("ContentAssessment_TopicScore", "-1"))) + { + } + + /*! \endcond */ + +}; + + +/// +/// Class for pronunciation assessment results. +/// +class PronunciationAssessmentResult +{ +public: + + /// + /// Creates a pronunciation assessment result object from recognition result + /// If nullptr is returned, it means the assessment is failed. + /// + /// recognition result + /// A shared pointer to the created PronunciationAssessmentResult instance. + static std::shared_ptr FromResult(std::shared_ptr result) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result == nullptr); + if (result->Properties.GetProperty("AccuracyScore").empty() && result->Properties.GetProperty("ContentAssessment_GrammarScore").empty()) + { + return nullptr; + } + auto ptr = new PronunciationAssessmentResult(result->Properties); + return std::shared_ptr(ptr); + } + + /// + /// The score indicating the pronunciation accuracy of the given speech, which indicates + /// how closely the phonemes match a native speaker's pronunciation. + /// If this is less 0, it means the pronunciation assessment failed. + /// + const double AccuracyScore; + + /// + /// The overall score indicating the pronunciation quality of the given speech. + /// This is calculated from AccuracyScore, FluencyScore and CompletenessScore with weight. + /// If this is less 0, it means the pronunciation assessment failed. + /// + const double PronunciationScore; + + /// + /// The score indicating the completeness of the given speech by calculating the ratio of pronounced words towards entire input. + /// If this is less 0, it means the pronunciation assessment failed. 
+ /// + const double CompletenessScore; + + /// + /// The score indicating the fluency of the given speech. + /// If this is less 0, it means the pronunciation assessment failed. + /// + const double FluencyScore; + + /// + /// The score indicating the prosody of the given speech. + /// If this is less 0, it means the prosody assessment is not enabled. + /// + const double ProsodyScore; + + /// + /// The content assessment result. Only available when content assessment is enabled. + /// + std::shared_ptr ContentAssessmentResult; + + +protected: + + /*! \cond PROTECTED */ + + + explicit PronunciationAssessmentResult(const PropertyCollection& properties) : + AccuracyScore(std::stod(properties.GetProperty("AccuracyScore", "-1"))), + PronunciationScore(std::stod(properties.GetProperty("PronScore", "-1"))), + CompletenessScore(std::stod(properties.GetProperty("CompletenessScore", "-1"))), + FluencyScore(std::stod(properties.GetProperty("FluencyScore", "-1"))), + ProsodyScore(std::stod(properties.GetProperty("ProsodyScore", "-1"))) + { + if (!properties.GetProperty("ContentAssessment_GrammarScore").empty()) + { + this->ContentAssessmentResult = std::make_shared(properties); + } + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(PronunciationAssessmentResult); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_properties.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_properties.h new file mode 100644 index 0000000..5a8a7b8 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_properties.h @@ -0,0 +1,99 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +class KeywordRecognizer; + +/// +/// Class to retrieve or set a property value from a property collection. +/// +class PropertyCollection +{ +public: + + /// + /// Destructor. 
+ /// + ~PropertyCollection() + { + if (property_bag_is_valid(m_propbag)) + { + property_bag_release(m_propbag); + m_propbag = SPXHANDLE_INVALID; + } + } + + /// + /// Set value of a property. + /// + /// The id of the property. See + /// value to set + void SetProperty(PropertyId propertyID, const SPXSTRING& value) + { + property_bag_set_string(m_propbag, (int)propertyID, NULL, Utils::ToUTF8(value).c_str()); + } + + /// + /// Set value of a property. + /// + /// The name of property. + /// value to set + void SetProperty(const SPXSTRING& propertyName, const SPXSTRING& value) + { + property_bag_set_string(m_propbag, -1, Utils::ToUTF8(propertyName).c_str(), Utils::ToUTF8(value).c_str()); + } + + /// + /// Returns value of a property. + /// If the property value is not defined, the specified default value is returned. + /// + /// The id of the property. See + /// The default value which is returned if no value is defined for the property (empty string by default). + /// value of the property. + SPXSTRING GetProperty(PropertyId propertyID, const SPXSTRING& defaultValue = SPXSTRING()) const + { + const char* propCch = property_bag_get_string(m_propbag, static_cast(propertyID), nullptr, Utils::ToUTF8(defaultValue).c_str()); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(propCch)); + } + + /// + /// Returns value of a property. + /// If the property value is not defined, the specified default value is returned. + /// + /// The name of the property. + /// The default value which is returned if no value is defined for the property (empty string by default). + /// value of the property. + SPXSTRING GetProperty(const SPXSTRING& propertyName, const SPXSTRING& defaultValue = SPXSTRING()) const + { + const char* propCch = property_bag_get_string(m_propbag, -1, Utils::ToUTF8(propertyName).c_str(), Utils::ToUTF8(defaultValue).c_str()); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(propCch)); + } + +protected: + friend class KeywordRecognizer; + + /*! 
\cond PROTECTED */ + + PropertyCollection(SPXPROPERTYBAGHANDLE propbag) : m_propbag(propbag) {} + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(PropertyCollection); + + SPXPROPERTYBAGHANDLE m_propbag; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_async_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_async_recognizer.h new file mode 100644 index 0000000..6a7f2ca --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_async_recognizer.h @@ -0,0 +1,473 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognition_async_recognizer.h: Public API declarations for AsyncRecognizer C++ template class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// AsyncRecognizer abstract base class. +/// +template +class AsyncRecognizer : public Recognizer +{ +public: + + /// + /// Performs recognition in a non-blocking (asynchronous) mode. + /// + /// Future containing result value (a shared pointer to RecoResult) + /// of the asynchronous recognition. + /// + virtual std::future> RecognizeOnceAsync() = 0; + + /// + /// Asynchronously initiates continuous recognition operation. + /// + /// An empty future. + virtual std::future StartContinuousRecognitionAsync() = 0; + + /// + /// Asynchronously terminates ongoing continuous recognition operation. + /// + /// An empty future. + virtual std::future StopContinuousRecognitionAsync() = 0; + + /// + /// Asynchronously initiates keyword recognition operation. + /// + /// The keyword recognition model that specifies the keyword to be recognized. + /// An asynchronous operation that starts the keyword recognition. 
+ virtual std::future StartKeywordRecognitionAsync(std::shared_ptr model) = 0; + + /// + /// Asynchronously terminates ongoing keyword recognition operation. + /// + /// An empty future. + virtual std::future StopKeywordRecognitionAsync() = 0; + + /// + /// Signal for events indicating the start of a recognition session (operation). + /// + EventSignal SessionStarted; + + /// + /// Signal for events indicating the end of a recognition session (operation). + /// + EventSignal SessionStopped; + + /// + /// Signal for events indicating the start of speech. + /// + EventSignal SpeechStartDetected; + + /// + /// Signal for events indicating the end of speech. + /// + EventSignal SpeechEndDetected; + + /// + /// Signal for events containing intermediate recognition results. + /// + EventSignal Recognizing; + + /// + /// Signal for events containing final recognition results. + /// (indicating a successful recognition attempt). + /// + EventSignal Recognized; + + /// + /// Signal for events containing canceled recognition results + /// (indicating a recognition attempt that was canceled as a result or a direct cancellation request + /// or, alternatively, a transport or protocol failure). + /// + EventSignal Canceled; + +protected: + + /*! 
\cond PROTECTED */ + + explicit AsyncRecognizer(SPXRECOHANDLE hreco) throw() : + Recognizer(hreco), + SessionStarted(GetSessionEventConnectionsChangedCallback()), + SessionStopped(GetSessionEventConnectionsChangedCallback()), + SpeechStartDetected(GetRecognitionEventConnectionsChangedCallback()), + SpeechEndDetected(GetRecognitionEventConnectionsChangedCallback()), + Recognizing(GetRecoEventConnectionsChangedCallback()), + Recognized(GetRecoEventConnectionsChangedCallback()), + Canceled(GetRecoCanceledEventConnectionsChangedCallback()), + m_properties(hreco), + m_hasyncRecognize(SPXHANDLE_INVALID), + m_hasyncStartContinuous(SPXHANDLE_INVALID), + m_hasyncStopContinuous(SPXHANDLE_INVALID), + m_hasyncStartKeyword(SPXHANDLE_INVALID), + m_hasyncStopKeyword(SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + }; + + virtual ~AsyncRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + }; + + virtual void TermRecognizer() override + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + Canceled.DisconnectAll(); + Recognized.DisconnectAll(); + Recognizing.DisconnectAll(); + SpeechEndDetected.DisconnectAll(); + SpeechStartDetected.DisconnectAll(); + SessionStopped.DisconnectAll(); + SessionStarted.DisconnectAll(); + + // Close the async handles we have open for Recognize, StartContinuous, and StopContinuous + for (auto handle : { &m_hasyncRecognize, &m_hasyncStartContinuous, &m_hasyncStopContinuous }) + { + if (*handle != SPXHANDLE_INVALID && ::recognizer_async_handle_is_valid(*handle)) + { + ::recognizer_async_handle_release(*handle); + *handle = SPXHANDLE_INVALID; + } + } + + // Ask the base to term + Recognizer::TermRecognizer(); + } + + std::future> RecognizeOnceAsyncInternal() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> std::shared_ptr { + SPX_INIT_HR(hr); + + 
SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(hr = recognizer_recognize_once(m_hreco, &hresult)); + + return std::make_shared(hresult); + }); + + return future; + }; + + std::future StartContinuousRecognitionAsyncInternal() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStartContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async(m_hreco, &m_hasyncStartContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_continuous_recognition_async_wait_for(m_hasyncStartContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStartContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + }; + + std::future StopContinuousRecognitionAsyncInternal() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStopContinuous)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async(m_hreco, &m_hasyncStopContinuous)); + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_continuous_recognition_async_wait_for(m_hasyncStopContinuous, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStopContinuous); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStopContinuous = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + } + + std::future StartKeywordRecognitionAsyncInternal(std::shared_ptr model) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, model, this]() -> void { + SPX_INIT_HR(hr); + 
SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStartKeyword)); // close any unfinished previous attempt + + auto hkeyword = (SPXKEYWORDHANDLE)(*model.get()); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_keyword_recognition_async(m_hreco, hkeyword, &m_hasyncStartKeyword)); + SPX_EXITFN_ON_FAIL(hr = recognizer_start_keyword_recognition_async_wait_for(m_hasyncStartKeyword, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStartKeyword); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartKeyword = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + }; + + std::future StopKeywordRecognitionAsyncInternal() + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPX_INIT_HR(hr); + SPX_THROW_ON_FAIL(hr = recognizer_async_handle_release(m_hasyncStopKeyword)); // close any unfinished previous attempt + + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_keyword_recognition_async(m_hreco, &m_hasyncStopKeyword)); + SPX_EXITFN_ON_FAIL(hr = recognizer_stop_keyword_recognition_async_wait_for(m_hasyncStopKeyword, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = recognizer_async_handle_release(m_hasyncStopKeyword); + SPX_REPORT_ON_FAIL(releaseHr); + m_hasyncStartKeyword = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(hr); + }); + + return future; + }; + + virtual void RecoEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Recognizing) + { + recognizer_recognizing_set_callback(m_hreco, Recognizing.IsConnected() ? AsyncRecognizer::FireEvent_Recognizing: nullptr, this); + } + else if (&recoEvent == &Recognized) + { + recognizer_recognized_set_callback(m_hreco, Recognized.IsConnected() ? 
AsyncRecognizer::FireEvent_Recognized: nullptr, this); + } + } + } + + virtual void RecoCanceledEventConnectionsChanged(const EventSignal& recoEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recoEvent == &Canceled) + { + recognizer_canceled_set_callback(m_hreco, Canceled.IsConnected() ? AsyncRecognizer::FireEvent_Canceled : nullptr, this); + } + } + } + + virtual void RecognitionEventConnectionsChanged(const EventSignal& recognitionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&recognitionEvent == &SpeechStartDetected) + { + recognizer_speech_start_detected_set_callback(m_hreco, SpeechStartDetected.IsConnected() ? AsyncRecognizer::FireEvent_SpeechStartDetected : nullptr, this); + } + else if (&recognitionEvent == &SpeechEndDetected) + { + recognizer_speech_end_detected_set_callback(m_hreco, SpeechEndDetected.IsConnected() ? AsyncRecognizer::FireEvent_SpeechEndDetected : nullptr, this); + } + } + } + + virtual void SessionEventConnectionsChanged(const EventSignal& sessionEvent) + { + if (m_hreco != SPXHANDLE_INVALID) + { + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + SPX_DBG_TRACE_VERBOSE_IF(!::recognizer_handle_is_valid(m_hreco), "%s: m_hreco is INVALID!!!", __FUNCTION__); + + if (&sessionEvent == &SessionStarted) + { + recognizer_session_started_set_callback(m_hreco, SessionStarted.IsConnected() ? AsyncRecognizer::FireEvent_SessionStarted: nullptr, this); + } + else if (&sessionEvent == &SessionStopped) + { + recognizer_session_stopped_set_callback(m_hreco, SessionStopped.IsConnected() ? 
AsyncRecognizer::FireEvent_SessionStopped : nullptr, this); + } + } + } + + static void FireEvent_SessionStarted(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent { new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStarted.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SessionStopped(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr sessionEvent { new SessionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SessionStopped.Signal(*sessionEvent.get()); + + // SessionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SpeechStartDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechStartDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... 
release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_SpeechEndDetected(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new RecognitionEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SpeechEndDetected.Signal(*recoEvent.get()); + + // RecognitionEventArgs doesn't hold hevent, and thus can't release it properly ... release it here + SPX_DBG_ASSERT(recognizer_event_handle_is_valid(hevent)); + recognizer_event_handle_release(hevent); + } + + static void FireEvent_Recognizing(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent { new RecoEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Recognizing.Signal(*recoEvent.get()); + } + + static void FireEvent_Recognized(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent { new RecoEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Recognized.Signal(*recoEvent.get()); + } + + static void FireEvent_Canceled(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + + auto ptr = new RecoCanceledEventArgs(hevent); + std::shared_ptr recoEvent(ptr); + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Canceled.Signal(*ptr); + } + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRECOHANDLE hreco) : + PropertyCollection( + [=](){ + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + recognizer_get_property_bag(hreco, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + SPXASYNCHANDLE 
m_hasyncRecognize; + SPXASYNCHANDLE m_hasyncStartContinuous; + SPXASYNCHANDLE m_hasyncStopContinuous; + SPXASYNCHANDLE m_hasyncStartKeyword; + SPXASYNCHANDLE m_hasyncStopKeyword; + + template + static Handle HandleOrInvalid(std::shared_ptr audioInput) + { + return audioInput == nullptr + ? (Handle)SPXHANDLE_INVALID + : (Handle)(*audioInput.get()); + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(AsyncRecognizer); + + inline std::function&)> GetSessionEventConnectionsChangedCallback() + { + return [=](const EventSignal& sessionEvent) { this->SessionEventConnectionsChanged(sessionEvent); }; + } + + inline std::function&)> GetRecoEventConnectionsChangedCallback() + { + return [=](const EventSignal& recoEvent) { this->RecoEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecoCanceledEventConnectionsChangedCallback() + { + return [=](const EventSignal& recoEvent) { this->RecoCanceledEventConnectionsChanged(recoEvent); }; + } + + inline std::function&)> GetRecognitionEventConnectionsChangedCallback() + { + return [=](const EventSignal& recoEvent) { this->RecognitionEventConnectionsChanged(recoEvent); }; + } +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_base_async_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_base_async_recognizer.h new file mode 100644 index 0000000..5723e46 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_base_async_recognizer.h @@ -0,0 +1,53 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_recognition_base_async_recognizer.h: Public API declarations for BaseAsyncRecognizer C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// BaseAsyncRecognizer class. +/// +class BaseAsyncRecognizer : public AsyncRecognizer +{ +protected: + + /*! \cond PROTECTED */ + + using BaseType = AsyncRecognizer; + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit BaseAsyncRecognizer(SPXRECOHANDLE hreco) : + BaseType(hreco) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + ~BaseAsyncRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + DISABLE_DEFAULT_CTORS(BaseAsyncRecognizer); + + /*! \endcond */ +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_eventargs.h new file mode 100644 index 0000000..59713c8 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_eventargs.h @@ -0,0 +1,68 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognition_eventargs.h: Public API declarations for RecognitionEventArgs C++ base class +// + +#pragma once +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Provides data for the RecognitionEvent. +/// +class RecognitionEventArgs : public SessionEventArgs +{ +public: + + /// + /// Constructor. Creates a new instance using the provided handle. + /// + /// Event handle. 
+ explicit RecognitionEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + Offset(m_offset), + m_offset(GetOffset(hevent)) + { + }; + + /// + virtual ~RecognitionEventArgs() {} + + /// + /// The offset of recognition event + /// + const uint64_t& Offset; + +protected: + + /*! \cond PROTECTED */ + + /// + /// Extract offset from given event handle + /// + static uint64_t GetOffset(SPXEVENTHANDLE hevent) + { + uint64_t offset = 0; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_offset(hevent, &offset)); + return offset; + } + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(RecognitionEventArgs); + uint64_t m_offset; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_result.h new file mode 100644 index 0000000..4ff95c1 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognition_result.h @@ -0,0 +1,319 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognition_result.h: Public API declarations for RecognitionResult C++ base class and related enum class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Contains detailed information about result of a recognition operation. +/// +class RecognitionResult +{ +private: + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + /*! 
\endcond */ + +public: + + /// + /// Virtual destructor. + /// + virtual ~RecognitionResult() + { + ::recognizer_result_handle_release(m_hresult); + m_hresult = SPXHANDLE_INVALID; + }; + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Recognition reason. + /// + const Speech::ResultReason& Reason; + + /// + /// Normalized text generated by a speech recognition engine from recognized input. + /// + const SPXSTRING& Text; + + /// + /// Duration of recognized speech in ticks. + /// A single tick represents one hundred nanoseconds or one ten-millionth of a second. + /// + /// Duration of recognized speech in ticks. + uint64_t Duration() const { return m_duration; } + + /// + /// Offset of the recognized speech in ticks. + /// A single tick represents one hundred nanoseconds or one ten-millionth of a second. + /// + /// Offset of the recognized speech in ticks. + uint64_t Offset() const { return m_offset; } + + /// + /// Index of the input audio channel where the speech was recognized. + /// Numbering starts at zero. + /// + /// Channel index. + uint32_t Channel() const { return m_channel; } + + /// + /// Collection of additional RecognitionResult properties. + /// + const PropertyCollection& Properties; + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + +protected: + + /*! \cond PROTECTED */ + + explicit RecognitionResult(SPXRESULTHANDLE hresult) : + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + Text(m_text), + Properties(m_properties), + Handle(m_hresult), + m_hresult(hresult) + { + PopulateResultFields(hresult, &m_resultId, &m_reason, &m_text); + } + + const SPXRESULTHANDLE& Handle; + + /*! 
\endcond */ + +private: + + DISABLE_DEFAULT_CTORS(RecognitionResult); + + void PopulateResultFields(SPXRESULTHANDLE hresult, SPXSTRING* resultId, Speech::ResultReason* reason, SPXSTRING* text) + { + + SPX_INIT_HR(hr); + + const size_t maxCharCount = 2048; + char sz[maxCharCount + 1] = {}; + + if (resultId != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_result_id(hresult, sz, maxCharCount)); + *resultId = Utils::ToSPXString(sz); + } + + if (reason != nullptr) + { + Result_Reason resultReason; + SPX_THROW_ON_FAIL(hr = result_get_reason(hresult, &resultReason)); + *reason = (Speech::ResultReason)resultReason; + } + + if (text != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_text(hresult, sz, maxCharCount)); + *text = Utils::ToSPXString(sz); + } + + SPX_THROW_ON_FAIL(hr = result_get_offset(hresult, &m_offset)); + SPX_THROW_ON_FAIL(hr = result_get_duration(hresult, &m_duration)); + SPX_THROW_ON_FAIL(hr = result_get_channel(hresult, &m_channel)); + } + + SPXRESULTHANDLE m_hresult; + + SPXSTRING m_resultId; + Speech::ResultReason m_reason; + SPXSTRING m_text; + uint64_t m_offset; + uint64_t m_duration; + uint32_t m_channel; +}; + + +/// +/// Contains detailed information about why a result was canceled. +/// +class CancellationDetails +{ +private: + + CancellationReason m_reason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Creates an instance of CancellationDetails object for the canceled RecognitionResult. + /// + /// The result that was canceled. + /// A shared pointer to CancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + // VSTS 1407221 + // SPX_THROW_HR_IF(result->Reason != ResultReason::Canceled, SPXERR_INVALID_ARG); + auto ptr = new CancellationDetails(result.get()); + auto cancellation = std::shared_ptr(ptr); + return cancellation; + } + + /// + /// The reason the result was canceled. 
+ /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// Added in version 1.1.0. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +protected: + + /*! \cond PROTECTED */ + + CancellationDetails(RecognitionResult* result) : + m_reason(GetCancellationReason(result)), + m_errorCode(GetCancellationErrorCode(result)), + Reason(m_reason), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(CancellationDetails); + + Speech::CancellationReason GetCancellationReason(RecognitionResult* result) + { + Result_CancellationReason reason = CancellationReason_Error; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_reason_canceled(hresult, &reason)); + + return (Speech::CancellationReason)reason; + } + + Speech::CancellationErrorCode GetCancellationErrorCode(RecognitionResult* result) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return (Speech::CancellationErrorCode)errorCode; + } + +}; + + +/// +/// Contains detailed information for NoMatch recognition results. +/// +class NoMatchDetails +{ +private: + + NoMatchReason m_reason; + +public: + + /// + /// Creates an instance of NoMatchDetails object for NoMatch RecognitionResults. + /// + /// The recognition result that was not recognized. + /// A shared pointer to NoMatchDetails. 
+ static std::shared_ptr FromResult(std::shared_ptr result) + { + // VSTS 1407221 + // SPX_THROW_HR_IF(SPXERR_INVALID_ARG, result->Reason != ResultReason::NoMatch); + auto ptr = new NoMatchDetails(result.get()); + auto noMatch = std::shared_ptr(ptr); + return noMatch; + } + + /// + /// The reason the result was not recognized. + /// + const NoMatchReason& Reason; + +protected: + + /*! \cond PROTECTED */ + + NoMatchDetails(RecognitionResult* result) : + m_reason(GetNoMatchReason(result)), + Reason(m_reason) + { + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(NoMatchDetails); + + Speech::NoMatchReason GetNoMatchReason(RecognitionResult* result) + { + Result_NoMatchReason reason = NoMatchReason_NotRecognized; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_no_match_reason(hresult, &reason)); + + return (Speech::NoMatchReason)reason; + } + +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognizer.h new file mode 100644 index 0000000..a63e451 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_recognizer.h @@ -0,0 +1,72 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_recognizer.h: Public API declarations for Recognizer C++ base class +// + +#pragma once +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Recognizer base class. +/// +class Recognizer : public std::enable_shared_from_this +{ + friend class Connection; + friend class PronunciationAssessmentConfig; +public: + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXRECOHANDLE() const { return m_hreco; } + +protected: + + /*! 
\cond PROTECTED */ + + explicit Recognizer(SPXRECOHANDLE hreco) : + m_hreco(hreco) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + } + + virtual ~Recognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + virtual void TermRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + if (m_hreco != SPXHANDLE_INVALID) + { + ::recognizer_handle_release(m_hreco); + m_hreco = SPXHANDLE_INVALID; + SPX_DBG_TRACE_VERBOSE("%s: m_hreco=0x%8p", __FUNCTION__, (void*)m_hreco); + } + } + + SPXRECOHANDLE m_hreco; + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(Recognizer); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_session.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_session.h new file mode 100644 index 0000000..92c6c38 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_session.h @@ -0,0 +1,86 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_session.h: Public API declarations for Session C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/*! 
\cond PRIVATE */ + +class Session +{ +private: + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXSESSIONHANDLE hsession) : + PropertyCollection( + [=](){ + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + session_get_property_bag(hsession, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + +public: + + template + static std::shared_ptr FromRecognizer(std::shared_ptr recognizer) + { + SPX_INIT_HR(hr); + + SPXSESSIONHANDLE hsession = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(hr = ::session_from_recognizer(recognizer->m_hreco, &hsession)); + + return std::make_shared(hsession); + } + + explicit Session(SPXSESSIONHANDLE hsession) : + m_properties(hsession), + Properties(m_properties), + m_hsession(hsession) + { + SPX_DBG_TRACE_FUNCTION(); + } + + virtual ~Session() + { + SPX_DBG_TRACE_FUNCTION(); + + if (m_hsession != SPXHANDLE_INVALID) + { + ::session_handle_release(m_hsession); + m_hsession = SPXHANDLE_INVALID; + } + } + + PropertyCollection& Properties; + +private: + + DISABLE_COPY_AND_MOVE(Session); + + SPXSESSIONHANDLE m_hsession; +}; + +/*! \endcond */ + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_session_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_session_eventargs.h new file mode 100644 index 0000000..117de20 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_session_eventargs.h @@ -0,0 +1,73 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_session_eventargs.h: Public API declarations for SessionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Base class for session event arguments. 
+/// +class SessionEventArgs : public EventArgs +{ +public: + + /// + /// Constructor. + /// + /// Event handle + explicit SessionEventArgs(SPXEVENTHANDLE hevent) : + SessionId(m_sessionId), + m_sessionId(GetSessionId(hevent)) + { + }; + + /// + virtual ~SessionEventArgs() {} + + /// + /// Session identifier (a GUID in string format). + /// + const SPXSTRING& SessionId; + + +protected: + + /*! \cond PROTECTED */ + + /// + /// Extract session identifier from given event handle + /// + static const SPXSTRING GetSessionId(SPXEVENTHANDLE hevent) + { + static const auto cchMaxUUID = 36; + static const auto cchMaxSessionId = cchMaxUUID + 1; + char sessionId[cchMaxSessionId] = {}; + + SPX_THROW_ON_FAIL(recognizer_session_event_get_session_id(hevent, sessionId, cchMaxSessionId)); + return Utils::ToSPXString(sessionId); + }; + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(SessionEventArgs); + + SPXSTRING m_sessionId; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_smart_handle.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_smart_handle.h new file mode 100644 index 0000000..d4727b9 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_smart_handle.h @@ -0,0 +1,60 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_smart_handle.h: Public API declarations for SmartHandle class and related typedef +// + +#pragma once +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +typedef SPXHR(SPXAPI_CALLTYPE *SmartHandleCloseFunction)(SPXHANDLE); + + +/// +/// Smart handle class. 
+/// +template +class SmartHandle +{ +public: + + SmartHandle(T handle = SPXHANDLE_INVALID) : m_handle(handle) { }; + ~SmartHandle() { reset(); } + + explicit operator T&() const { return m_handle; } + + T get() const { return m_handle; } + operator T() const { return m_handle; } + + T* operator &() + { + SPX_THROW_HR_IF(SPXERR_ALREADY_INITIALIZED, !InvalidHandle(m_handle)); + return &m_handle; + } + + void reset() + { + if (!InvalidHandle(m_handle)) + { + closeFunction(m_handle); + m_handle = SPXHANDLE_INVALID; + } + } + +private: + + static bool InvalidHandle(T t) { return t == nullptr || t == SPXHANDLE_INVALID; } + + DISABLE_COPY_AND_MOVE(SmartHandle); + T m_handle; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_source_lang_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_source_lang_config.h new file mode 100644 index 0000000..a312525 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_source_lang_config.h @@ -0,0 +1,91 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once + +#include +#include +#include + +#include "speechapi_cxx_properties.h" +#include +#include +#include "speechapi_c_common.h" + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines source language configuration, added in 1.8.0 +/// +class SourceLanguageConfig +{ +public: + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXSOURCELANGCONFIGHANDLE() const { return m_hconfig; } + + /// + /// Creates an instance of the SourceLanguageConfig with source language + /// + /// The source language + /// A shared pointer to the new SourceLanguageConfig instance. 
+ static std::shared_ptr FromLanguage(const SPXSTRING& language) + { + SPXSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(source_lang_config_from_language(&hconfig, language.c_str())); + auto ptr = new SourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the SourceLanguageConfig with source language and custom endpoint id. A custom endpoint id corresponds to custom models. + /// + /// The source language + /// The custom endpoint id + /// A shared pointer to the new SourceLanguageConfig instance. + static std::shared_ptr FromLanguage(const SPXSTRING& language, const SPXSTRING& endpointId) + { + SPXSOURCELANGCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(source_lang_config_from_language_and_endpointId(&hconfig, language.c_str(), endpointId.c_str())); + auto ptr = new SourceLanguageConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Destructs the object. + /// + virtual ~SourceLanguageConfig() + { + source_lang_config_release(m_hconfig); + property_bag_release(m_propertybag); + } + +private: + /// + /// Internal constructor. Creates a new instance using the provided handle. 
+ /// + explicit SourceLanguageConfig(SPXSOURCELANGCONFIGHANDLE hconfig) + :m_hconfig(hconfig) + { + SPX_THROW_ON_FAIL(source_lang_config_get_property_bag(hconfig, &m_propertybag)); + } + + /// + /// Internal member variable that holds the config + /// + SPXSOURCELANGCONFIGHANDLE m_hconfig; + + /// + /// Internal member variable that holds the properties of the config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + DISABLE_COPY_AND_MOVE(SourceLanguageConfig); +}; +}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_source_language_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_source_language_recognizer.h new file mode 100644 index 0000000..36b851a --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_source_language_recognizer.h @@ -0,0 +1,173 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_source_language_recognizer.h: Public API declarations for SourceLanguageRecognizer C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +class Session; + +/// +/// Class for source language recognizers. +/// You can use this class for standalone language detection. +/// Added in version 1.17.0 +/// +class SourceLanguageRecognizer final : public AsyncRecognizer +{ +public: + + using BaseType = AsyncRecognizer; + + /// + /// Create a source language recognizer from a speech config, auto detection source language config and audio config + /// + /// Speech configuration + /// Auto detection source language config + /// Audio configuration + /// A smart pointer wrapped source language recognizer pointer. 
+ static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_source_language_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit SourceLanguageRecognizer(SPXRECOHANDLE hreco) : BaseType(hreco), Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~SourceLanguageRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Starts speech recognition, and returns after a single utterance is recognized. The end of a + /// single utterance is determined by listening for silence at the end or until a maximum of about 30 + /// seconds of audio is processed. The task returns the recognition text as result. + /// Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single + /// shot recognition like command or query. + /// For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. + /// + /// Future containing result value (a shared pointer to SpeechRecognitionResult) + /// of the asynchronous speech recognition. + /// + std::future> RecognizeOnceAsync() override + { + return BaseType::RecognizeOnceAsyncInternal(); + } + + /// + /// Asynchronously initiates continuous speech recognition operation. + /// + /// An empty future. + std::future StartContinuousRecognitionAsync() override + { + return BaseType::StartContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously terminates ongoing continuous speech recognition operation. 
+ /// + /// An empty future. + std::future StopContinuousRecognitionAsync() override + { + return BaseType::StopContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously initiates keyword recognition operation. + /// + /// Specifies the keyword model to be used. + /// An empty future. + std::future StartKeywordRecognitionAsync(std::shared_ptr model) override + { + return BaseType::StartKeywordRecognitionAsyncInternal(model); + } + + /// + /// Asynchronously terminates keyword recognition operation. + /// + /// An empty future. + std::future StopKeywordRecognitionAsync() override + { + return BaseType::StopKeywordRecognitionAsyncInternal(); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Gets the endpoint ID of a customized speech model that is used for speech recognition. + /// + /// the endpoint ID of a customized speech model that is used for speech recognition + SPXSTRING GetEndpointId() + { + return Properties.GetProperty(PropertyId::SpeechServiceConnection_EndpointId, SPXSTRING()); + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the recognizer will encounter errors during recognition. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. 
+ /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + +private: + DISABLE_DEFAULT_CTORS(SourceLanguageRecognizer); + friend class Microsoft::CognitiveServices::Speech::Session; +}; +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_identification_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_identification_model.h new file mode 100644 index 0000000..786e3e7 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_identification_model.h @@ -0,0 +1,77 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speaker_identification_model.h: Public API declarations for SpeakerIdentificationModel C++ class +// + +#pragma once +#include +#include +#include + +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Represents speaker identification model used with speaker recognition class. +/// Added in version 1.12.0 +/// +class SpeakerIdentificationModel : public std::enable_shared_from_this +{ +public: + + /// + /// Creates a speaker identification model using the voice profiles. + /// + /// a vector of voice profiles. + /// A shared pointer to speaker identification model. + static std::shared_ptr FromProfiles(const std::vector>& profiles) + { + SPXSIMODELHANDLE hsimodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speaker_identification_model_create(&hsimodel)); + for (auto& profile : profiles) + { + SPX_THROW_ON_FAIL(speaker_identification_model_add_profile(hsimodel, (SPXVOICEPROFILEHANDLE)(*profile))); + } + + return std::shared_ptr{ new SpeakerIdentificationModel(hsimodel) }; + } + + /// + /// Virtual destructor. 
+ /// + virtual ~SpeakerIdentificationModel() { speaker_identification_model_release_handle(m_simodel); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXSIMODELHANDLE() { return m_simodel; } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a speaker identification model using the provided handle. + /// + /// speaker identification handle. + explicit SpeakerIdentificationModel(SPXSIMODELHANDLE hsimodel = SPXHANDLE_INVALID) : m_simodel(hsimodel) { } + + + /*! \endcond */ + +private: + + DISABLE_COPY_AND_MOVE(SpeakerIdentificationModel); + + SPXSIMODELHANDLE m_simodel; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_recognition_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_recognition_result.h new file mode 100644 index 0000000..d37e643 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_recognition_result.h @@ -0,0 +1,236 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speaker_recognition_result.h: Public API declarations for SpeakerRecognitionResult C++ class +// + +#pragma once +#include +#include +#include + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Represents speaker recognition result. +/// Added in 1.12.0 +/// +class SpeakerRecognitionResult +{ + +private: + + /// Internal member variable that holds the speakerRecognition result handle. + /// + SPXRESULTHANDLE m_hresult; + + /*! 
\cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [hresult]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the speaker recognition result. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. + explicit SpeakerRecognitionResult(SPXRESULTHANDLE hresult) : + m_hresult(hresult), + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + ProfileId(m_profileId), + Properties(m_properties), + m_profileId(Properties.GetProperty("speakerrecognition.profileid","")), + m_score(std::stof(Properties.GetProperty("speakerrecognition.score", "0.0"))) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + PopulateResultFields(hresult, &m_resultId, &m_reason); + } + + /// + /// Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + /// + /// Destructor. + /// + ~SpeakerRecognitionResult() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + recognizer_result_handle_release(m_hresult); + } + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Reason of the speaker recognition result. + /// + const ResultReason& Reason; + + /// + /// The profile id of the first verified/identified speaker. The rest of recognized speakers can be retrieved by parsing the json result string in the Properties. + /// + const SPXSTRING& ProfileId; + + /// + /// Collection of additional properties. + /// + const PropertyCollection& Properties; + + /// + /// Returns a similarity score. 
+ /// + /// A float number indicating the similarity between input audio and targeted voice profile.This number is between 0 and 1. A higher number means higher similarity.< / returns> + double GetScore() const + { + return m_score; + } + +private: + + /*! \cond PRIVATE */ + + DISABLE_DEFAULT_CTORS(SpeakerRecognitionResult); + + void PopulateResultFields(SPXRESULTHANDLE hresult, SPXSTRING* resultId, Speech::ResultReason* reason) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 2048; + char sz[maxCharCount + 1] = {}; + + if (resultId != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_result_id(hresult, sz, maxCharCount)); + *resultId = Utils::ToSPXString(sz); + } + + if (reason != nullptr) + { + Result_Reason resultReason; + SPX_THROW_ON_FAIL(hr = result_get_reason(hresult, &resultReason)); + *reason = (Speech::ResultReason)resultReason; + } + } + + SPXSTRING m_resultId; + ResultReason m_reason; + SPXSTRING m_profileId; + float m_score; + + /*! \endcond */ +}; + +/// +/// Represents the details of a canceled speaker recognition result. +/// +class SpeakerRecognitionCancellationDetails +{ +private: + + CancellationReason m_reason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Creates an instance of SpeakerRecognitionCancellationDetails object for the canceled speaker recognition result. + /// + /// The result that was canceled. + /// A shared pointer to SpeakerRecognitionCancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + return std::shared_ptr { new SpeakerRecognitionCancellationDetails(result.get()) }; + } + + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful speaker recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful speaker recognition ( is set to Error). 
+ /// + const SPXSTRING ErrorDetails; + +protected: + + /*! \cond PROTECTED */ + + SpeakerRecognitionCancellationDetails(SpeakerRecognitionResult* result) : + m_reason(GetCancellationReason(result)), + m_errorCode(GetCancellationErrorCode(result)), + Reason(m_reason), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(SpeakerRecognitionCancellationDetails); + + + CancellationReason GetCancellationReason(SpeakerRecognitionResult* result) + { + Result_CancellationReason reason = CancellationReason_Error; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_reason_canceled(hresult, &reason)); + + return static_cast(reason); + } + + CancellationErrorCode GetCancellationErrorCode(SpeakerRecognitionResult* result) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return static_cast(errorCode); + } +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_recognizer.h new file mode 100644 index 0000000..6922573 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_recognizer.h @@ -0,0 +1,142 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_speaker_recognizer.h: Public API declarations for speaker recognizer C++ class +// + +#pragma once +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Perform speaker recognition. +/// Added in version 1.12.0 +/// +class SpeakerRecognizer : public std::enable_shared_from_this +{ +public: + + /// + /// Create a speaker recognizer from a speech config and audio config. + /// + /// A shared smart pointer of a speech config. + /// A shared smart pointer of a audio config. + /// A smart pointer wrapped speaker recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::shared_ptr audioInput) + { + SPXSPEAKERIDHANDLE hSpeakerRecognizerHandle; + SPX_THROW_ON_FAIL(::recognizer_create_speaker_recognizer_from_config( + &hSpeakerRecognizerHandle, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioInput))); + return std::shared_ptr{ new SpeakerRecognizer(hSpeakerRecognizerHandle) }; + } + + /// + /// Destructor. + /// + virtual ~SpeakerRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + ::speaker_recognizer_release_handle(m_hSpeakerRecognizer); + m_hSpeakerRecognizer = SPXHANDLE_INVALID; + } + + /// + /// Verify the speaker in the verification model. + /// + /// A shared smart pointer of a speaker verficiation model. + /// A smart pointer wrapped speaker recognition result future. + std::future> RecognizeOnceAsync(std::shared_ptr model) + { + return RunAsync(speaker_recognizer_verify, model); + } + + /// + /// Identify the speakers in the Speaker Identification Model. + /// + /// A shared smart pointer of a speaker identification model. + /// A smart pointer wrapped speaker recognition result future. + std::future> RecognizeOnceAsync(std::shared_ptr model) + { + return RunAsync(speaker_recognizer_identify, model); + } + +protected: + + /*! 
\cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// A Speaker Recognizer handle. + explicit SpeakerRecognizer(SPXSPEAKERIDHANDLE hSpeakerRecognizer) : + m_hSpeakerRecognizer(hSpeakerRecognizer), + m_properties(hSpeakerRecognizer), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /*! \endcond */ + +private: + + /*! \cond PRIVATE */ + + SPXSPEAKERIDHANDLE m_hSpeakerRecognizer; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXSPEAKERIDHANDLE hSpeakerRecognizer) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + speaker_recognizer_get_property_bag(hSpeakerRecognizer, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + template < class SpeakerModelPtrType, class SpeakerModelHandleType> + inline std::future> RunAsync(std::function func, std::shared_ptr model) + { + auto keepalive = this->shared_from_this(); + return std::async(std::launch::async, [keepalive, this, func, model]() + { + SPXRESULTHANDLE hResultHandle = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(func(m_hSpeakerRecognizer, (SpeakerModelHandleType)(*model), &hResultHandle)); + return std::shared_ptr { new SpeakerRecognitionResult{ hResultHandle } }; + }); + } + + /*! \endcond */ + +public: + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_verification_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_verification_model.h new file mode 100644 index 0000000..14ffd59 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speaker_verification_model.h @@ -0,0 +1,71 @@ +// +// Copyright (c) Microsoft. 
All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speaker_verification_model.h: Public API declarations for SpeakerVerificationModel C++ class +// + +#pragma once +#include +#include + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Represents speaker verification model used with speaker recognition class. +/// Added in version 1.12.0 +/// +class SpeakerVerificationModel : public std::enable_shared_from_this +{ +public: + + /// + /// Creates a speaker verification model using the voice profile. + /// + /// The voice profile. + /// A shared pointer to speaker verification model. + static std::shared_ptr FromProfile(std::shared_ptr profile) + { + SPXSVMODELHANDLE hsvmodel = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speaker_verification_model_create(&hsvmodel, (SPXVOICEPROFILEHANDLE)(*profile))); + return std::shared_ptr{ new SpeakerVerificationModel(hsvmodel) }; + } + + /// + /// Virtual destructor. + /// + virtual ~SpeakerVerificationModel() { speaker_verification_model_release_handle(m_svmodel); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXSVMODELHANDLE() { return m_svmodel; } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance of speaker verification model using the provided handle. + /// + /// speaker verification model handle. + explicit SpeakerVerificationModel(SPXSIMODELHANDLE hsvmodel = SPXHANDLE_INVALID) : m_svmodel(hsvmodel) { } + + /*! 
\endcond */ + +private: + + DISABLE_COPY_AND_MOVE(SpeakerVerificationModel); + + SPXSVMODELHANDLE m_svmodel; +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_config.h new file mode 100644 index 0000000..b2f9db7 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_config.h @@ -0,0 +1,491 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_config.h: Public API declarations for SpeechConfig C++ class +// +#pragma once + +#include + +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +namespace Dialog { class DialogServiceConfig; } +class EmbeddedSpeechConfig; +class HybridSpeechConfig; + +/// +/// Class that defines configurations for speech / intent recognition, or speech synthesis. +/// +class SpeechConfig +{ +public: + friend Dialog::DialogServiceConfig; + friend EmbeddedSpeechConfig; + friend HybridSpeechConfig; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXSPEECHCONFIGHANDLE() const { return m_hconfig; } + + /// + /// Creates an instance of the speech config with specified subscription key and region. + /// + /// The subscription key. + /// The region name (see the region page). + /// A shared pointer to the new speech config instance. 
+ static std::shared_ptr FromSubscription(const SPXSTRING& subscription, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_subscription(&hconfig, Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str())); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the speech config with specified authorization token and region. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new recognizer, the new token value will not apply to recognizers that have already been created. + /// For recognizers that have been created before, you need to set authorization token of the corresponding recognizer + /// to refresh the token. Otherwise, the recognizers will encounter errors during recognition. + /// + /// The authorization token. + /// The region name (see the region page). + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_authorization_token(&hconfig, Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str())); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the speech config with specified endpoint and subscription. + /// This method is intended only for users who use a non-standard service endpoint. + /// Note: The query parameters specified in the endpoint URI are not changed, even if they are set by any other APIs. 
+ /// For example, if the recognition language is defined in URI as query parameter "language=de-DE", and also set by SetSpeechRecognitionLanguage("en-US"), + /// the language setting in URI takes precedence, and the effective language is "de-DE". + /// Only the parameters that are not specified in the endpoint URI can be set by other APIs. + /// Note: To use an authorization token with FromEndpoint, use FromEndpoint(const SPXSTRING&), + /// and then call SetAuthorizationToken() on the created SpeechConfig instance. + /// + /// The service endpoint to connect to. + /// The subscription key. + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromEndpoint(const SPXSTRING& endpoint, const SPXSTRING& subscription) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_endpoint(&hconfig, Utils::ToUTF8(endpoint).c_str(), Utils::ToUTF8(subscription).c_str())); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of SpeechConfig with specified endpoint. + /// This method is intended only for users who use a non-standard service endpoint. + /// Note: The query parameters specified in the endpoint URI are not changed, even if they are set by any other APIs. + /// Whether a specific query parameter is supported or not, depends on the endpoint and scenario. + /// For example, if the recognition language is defined in URI as query parameter "language=de-DE", and also set by SetSpeechRecognitionLanguage("en-US"), + /// the language setting in URI takes precedence, and the effective language is "de-DE". + /// The example only applies when the endpoint and scenario combination supports language as a query parameter. + /// Only the parameters that are not specified in the endpoint URI can be set by other APIs. 
+ /// Note: If the endpoint requires a subscription key for authentication, use FromEndpoint(const SPXSTRING&, const SPXSTRING&) to pass + /// the subscription key as parameter. + /// To use an authorization token with FromEndpoint, use this method to create a SpeechConfig instance, and then + /// call SetAuthorizationToken() on the created SpeechConfig instance. + /// Note: Added in version 1.5.0. + /// + /// The service endpoint URI to connect to. + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromEndpoint(const SPXSTRING& endpoint) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_endpoint(&hconfig, Utils::ToUTF8(endpoint).c_str(), nullptr)); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of the speech config with specified host and subscription. + /// This method is intended only for users who use a non-default service host. Standard resource path will be assumed. + /// For services with a non-standard resource path or no path at all, use FromEndpoint instead. + /// Note: Query parameters are not allowed in the host URI and must be set by other APIs. + /// Note: To use an authorization token with FromHost, use FromHost(const SPXSTRING&), + /// and then call SetAuthorizationToken() on the created SpeechConfig instance. + /// Note: Added in version 1.8.0. + /// + /// The service host to connect to. Format is "protocol://host:port" where ":port" is optional. + /// The subscription key. + /// A shared pointer to the new speech config instance. 
+ static std::shared_ptr FromHost(const SPXSTRING& host, const SPXSTRING& subscription) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_host(&hconfig, Utils::ToUTF8(host).c_str(), Utils::ToUTF8(subscription).c_str())); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Creates an instance of SpeechConfig with specified host. + /// This method is intended only for users who use a non-default service host. Standard resource path will be assumed. + /// For services with a non-standard resource path or no path at all, use FromEndpoint instead. + /// Note: Query parameters are not allowed in the host URI and must be set by other APIs. + /// Note: If the host requires a subscription key for authentication, use FromHost(const SPXSTRING&, const SPXSTRING&) to pass + /// the subscription key as parameter. + /// To use an authorization token with FromHost, use this method to create a SpeechConfig instance, and then + /// call SetAuthorizationToken() on the created SpeechConfig instance. + /// Note: Added in version 1.8.0. + /// + /// The service host URI to connect to. Format is "protocol://host:port" where ":port" is optional. + /// A shared pointer to the new speech config instance. + static std::shared_ptr FromHost(const SPXSTRING& host) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_config_from_host(&hconfig, Utils::ToUTF8(host).c_str(), nullptr)); + + auto ptr = new SpeechConfig(hconfig); + return std::shared_ptr(ptr); + } + + /// + /// Set the input language to the speech recognizer. + /// + /// Specifies the name of spoken language to be recognized in BCP-47 format. + void SetSpeechRecognitionLanguage(const SPXSTRING& lang) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_RecoLanguage), nullptr, Utils::ToUTF8(lang).c_str()); + } + + /// + /// Gets the input language to the speech recognition. 
+ /// The language is specified in BCP-47 format. + /// + /// The speech recognition language. + SPXSTRING GetSpeechRecognitionLanguage() const + { + return GetProperty(PropertyId::SpeechServiceConnection_RecoLanguage); + } + + /// + /// Sets the language of the speech synthesizer. + /// Added in version 1.4.0 + /// + /// Specifies the name of language (e.g. en-US) + void SetSpeechSynthesisLanguage(const SPXSTRING& lang) + { + SPX_THROW_ON_FAIL(property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_SynthLanguage), nullptr, Utils::ToUTF8(lang).c_str())); + } + + /// + /// Gets the language of the speech synthesizer. + /// Added in version 1.4.0 + /// + /// The speech synthesis language. + SPXSTRING GetSpeechSynthesisLanguage() const + { + return GetProperty(PropertyId::SpeechServiceConnection_SynthLanguage); + } + + /// + /// Set the voice of the speech synthesizer. + /// Added in version 1.4.0 + /// + /// Specifies the name of voice + void SetSpeechSynthesisVoiceName(const SPXSTRING& voiceName) + { + SPX_THROW_ON_FAIL(property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_SynthVoice), nullptr, Utils::ToUTF8(voiceName).c_str())); + } + + /// + /// Gets the voice of the speech synthesizer. + /// Added in version 1.4.0 + /// + /// The speech synthesis voice name. + SPXSTRING GetSpeechSynthesisVoiceName() const + { + return GetProperty(PropertyId::SpeechServiceConnection_SynthVoice); + } + + /// + /// Sets the speech synthesis output format (e.g. Riff16Khz16BitMonoPcm). + /// Added in version 1.4.0 + /// + /// Specifies the output format ID + void SetSpeechSynthesisOutputFormat(SpeechSynthesisOutputFormat formatId) + { + SPX_THROW_ON_FAIL(speech_config_set_audio_output_format(m_hconfig, static_cast(formatId))); + } + + /// + /// Gets the speech synthesis output format. + /// Added in version 1.4.0 + /// + /// The speech synthesis output format. 
+ SPXSTRING GetSpeechSynthesisOutputFormat() const + { + return GetProperty(PropertyId::SpeechServiceConnection_SynthOutputFormat); + } + + /// + /// Sets the endpoint ID of Custom Speech or Custom Voice. + /// + /// Endpoint ID. + void SetEndpointId(const SPXSTRING& endpointId) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_EndpointId), nullptr, Utils::ToUTF8(endpointId).c_str()); + } + + /// + /// Gets the endpoint ID of Custom Speech or Custom Voice. + /// + /// Endpoint ID. + SPXSTRING GetEndpointId() const + { + return GetProperty(PropertyId::SpeechServiceConnection_EndpointId); + } + + /// + /// Sets the authorization token to connect to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// As configuration values are copied when creating a new recognizer, the new token value will not apply to recognizers that have already been created. + /// For recognizers that have been created before, you need to set authorization token of the corresponding recognizer + /// to refresh the token. Otherwise, the recognizers will encounter errors during recognition. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceAuthorization_Token), nullptr, Utils::ToUTF8(token).c_str()); + } + + /// + /// Gets the authorization token to connect to the service. + /// + /// The authorization token. + SPXSTRING GetAuthorizationToken() const + { + return GetProperty(PropertyId::SpeechServiceAuthorization_Token); + } + + /// + /// Gets the subscription key that is used to create Speech Recognizer or Intent Recognizer or Translation Recognizer or Speech Synthesizer. + /// + /// The subscription key. 
+ SPXSTRING GetSubscriptionKey() const + { + return GetProperty(PropertyId::SpeechServiceConnection_Key); + } + + /// + /// Gets the region key that used to create Speech Recognizer or Intent Recognizer or Translation Recognizer or speech Synthesizer. + /// + /// Region. + SPXSTRING GetRegion() const + { + return GetProperty(PropertyId::SpeechServiceConnection_Region); + } + + /// + /// Gets speech recognition output format (simple or detailed). + /// Note: This output format is for speech recognition result, use to get synthesized audio output format. + /// + /// Speech recognition output format. + OutputFormat GetOutputFormat() const + { + auto result = GetProperty(PropertyId::SpeechServiceResponse_RequestDetailedResultTrueFalse); + return result == Utils::ToSPXString(TrueString) ? OutputFormat::Detailed : OutputFormat::Simple; + } + + /// + /// Sets speech recognition output format (simple or detailed). + /// Note: This output format is for speech recognition result, use to set synthesized audio output format. + /// + /// Speech recognition output format + void SetOutputFormat(OutputFormat format) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceResponse_RequestDetailedResultTrueFalse), nullptr, + format == OutputFormat::Detailed ? Utils::ToUTF8(TrueString) : Utils::ToUTF8(FalseString)); + } + + /// + /// Sets profanity option. + /// Added in version 1.5.0. + /// + /// Profanity option value. + void SetProfanity(ProfanityOption profanity) + { + SPX_THROW_ON_FAIL(speech_config_set_profanity(m_hconfig, (SpeechConfig_ProfanityOption)profanity)); + } + + /// + /// Enables audio logging in service. + /// Added in version 1.5.0. + /// + /// + /// Audio and content logs are stored either in Microsoft-owned storage, or in your own storage account linked + /// to your Cognitive Services subscription (Bring Your Own Storage (BYOS) enabled Speech resource). 
+ /// + void EnableAudioLogging() + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_EnableAudioLogging), nullptr, TrueString); + } + + /// + /// Includes word-level timestamps in response result. + /// Added in version 1.5.0. + /// + void RequestWordLevelTimestamps() + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceResponse_RequestWordLevelTimestamps), nullptr, TrueString); + } + + /// + /// Enables dictation mode. Only supported in speech continuous recognition. + /// Added in version 1.5.0. + /// + void EnableDictation() + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_RecoMode), nullptr, "DICTATION"); + } + + /// + /// Sets proxy configuration + /// Added in version 1.1.0 + /// + /// Note: Proxy functionality is not available on macOS. This function will have no effect on this platform. + /// + /// The host name of the proxy server, without the protocol scheme (`http://`) + /// The port number of the proxy server + /// The user name of the proxy server + /// The password of the proxy server + void SetProxy(const SPXSTRING& proxyHostName, uint32_t proxyPort, const SPXSTRING& proxyUserName = SPXSTRING(), const SPXSTRING& proxyPassword = SPXSTRING()) + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, proxyHostName.empty()); + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, proxyPort == 0); + + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_ProxyHostName), nullptr, + Utils::ToUTF8(proxyHostName).c_str()); + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_ProxyPort), nullptr, + std::to_string(proxyPort).c_str()); + if (!proxyUserName.empty()) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_ProxyUserName), nullptr, + Utils::ToUTF8(proxyUserName).c_str()); + } + if (!proxyPassword.empty()) + { + property_bag_set_string(m_propertybag, 
static_cast(PropertyId::SpeechServiceConnection_ProxyPassword), nullptr, + Utils::ToUTF8(proxyPassword).c_str()); + } + } + + /// + /// Sets a property value by name. + /// + /// The property name. + /// The property value. + void SetProperty(const SPXSTRING& name, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), Utils::ToUTF8(value).c_str()); + } + + /// + /// Gets a property value by name. + /// + /// The parameter name. + /// The property value. + SPXSTRING GetProperty(const SPXSTRING& name) const + { + const char* value = property_bag_get_string(m_propertybag, -1, Utils::ToUTF8(name).c_str(), ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Gets a property value by ID. + /// + /// The parameter id. + /// The property value. + SPXSTRING GetProperty(PropertyId id) const + { + const char* value = property_bag_get_string(m_propertybag, static_cast(id), nullptr, ""); + return Utils::ToSPXString(Utils::CopyAndFreePropertyString(value)); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, static_cast(id), nullptr, Utils::ToUTF8(value).c_str()); + } + + /// + /// Sets a property value that will be passed to service using the specified channel. + /// Added in version 1.5.0. + /// + /// The property name. + /// The property value. + /// The channel used to pass the specified property to service. + void SetServiceProperty(const SPXSTRING& name, const SPXSTRING& value, ServicePropertyChannel channel) + { + SPX_THROW_ON_FAIL(speech_config_set_service_property(m_hconfig, Utils::ToUTF8(name).c_str(), Utils::ToUTF8(value).c_str(), (SpeechConfig_ServicePropertyChannel)channel)); + } + + /// + /// Destructs the object. 
+ /// + virtual ~SpeechConfig() + { + speech_config_release(m_hconfig); + property_bag_release(m_propertybag); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit SpeechConfig(SPXSPEECHCONFIGHANDLE hconfig) + :m_hconfig(hconfig) + { + SPX_THROW_ON_FAIL(speech_config_get_property_bag(hconfig, &m_propertybag)); + } + + /// + /// Internal member variable that holds the speech config + /// + SPXSPEECHCONFIGHANDLE m_hconfig; + + /// + /// Internal member variable that holds the properties of the speech config + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + /*! \endcond */ + +private: + DISABLE_COPY_AND_MOVE(SpeechConfig); + + }; + +}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_eventargs.h new file mode 100644 index 0000000..f9106a6 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_eventargs.h @@ -0,0 +1,169 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_recognition_eventargs.h: Public API declarations for SpeechRecognitionEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Class for speech recognition event arguments. +/// +class SpeechRecognitionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. 
+ /// + /// Event handle + explicit SpeechRecognitionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~SpeechRecognitionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(recognizer_event_handle_release(m_hevent)); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Speech recognition event result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Speech recognition event result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(SpeechRecognitionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for speech recognition canceled event arguments. +/// +class SpeechRecognitionCanceledEventArgs final : public SpeechRecognitionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. 
+ /// + /// Event handle + explicit SpeechRecognitionCanceledEventArgs(SPXEVENTHANDLE hevent) : + SpeechRecognitionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~SpeechRecognitionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// Added in version 1.1.0. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. + /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(SpeechRecognitionCanceledEventArgs); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_model.h new file mode 100644 index 0000000..7f7bb70 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_model.h @@ -0,0 +1,108 @@ +// +// Copyright (c) Microsoft. 
All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_recognition_model.h: Public API declarations for SpeechRecognitionModel C++ class +// + +#pragma once +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Speech recognition model information. +/// +class SpeechRecognitionModel +{ +private: + + /// + /// Internal member variable that holds the model handle. + /// + SPXSPEECHRECOMODELHANDLE m_hmodel; + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Model handle. + explicit SpeechRecognitionModel(SPXSPEECHRECOMODELHANDLE hmodel) : + m_hmodel(hmodel), + Name(m_name), + Locales(m_locales), + Path(m_path), + Version(m_version) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + m_name = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_recognition_model_get_name(m_hmodel))); + m_locales = Utils::Split(Utils::CopyAndFreePropertyString(speech_recognition_model_get_locales(m_hmodel)), '|'); + m_path = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_recognition_model_get_path(m_hmodel))); + m_version = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_recognition_model_get_version(m_hmodel))); + } + + /// + /// Explicit conversion operator. + /// + /// Model handle. + explicit operator SPXSPEECHRECOMODELHANDLE() { return m_hmodel; } + + /// + /// Destructor. + /// + ~SpeechRecognitionModel() + { + speech_recognition_model_handle_release(m_hmodel); + } + + /// + /// Model name. + /// + const SPXSTRING& Name; + + /// + /// Locales of the model in BCP-47 format. + /// + const std::vector& Locales; + + /// + /// Model path (only valid for offline models). + /// + const SPXSTRING& Path; + + /// + /// Model version. + /// + const SPXSTRING& Version; + +private: + + DISABLE_DEFAULT_CTORS(SpeechRecognitionModel); + + /// + /// Internal member variable that holds the model name. 
+ /// + SPXSTRING m_name; + + /// + /// Internal member variable that holds the model locales. + /// + std::vector m_locales; + + /// + /// Internal member variable that holds the model path. + /// + SPXSTRING m_path; + + /// + /// Internal member variable that holds the model version. + /// + SPXSTRING m_version; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_result.h new file mode 100644 index 0000000..24f0f37 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognition_result.h @@ -0,0 +1,45 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_recognition_result.h: Public API declarations for SpeechRecognitionResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Base class for speech recognition results. 
+/// +class SpeechRecognitionResult : public RecognitionResult +{ +public: + + explicit SpeechRecognitionResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s; reason=0x%x; text=%s", __FUNCTION__, (void*)this, (void*)Handle, Utils::ToUTF8(ResultId).c_str(), Reason, Utils::ToUTF8(Text).c_str()); + } + + virtual ~SpeechRecognitionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + +private: + DISABLE_DEFAULT_CTORS(SpeechRecognitionResult); +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognizer.h new file mode 100644 index 0000000..1c4e122 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_recognizer.h @@ -0,0 +1,351 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_recognizer.h: Public API declarations for SpeechRecognizer C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +class Session; + +/// +/// Class for speech recognizers. +/// +class SpeechRecognizer final : public AsyncRecognizer +{ +public: + + using BaseType = AsyncRecognizer; + + /// + /// Create a speech recognizer from a speech config + /// + /// Speech configuration. + /// A smart pointer wrapped speech recognizer pointer. 
+ static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from an embedded speech config. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a hybrid speech config. + /// + /// Hybrid speech configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a speech config and audio config. + /// + /// Speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from an embedded speech config and audio config. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// Audio configuration. 
+ /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioConfig))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a hybrid speech config and audio config. + /// + /// Hybrid speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioConfig))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a speech config, auto detection source language config and audio config + /// Added in 1.8.0 + /// + /// Speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from an embedded speech config, auto detection source language config and audio config + /// Added in 1.20.0 + /// + /// Embedded speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. 
+ static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a hybrid speech config, auto detection source language config and audio config + /// + /// Hybrid speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a speech config, source language config and audio config + /// Added in 1.8.0 + /// + /// Speech configuration. + /// Source language config. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. 
+ static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr sourceLanguageConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_speech_recognizer_from_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(sourceLanguageConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a speech recognizer from a speech config, source language and audio config + /// Added in 1.8.0 + /// + /// Speech configuration. + /// Source language. + /// Audio configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + const SPXSTRING& sourceLanguage, + std::shared_ptr audioInput = nullptr) + { + return FromConfig(speechconfig, SourceLanguageConfig::FromLanguage(sourceLanguage), audioInput); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit SpeechRecognizer(SPXRECOHANDLE hreco) : BaseType(hreco), Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + ~SpeechRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + TermRecognizer(); + } + + /// + /// Starts speech recognition, and returns after a single utterance is recognized. The end of a + /// single utterance is determined by listening for silence at the end or until a maximum of about 30 + /// seconds of audio is processed. The task returns the recognition text as result. + /// Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single + /// shot recognition like command or query. + /// For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. 
+ /// + /// Future containing result value (a shared pointer to SpeechRecognitionResult) + /// of the asynchronous speech recognition. + /// + std::future> RecognizeOnceAsync() override + { + return BaseType::RecognizeOnceAsyncInternal(); + } + + /// + /// Asynchronously initiates continuous speech recognition operation. + /// + /// An empty future. + std::future StartContinuousRecognitionAsync() override + { + return BaseType::StartContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously terminates ongoing continuous speech recognition operation. + /// + /// An empty future. + std::future StopContinuousRecognitionAsync() override + { + return BaseType::StopContinuousRecognitionAsyncInternal(); + } + + /// + /// Asynchronously initiates keyword recognition operation. + /// + /// Specifies the keyword model to be used. + /// An empty future. + std::future StartKeywordRecognitionAsync(std::shared_ptr model) override + { + return BaseType::StartKeywordRecognitionAsyncInternal(model); + } + + /// + /// Asynchronously terminates keyword recognition operation. + /// + /// An empty future. + std::future StopKeywordRecognitionAsync() override + { + return BaseType::StopKeywordRecognitionAsyncInternal(); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Gets the endpoint ID of a customized speech model that is used for speech recognition. + /// + /// the endpoint ID of a customized speech model that is used for speech recognition + SPXSTRING GetEndpointId() + { + return Properties.GetProperty(PropertyId::SpeechServiceConnection_EndpointId, SPXSTRING()); + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. 
+ /// Otherwise, the recognizer will encounter errors during recognition. + /// + /// The authorization token. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + +private: + DISABLE_DEFAULT_CTORS(SpeechRecognizer); + friend class Microsoft::CognitiveServices::Speech::Session; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_bookmark_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_bookmark_eventargs.h new file mode 100644 index 0000000..46535e8 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_bookmark_eventargs.h @@ -0,0 +1,81 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for speech synthesis bookmark event arguments. +/// Added in version 1.16.0 +/// +class SpeechSynthesisBookmarkEventArgs : public EventArgs +{ +private: + + SPXEVENTHANDLE m_hEvent; + +public: + + /// + /// Constructor. 
+ /// + /// Event handle + explicit SpeechSynthesisBookmarkEventArgs(SPXEVENTHANDLE hevent) : + m_hEvent(hevent), + ResultId(m_resultId), + Text(m_text) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + synthesizer_bookmark_event_get_values(hevent, &m_audioOffset); + AudioOffset = m_audioOffset; + + m_text = Utils::ToSPXString(Utils::CopyAndFreePropertyString(synthesizer_event_get_text(hevent))); + + const size_t maxCharCount = 256; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(synthesizer_event_get_result_id(hevent, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + }; + + /// + virtual ~SpeechSynthesisBookmarkEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + SPX_THROW_ON_FAIL(synthesizer_event_handle_release(m_hEvent)); + } + + /// + /// Unique result id. + /// Added in version 1.25.0 + /// + const SPXSTRING& ResultId; + + /// + /// Audio offset, in ticks (100 nanoseconds). + /// + uint64_t AudioOffset; + + /// + /// The bookmark text. + /// + const SPXSTRING& Text; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisBookmarkEventArgs); + + SPXSTRING m_resultId; + uint64_t m_audioOffset{ 0 }; + SPXSTRING m_text; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_eventargs.h new file mode 100644 index 0000000..b6d8321 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_eventargs.h @@ -0,0 +1,70 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_speech_synthesis_eventargs.h: Public API declarations for SpeechSynthesisEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Class for speech synthesis event arguments. +/// Added in version 1.4.0 +/// +class SpeechSynthesisEventArgs : public EventArgs +{ +private: + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit SpeechSynthesisEventArgs(SPXEVENTHANDLE hevent) : + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + virtual ~SpeechSynthesisEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + SPX_THROW_ON_FAIL(synthesizer_event_handle_release(m_hevent)); + } + + /// + /// Speech synthesis event result. + /// + std::shared_ptr Result; + + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(synthesizer_synthesis_event_get_result(hevent, &hresult)); + return hresult; + } + +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_request.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_request.h new file mode 100644 index 0000000..4441a27 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_request.h @@ -0,0 +1,271 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_speech_config.h: Public API declarations for SpeechConfig C++ class +// +#pragma once + +#include + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class that defines the speech synthesis request. +/// This class is in preview and is subject to change. +/// Added in version 1.37.0 +/// +class SpeechSynthesisRequest +{ +public: + + /// + /// Represents an input stream for speech synthesis request. + /// Note: This class is in preview and may be subject to change in future versions. + /// Added in version 1.37.0 + /// + class InputStream + { + public: + friend class SpeechSynthesisRequest; + /// + /// Send a piece of text to the speech synthesis service to be synthesized. + /// + /// The text piece to be synthesized. + void Write(const SPXSTRING &text) + { + m_parent.SendTextPiece(text); + } + + /// + /// Finish the text input. + /// + void Close() + { + m_parent.FinishInput(); + } + + private: + InputStream(SpeechSynthesisRequest& parent) + : m_parent(parent) + { + } + SpeechSynthesisRequest& m_parent; + DISABLE_COPY_AND_MOVE(InputStream); + }; + + /// + /// Internal operator used to get underlying handle value. + /// + /// A handle. + explicit operator SPXREQUESTHANDLE() const { return m_hrequest; } + + /// + /// Creates a speech synthesis request, with text streaming is enabled. + /// + /// A shared pointer to the new speech synthesis request instance. + static std::shared_ptr NewTextStreamingRequest() + { + SPXREQUESTHANDLE hrequest = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_synthesis_request_create(true, false, nullptr, 0, &hrequest)); + + auto ptr = new SpeechSynthesisRequest(hrequest); + return std::shared_ptr(ptr); + } + + /// + /// Gets the input stream for the speech synthesis request. + /// + /// The input stream. + InputStream& GetInputStream() + { + return m_inputStream; + } + + /// + /// Sets the pitch of the synthesized speech. 
+ /// + /// The pitch of the synthesized speech. + void SetPitch(const SPXSTRING& pitch) { + SetProperty(PropertyId::SpeechSynthesisRequest_Pitch, pitch); + } + + /// + /// Set the speaking rate. + /// + /// The speaking rate. + void SetRate(const SPXSTRING& rate) { + SetProperty(PropertyId::SpeechSynthesisRequest_Rate, rate); + } + + /// + /// Set the speaking volume. + /// + /// The speaking volume. + void SetVolume(const SPXSTRING& volume) { + SetProperty(PropertyId::SpeechSynthesisRequest_Volume, volume); + } + + /// + /// Set the speaking style. + /// + /// The speaking style. + void SetStyle(const SPXSTRING& style) { + SetProperty(PropertyId::SpeechSynthesisRequest_Style, style); + } + + /// + /// Set the speaking temperature. + /// + /// The speaking temperature. + void SetTemperature(const float temperature) { + SetProperty(PropertyId::SpeechSynthesisRequest_Temperature, std::to_string(temperature)); + } + + /// + /// Set the custom lexicon URL. + /// + /// URL to a custom pronunciation lexicon. + void SetCustomLexiconUrl(const SPXSTRING& customLexiconUrl) { + SetProperty(PropertyId::SpeechSynthesisRequest_CustomLexiconUrl, customLexiconUrl); + } + + /// + /// Set the preferred locales for speech synthesis. + /// + /// Comma-separated list of locale names in order of preference. + void SetPreferLocales(const SPXSTRING& preferLocales) { + SetProperty(PropertyId::SpeechSynthesisRequest_PreferLocales, preferLocales); + } + + /// + /// Destructs the object. + /// + virtual ~SpeechSynthesisRequest() + { + speech_synthesis_request_release(m_hrequest); + property_bag_release(m_propertybag); + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. 
+ /// + explicit SpeechSynthesisRequest(SPXREQUESTHANDLE hrequest) + :m_hrequest(hrequest), + m_inputStream(*this) + { + SPX_THROW_ON_FAIL(speech_synthesis_request_get_property_bag(hrequest, &m_propertybag)); + } + + /// + /// Internal member variable that holds the speech synthesis request handle. + /// + SPXREQUESTHANDLE m_hrequest; + + /// + /// Internal member variable that holds the properties of the speech synthesis request. + /// + SPXPROPERTYBAGHANDLE m_propertybag; + + InputStream m_inputStream; + + /// + /// Send a piece of text to the speech synthesis service to be synthesized, used in text streaming mode. + /// + /// The text piece to be synthesized. + void SendTextPiece(const SPXSTRING& text) + { + auto u8text = Utils::ToUTF8(text); + SPX_THROW_ON_FAIL(speech_synthesis_request_send_text_piece(m_hrequest, u8text.c_str(), static_cast(u8text.length()))); + } + + /// + /// Finish the text input, used in text streaming mode. + /// + void FinishInput() + { + SPX_THROW_ON_FAIL(speech_synthesis_request_finish(m_hrequest)); + } + + /// + /// Sets a property value by ID. + /// + /// The property id. + /// The property value. + void SetProperty(PropertyId id, const SPXSTRING& value) + { + property_bag_set_string(m_propertybag, static_cast(id), nullptr, Utils::ToUTF8(value).c_str()); + } + + /*! \endcond */ + +private: + DISABLE_COPY_AND_MOVE(SpeechSynthesisRequest); + + + +}; + +/// +/// Class that defines the speech synthesis request for personal voice (aka.ms/azureai/personal-voice). +/// This class is in preview and is subject to change. +/// Added in version 1.39.0 +/// +class PersonalVoiceSynthesisRequest: public SpeechSynthesisRequest +{ +public: + + /// + /// Creates a personal voice speech synthesis request, with text streaming is enabled. + /// + /// The name of the personal voice to be used for synthesis. + /// The name of the model. E.g., DragonLatestNeural or PhoenixLatestNeural + /// A shared pointer to the new speech synthesis request instance. 
+ static std::shared_ptr NewTextStreamingRequest(const std::string& personalVoiceName, const std::string& modelName) + { + SPXREQUESTHANDLE hrequest = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_synthesis_request_create(true, false, nullptr, 0, &hrequest)); + + SPX_THROW_ON_FAIL(speech_synthesis_request_set_voice(hrequest, nullptr, personalVoiceName.c_str(), modelName.c_str())); + + auto ptr = new PersonalVoiceSynthesisRequest(hrequest); + return std::shared_ptr(ptr); + } + + /// + /// Destructs the object. + /// + virtual ~PersonalVoiceSynthesisRequest() + { + + } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit PersonalVoiceSynthesisRequest(SPXREQUESTHANDLE hrequest) + :SpeechSynthesisRequest(hrequest) + {} + + /*! \endcond */ + +private: + DISABLE_COPY_AND_MOVE(PersonalVoiceSynthesisRequest); + +}; + +}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_result.h new file mode 100644 index 0000000..145e6ec --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_result.h @@ -0,0 +1,310 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_synthesis_result.h: Public API declarations for SpeechSynthesisResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Contains information about result from text-to-speech synthesis. +/// Added in version 1.4.0 +/// +class SpeechSynthesisResult +{ +private: + + /// + /// Internal member variable that holds the tts result handle. + /// + SPXRESULTHANDLE m_hresult; + + /*! 
\cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + synth_result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the tts result. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. + explicit SpeechSynthesisResult(SPXRESULTHANDLE hresult) : + m_hresult(hresult), + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + AudioDuration(m_audioDuration), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1]; + + SPX_THROW_ON_FAIL(synth_result_get_result_id(hresult, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + + Result_Reason resultReason; + SPX_THROW_ON_FAIL(synth_result_get_reason(hresult, &resultReason)); + m_reason = static_cast(resultReason); + + uint32_t audioLength = 0; + uint64_t audioDuration = 0; + SPX_THROW_ON_FAIL(synth_result_get_audio_length_duration(m_hresult, &audioLength, &audioDuration)); + m_audioDuration = std::chrono::milliseconds(audioDuration); + + m_audioData = std::make_shared>(audioLength); + + if (audioLength > 0) + { + uint32_t filledSize = 0; + SPX_THROW_ON_FAIL(synth_result_get_audio_data(m_hresult, m_audioData->data(), audioLength, &filledSize)); + } + } + + /// + /// Gets the size of synthesized audio in bytes. + /// + /// Length of synthesized audio + uint32_t GetAudioLength() + { + return static_cast(m_audioData->size()); + } + + /// + /// Gets the synthesized audio. + /// + /// Synthesized audio data + std::shared_ptr> GetAudioData() + { + return m_audioData; + } + + /// + /// Explicit conversion operator. 
+ /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + /// + /// Destructor. + /// + ~SpeechSynthesisResult() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + synthesizer_result_handle_release(m_hresult); + } + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Reason of the synthesis result. + /// + const ResultReason& Reason; + + /// + /// Time duration of the synthesized audio, only valid for completed synthsis. + /// Added in version 1.21.0 + /// + const std::chrono::milliseconds& AudioDuration; + + /// + /// Collection of additional SpeechSynthesisResult properties. + /// + const PropertyCollection& Properties; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisResult); + + /// + /// Internal member variable that holds the result ID. + /// + SPXSTRING m_resultId; + + /// + /// Internal member variable that holds the result reason. + /// + ResultReason m_reason; + + /// + /// Internal member variable that holds the audio data + /// + std::shared_ptr> m_audioData; + + /// + /// Internal member variable that holds the audio duration + // + std::chrono::milliseconds m_audioDuration; +}; + + +/// +/// Contains detailed information about why a result was canceled. +/// Added in version 1.4.0 +/// +class SpeechSynthesisCancellationDetails +{ +private: + + CancellationReason m_reason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Creates an instance of SpeechSynthesisCancellationDetails object for the canceled SpeechSynthesisResult. + /// + /// The result that was canceled. + /// A shared pointer to CancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + auto ptr = new SpeechSynthesisCancellationDetails(result.get()); + auto cancellation = std::shared_ptr(ptr); + return cancellation; + } + + /// + /// Creates an instance of SpeechSynthesisCancellationDetails object for the canceled SpeechSynthesisResult. 
+ /// + /// The audio data stream that was canceled. + /// A shared pointer to CancellationDetails. + static std::shared_ptr FromStream(std::shared_ptr stream) + { + auto ptr = new SpeechSynthesisCancellationDetails(stream.get()); + auto cancellation = std::shared_ptr(ptr); + return cancellation; + } + + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful speech synthesis ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful speech synthesis ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisCancellationDetails); + + SpeechSynthesisCancellationDetails(SpeechSynthesisResult* result) : + m_reason(GetCancellationReason(result)), + m_errorCode(GetCancellationErrorCode(result)), + Reason(m_reason), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::CancellationDetails_ReasonDetailedText)) + { + } + + SpeechSynthesisCancellationDetails(AudioDataStream* stream) : + m_reason(GetCancellationReason(stream)), + m_errorCode(GetCancellationErrorCode(stream)), + Reason(m_reason), + ErrorCode(m_errorCode), + ErrorDetails(stream->Properties.GetProperty(PropertyId::CancellationDetails_ReasonDetailedText)) + { + } + + Speech::CancellationReason GetCancellationReason(SpeechSynthesisResult* result) + { + Result_CancellationReason reason = CancellationReason_Error; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(synth_result_get_reason_canceled(hresult, &reason)); + + return static_cast(reason); + } + + Speech::CancellationErrorCode GetCancellationErrorCode(SpeechSynthesisResult* result) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + 
SPX_IFFAILED_THROW_HR(synth_result_get_canceled_error_code(hresult, &errorCode)); + + return static_cast(errorCode); + } + + Speech::CancellationReason GetCancellationReason(AudioDataStream* stream) + { + Result_CancellationReason reason = CancellationReason_Error; + + SPXAUDIOSTREAMHANDLE hstream = (SPXAUDIOSTREAMHANDLE)(*stream); + SPX_IFFAILED_THROW_HR(audio_data_stream_get_reason_canceled(hstream, &reason)); + + return static_cast(reason); + } + + Speech::CancellationErrorCode GetCancellationErrorCode(AudioDataStream* stream) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXAUDIOSTREAMHANDLE hstream = (SPXAUDIOSTREAMHANDLE)(*stream); + SPX_IFFAILED_THROW_HR(audio_data_stream_get_canceled_error_code(hstream, &errorCode)); + + return static_cast(errorCode); + } +}; + +inline std::shared_ptr AudioDataStream::FromResult(std::shared_ptr result) +{ + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + if (result != nullptr) + { + hresult = (SPXRESULTHANDLE)(*result.get()); + } + + SPXAUDIOSTREAMHANDLE hstream = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(audio_data_stream_create_from_result(&hstream, hresult)); + + auto stream = new AudioDataStream(hstream); + return std::shared_ptr(stream); +} + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_viseme_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_viseme_eventargs.h new file mode 100644 index 0000000..726b95f --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_viseme_eventargs.h @@ -0,0 +1,88 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for speech synthesis viseme event arguments. 
+/// Added in version 1.16.0 +/// +class SpeechSynthesisVisemeEventArgs : public EventArgs +{ +private: + + SPXEVENTHANDLE m_hEvent; + +public: + + /// + /// Constructor. + /// + /// Event handle + explicit SpeechSynthesisVisemeEventArgs(SPXEVENTHANDLE hevent) : + m_hEvent(hevent), + ResultId(m_resultId), + Animation(m_animation) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + synthesizer_viseme_event_get_values(hevent, &m_audioOffset, &m_visemeId); + AudioOffset = m_audioOffset; + VisemeId = m_visemeId; + + m_animation = Utils::ToSPXString(Utils::CopyAndFreePropertyString(synthesizer_viseme_event_get_animation(hevent))); + + const size_t maxCharCount = 256; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(synthesizer_event_get_result_id(hevent, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + }; + + /// + virtual ~SpeechSynthesisVisemeEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + SPX_THROW_ON_FAIL(synthesizer_event_handle_release(m_hEvent)); + } + + /// + /// Unique result id. + /// Added in version 1.25.0 + /// + const SPXSTRING& ResultId; + + /// + /// Audio offset, in ticks (100 nanoseconds). + /// + uint64_t AudioOffset; + + /// + /// Viseme ID. + /// + uint32_t VisemeId; + + /// + /// Animation, could be svg or other format. 
+ /// + const SPXSTRING& Animation; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisVisemeEventArgs); + + SPXSTRING m_resultId; + uint64_t m_audioOffset{ 0 }; + uint32_t m_visemeId { 0 }; + SPXSTRING m_animation; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_word_boundary_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_word_boundary_eventargs.h new file mode 100644 index 0000000..c44fe60 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesis_word_boundary_eventargs.h @@ -0,0 +1,120 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_synthesis_word_boundary_eventargs.h: Public API declarations for SpeechSynthesisWordBoundaryEventArgs C++ class +// + +#pragma once +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + + +/// +/// Class for speech synthesis word boundary event arguments. +/// Added in version 1.7.0 +/// +class SpeechSynthesisWordBoundaryEventArgs : public EventArgs +{ +private: + + SPXEVENTHANDLE m_hEvent; + +public: + + /// + /// Constructor. 
+ /// + /// Event handle + explicit SpeechSynthesisWordBoundaryEventArgs(SPXEVENTHANDLE hevent) : + m_hEvent(hevent), + ResultId(m_resultId), + Duration(m_duration), + Text(m_text), + BoundaryType(m_boundaryType) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + uint64_t durationTicks; + SpeechSynthesis_BoundaryType boundaryType = SpeechSynthesis_BoundaryType_Word; + synthesizer_word_boundary_event_get_values(hevent, &m_audioOffset, &durationTicks, &m_textOffset, &m_wordLength, &boundaryType); + m_duration = std::chrono::milliseconds(durationTicks / static_cast(10000)); + m_boundaryType = static_cast(boundaryType); + AudioOffset = m_audioOffset; + TextOffset = m_textOffset; + WordLength = m_wordLength; + m_text = Utils::ToSPXString(Utils::CopyAndFreePropertyString(synthesizer_event_get_text(hevent))); + + const size_t maxCharCount = 256; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(synthesizer_event_get_result_id(hevent, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + }; + + /// + virtual ~SpeechSynthesisWordBoundaryEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hEvent); + SPX_THROW_ON_FAIL(synthesizer_event_handle_release(m_hEvent)); + } + + /// + /// Unique result id. + /// Added in version 1.25.0 + /// + const SPXSTRING& ResultId; + + /// + /// Word boundary audio offset. + /// + uint64_t AudioOffset; + + /// + /// Time duration of the audio. + /// Added in version 1.21.0 + /// + const std::chrono::milliseconds& Duration; + + /// + /// Word boundary text offset. + /// + uint32_t TextOffset; + + /// + /// Word boundary word length. + /// + uint32_t WordLength; + + /// + /// The text. + /// Added in version 1.21.0 + /// + const SPXSTRING& Text; + + /// + /// Word boundary type. 
+ /// Added in version 1.21.0 + /// + const SpeechSynthesisBoundaryType& BoundaryType; + +private: + + DISABLE_DEFAULT_CTORS(SpeechSynthesisWordBoundaryEventArgs); + + SPXSTRING m_resultId; + uint64_t m_audioOffset{ 0 }; + std::chrono::milliseconds m_duration{ 0 }; + uint32_t m_textOffset{ 0 }; + uint32_t m_wordLength{ 0 }; + SPXSTRING m_text; + SpeechSynthesisBoundaryType m_boundaryType{ SpeechSynthesisBoundaryType::Word }; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesizer.h new file mode 100644 index 0000000..b3d119f --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_synthesizer.h @@ -0,0 +1,793 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_synthesizer.h: Public API declarations for SpeechSynthesizer C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Class for speech synthesizer. +/// Updated in version 1.14.0 +/// +class SpeechSynthesizer : public std::enable_shared_from_this +{ + friend class Connection; +private: + + /// + /// Internal member variable that holds the speech synthesizer handle. + /// + SPXSYNTHHANDLE m_hsynth; + + std::shared_ptr m_audioConfig; + + /*! 
\cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXSYNTHHANDLE hsynth) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + synthesizer_get_property_bag(hsynth, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties of the speech synthesizer + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Create a speech synthesizer from a speech config. + /// + /// Speech configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + SPXHANDLE_INVALID)); + + auto ptr = new SpeechSynthesizer(hsynth); + return std::shared_ptr(ptr); + } + + /// + /// Create a speech synthesizer from an embedded speech config. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + SPXHANDLE_INVALID)); + auto ptr = new SpeechSynthesizer(hsynth); + return std::shared_ptr(ptr); + } + + /// + /// Create a speech synthesizer from a hybrid speech config. + /// + /// Hybrid speech configuration. + /// A smart pointer wrapped speech synthesizer pointer. 
+ static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + SPXHANDLE_INVALID)); + auto ptr = new SpeechSynthesizer(hsynth); + return std::shared_ptr(ptr); + } + + /// + /// Create a speech synthesizer from a speech config and audio config. + /// + /// Speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr audioconfig = Audio::AudioConfig::FromDefaultSpeakerOutput()) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioconfig))); + + auto ptr = new SpeechSynthesizer(hsynth); + auto synthesizer = std::shared_ptr(ptr); + synthesizer->m_audioConfig = audioconfig; + return synthesizer; + } + + /// + /// Create a speech synthesizer from an embedded speech config and audio config. + /// Added in version 1.19.0 + /// + /// Embedded speech configuration. + /// Audio configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr audioconfig = Audio::AudioConfig::FromDefaultSpeakerOutput()) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioconfig))); + auto ptr = new SpeechSynthesizer(hsynth); + auto synthesizer = std::shared_ptr(ptr); + synthesizer->m_audioConfig = audioconfig; + return synthesizer; + } + + /// + /// Create a speech synthesizer from a hybrid speech config and audio config. + /// + /// Hybrid speech configuration. 
+ /// Audio configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr audioconfig = Audio::AudioConfig::FromDefaultSpeakerOutput()) + { + SPXSYNTHHANDLE hsynth = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(audioconfig))); + auto ptr = new SpeechSynthesizer(hsynth); + auto synthesizer = std::shared_ptr(ptr); + synthesizer->m_audioConfig = audioconfig; + return synthesizer; + } + + /// + /// Create a speech synthesizer from a speech config, auto detection source language config and audio config + /// Added in 1.13.0 + /// + /// Speech configuration. + /// Auto detection source language config. + /// Audio configuration. + /// A smart pointer wrapped speech synthesizer pointer. + static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioconfig = Audio::AudioConfig::FromDefaultSpeakerOutput()) + { + SPXSYNTHHANDLE hsynth; + + SPX_THROW_ON_FAIL(::synthesizer_create_speech_synthesizer_from_auto_detect_source_lang_config( + &hsynth, + Utils::HandleOrInvalid(speechconfig), + Utils::HandleOrInvalid(autoDetectSourceLangConfig), + Utils::HandleOrInvalid(audioconfig))); + + auto ptr = new SpeechSynthesizer(hsynth); + auto synthesizer = std::shared_ptr(ptr); + synthesizer->m_audioConfig = audioconfig; + return synthesizer; + } + + /// + /// Execute the speech synthesis on plain text, synchronously. + /// + /// The plain text for synthesis. + /// A smart pointer wrapping a speech synthesis result. 
+ std::shared_ptr SpeakText(const std::string& text) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_text(m_hsynth, text.data(), static_cast(text.length()), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Execute the speech synthesis on plain text, synchronously. + /// Added in 1.9.0 + /// + /// The plain text for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr SpeakText(const std::wstring& text) + { + return SpeakText(Utils::ToUTF8(text)); + } + + /// + /// Execute the speech synthesis on SSML, synchronously. + /// + /// The SSML for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr SpeakSsml(const std::string& ssml) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_ssml(m_hsynth, ssml.data(), static_cast(ssml.length()), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Execute the speech synthesis on SSML, synchronously. + /// Added in version 1.9.0 + /// + /// The SSML for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr SpeakSsml(const std::wstring& ssml) + { + return SpeakSsml(Utils::ToUTF8(ssml)); + } + + /// + /// Execute the speech synthesis on request, synchronously. + /// This API could be used to synthesize speech from an input text stream, to reduce latency for text generation scenarios. + /// Note: the feature is in preview and is subject to change. + /// Added in version 1.37.0 + /// + /// The synthesis request. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr Speak(const std::shared_ptr& request) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_request(m_hsynth, Utils::HandleOrInvalid(request), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Execute the speech synthesis on plain text, asynchronously. 
+ /// + /// The plain text for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> SpeakTextAsync(const std::string& text) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, text]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_text_async(m_hsynth, text.data(), static_cast(text.length()), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Execute the speech synthesis on plain text, asynchronously. + /// Added in version 1.9.0 + /// + /// The plain text for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> SpeakTextAsync(const std::wstring& text) + { + return SpeakTextAsync(Utils::ToUTF8(text)); + } + + /// + /// Execute the speech synthesis on SSML, asynchronously. + /// + /// The SSML for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. 
+ std::future> SpeakSsmlAsync(const std::string& ssml) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, ssml]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_ssml_async(m_hsynth, ssml.data(), static_cast(ssml.length()), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Execute the speech synthesis on SSML, asynchronously. + /// Added in version 1.9.0 + /// + /// The SSML for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> SpeakSsmlAsync(const std::wstring& ssml) + { + return SpeakSsmlAsync(Utils::ToUTF8(ssml)); + } + + /// + /// Execute the speech synthesis on on request, synchronously. + /// This API could be used to synthesize speech from an input text stream, to reduce latency for text generation scenarios. + /// Note: the feature is in preview and is subject to change. + /// Added in version 1.37.0 + /// + /// The synthesis request. + /// An asynchronous operation representing the synthesis. It returns a value of as result. 
+ std::future> SpeakAsync(const std::shared_ptr& request) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, request]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_speak_request_async(m_hsynth, Utils::HandleOrInvalid(request), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Start the speech synthesis on plain text, synchronously. + /// + /// The plain text for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeakingText(const std::string& text) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_text(m_hsynth, text.data(), static_cast(text.length()), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Start the speech synthesis on plain text, synchronously. + /// Added in version 1.9.0 + /// + /// The plain text for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeakingText(const std::wstring& text) + { + return StartSpeakingText(Utils::ToUTF8(text)); + } + + /// + /// Start the speech synthesis on SSML, synchronously. + /// + /// The SSML for synthesis. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeakingSsml(const std::string& ssml) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_ssml(m_hsynth, ssml.data(), static_cast(ssml.length()), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Start the speech synthesis on SSML, synchronously. + /// Added in version 1.9.0 + /// + /// The SSML for synthesis. 
+ /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeakingSsml(const std::wstring& ssml) + { + return StartSpeakingSsml(Utils::ToUTF8(ssml)); + } + + /// + /// Start the speech synthesis on on request, synchronously. + /// This API could be used to synthesize speech from an input text stream, to reduce latency for text generation scenarios. + /// Note: the feature is in preview and is subject to change. + /// Added in version 1.37.0 + /// + /// The synthesis request. + /// A smart pointer wrapping a speech synthesis result. + std::shared_ptr StartSpeaking(const std::shared_ptr& request) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_request(m_hsynth, Utils::HandleOrInvalid(request), &hresult)); + + return std::make_shared(hresult); + } + + /// + /// Start the speech synthesis on plain text, asynchronously. + /// + /// The plain text for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> StartSpeakingTextAsync(const std::string& text) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, text]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_text_async(m_hsynth, text.data(), static_cast(text.length()), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Start the speech synthesis on plain text, asynchronously. + /// Added in version 1.9.0 + /// + /// The plain text for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. 
+ std::future> StartSpeakingTextAsync(const std::wstring& text) + { + return StartSpeakingTextAsync(Utils::ToUTF8(text)); + } + + /// + /// Start the speech synthesis on SSML, asynchronously. + /// + /// The SSML for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> StartSpeakingSsmlAsync(const std::string& ssml) + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this, ssml]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_start_speaking_ssml_async(m_hsynth, ssml.data(), static_cast(ssml.length()), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_speak_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Start the speech synthesis on SSML, asynchronously. + /// Added in version 1.9.0 + /// + /// The SSML for synthesis. + /// An asynchronous operation representing the synthesis. It returns a value of as result. + std::future> StartSpeakingSsmlAsync(const std::wstring& ssml) + { + return StartSpeakingSsmlAsync(Utils::ToUTF8(ssml)); + } + + /// + /// Stop the speech synthesis, asynchronously. + /// Added in version 1.14.0 + /// + /// An empty future. 
+ std::future StopSpeakingAsync() + { + auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, this]() -> void { + SPXASYNCHANDLE hasyncStop = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_stop_speaking_async(m_hsynth, &hasyncStop)); + SPX_EXITFN_ON_FAIL(::synthesizer_stop_speaking_async_wait_for(hasyncStop, UINT32_MAX)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasyncStop); + SPX_REPORT_ON_FAIL(releaseHr); + }); + + return future; + } + + /// + /// Get the available voices, asynchronously. + /// Added in version 1.16.0 + /// + /// Specify the locale of voices, in BCP-47 format; or leave it empty to get all available voices. + /// An asynchronous operation representing the voices list. It returns a value of as result. + std::future> GetVoicesAsync(const SPXSTRING& locale = SPXSTRING()) + { + const auto keepAlive = this->shared_from_this(); + + auto future = std::async(std::launch::async, [keepAlive, locale, this]() -> std::shared_ptr { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPXASYNCHANDLE hasync = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesizer_get_voices_list_async(m_hsynth, Utils::ToUTF8(locale).c_str(), &hasync)); + SPX_EXITFN_ON_FAIL(::synthesizer_get_voices_list_async_wait_for(hasync, UINT32_MAX, &hresult)); + + SPX_EXITFN_CLEANUP: + auto releaseHr = synthesizer_async_handle_release(hasync); + SPX_REPORT_ON_FAIL(releaseHr); + + return std::make_shared(hresult); + }); + + return future; + } + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the synthesizer will encounter errors while speech synthesis. + /// Added in version 1.7.0 + /// + /// The authorization token. 
+ void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// Added in version 1.7.0 + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() const + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Destructor. + /// + ~SpeechSynthesizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + // Disconnect the event signals in reverse construction order + BookmarkReached.DisconnectAll(); + VisemeReceived.DisconnectAll(); + WordBoundary.DisconnectAll(); + SynthesisCanceled.DisconnectAll(); + SynthesisCompleted.DisconnectAll(); + Synthesizing.DisconnectAll(); + SynthesisStarted.DisconnectAll(); + + synthesizer_handle_release(m_hsynth); + } + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// The event signals that a speech synthesis result is received when the synthesis just started. + /// + EventSignal SynthesisStarted; + + /// + /// The event signals that a speech synthesis result is received while the synthesis is on going. + /// + EventSignal Synthesizing; + + /// + /// The event signals that a speech synthesis result is received when the synthesis completed. + /// + EventSignal SynthesisCompleted; + + /// + /// The event signals that a speech synthesis result is received when the synthesis is canceled. + /// + EventSignal SynthesisCanceled; + + /// + /// The event signals that a speech synthesis word boundary is received while the synthesis is on going. + /// Added in version 1.7.0 + /// + EventSignal WordBoundary; + + /// + /// The event signals that a speech synthesis viseme event is received while the synthesis is on going. 
+ /// Added in version 1.16.0 + /// + EventSignal VisemeReceived; + + /// + /// The event signals that a speech synthesis bookmark is reached while the synthesis is on going. + /// Added in version 1.16.0 + /// + EventSignal BookmarkReached; + +private: + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Synthesizer handle. + explicit SpeechSynthesizer(SPXSYNTHHANDLE hsynth) : + m_hsynth(hsynth), + m_properties(hsynth), + Properties(m_properties), + SynthesisStarted(GetSpeechSynthesisEventConnectionsChangedCallback()), + Synthesizing(GetSpeechSynthesisEventConnectionsChangedCallback()), + SynthesisCompleted(GetSpeechSynthesisEventConnectionsChangedCallback()), + SynthesisCanceled(GetSpeechSynthesisEventConnectionsChangedCallback()), + WordBoundary(GetWordBoundaryEventConnectionsChangedCallback()), + VisemeReceived(GetVisemeEventConnectionsChangedCallback()), + BookmarkReached(GetBookmarkEventConnectionsChangedCallback()) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + std::function&)> GetSpeechSynthesisEventConnectionsChangedCallback() + { + return [=](const EventSignal& eventSignal) { + if (&eventSignal == &SynthesisStarted) + { + synthesizer_started_set_callback(m_hsynth, SynthesisStarted.IsConnected() ? FireEvent_SynthesisStarted : nullptr, this); + } + else if (&eventSignal == &Synthesizing) + { + synthesizer_synthesizing_set_callback(m_hsynth, Synthesizing.IsConnected() ? FireEvent_Synthesizing : nullptr, this); + } + else if (&eventSignal == &SynthesisCompleted) + { + synthesizer_completed_set_callback(m_hsynth, SynthesisCompleted.IsConnected() ? FireEvent_SynthesisCompleted : nullptr, this); + } + else if (&eventSignal == &SynthesisCanceled) + { + synthesizer_canceled_set_callback(m_hsynth, SynthesisCanceled.IsConnected() ? 
FireEvent_SynthesisCanceled : nullptr, this); + } + }; + } + + std::function&)> GetWordBoundaryEventConnectionsChangedCallback() + { + return [=](const EventSignal& eventSignal) { + if (&eventSignal == &WordBoundary) + { + synthesizer_word_boundary_set_callback(m_hsynth, WordBoundary.IsConnected() ? FireEvent_WordBoundary : nullptr, this); + } + }; + } + + std::function&)> GetVisemeEventConnectionsChangedCallback() + { + return [=](const EventSignal& eventSignal) { + if (&eventSignal == &VisemeReceived) + { + synthesizer_viseme_received_set_callback(m_hsynth, VisemeReceived.IsConnected() ? FireEvent_VisemeReceived : nullptr, this); + } + }; + } + + std::function&)> GetBookmarkEventConnectionsChangedCallback() + { + return [=](const EventSignal& eventSignal) { + if (&eventSignal == &BookmarkReached) + { + synthesizer_bookmark_reached_set_callback(m_hsynth, BookmarkReached.IsConnected() ? FireEvent_BookmarkReached : nullptr, this); + } + }; + } + + static void FireEvent_SynthesisStarted(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr synthEvent{ new SpeechSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SynthesisStarted.Signal(*synthEvent.get()); + } + + static void FireEvent_Synthesizing(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr synthEvent{ new SpeechSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Synthesizing.Signal(*synthEvent.get()); + } + + static void FireEvent_SynthesisCompleted(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr synthEvent{ new SpeechSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SynthesisCompleted.Signal(*synthEvent.get()); + } + + static void 
FireEvent_SynthesisCanceled(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr synthEvent{ new SpeechSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->SynthesisCanceled.Signal(*synthEvent.get()); + } + + static void FireEvent_WordBoundary(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr wordBoundaryEvent{ new SpeechSynthesisWordBoundaryEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->WordBoundary.Signal(*wordBoundaryEvent.get()); + } + + static void FireEvent_VisemeReceived(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr visemeReceivedEvent{ new SpeechSynthesisVisemeEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->VisemeReceived.Signal(*visemeReceivedEvent.get()); + } + + static void FireEvent_BookmarkReached(SPXSYNTHHANDLE hsynth, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hsynth); + std::unique_ptr bookmarkReachedEvent{ new SpeechSynthesisBookmarkEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->BookmarkReached.Signal(*bookmarkReachedEvent.get()); + } +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_translation_config.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_translation_config.h new file mode 100644 index 0000000..1b7d785 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_translation_config.h @@ -0,0 +1,213 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once + +#include +#include + +#include "speechapi_c_common.h" +#include "speechapi_c_speech_config.h" +#include "speechapi_c_speech_translation_config.h" +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Translation { + +/// +/// Class that defines configurations for translation with speech input. +/// +class SpeechTranslationConfig final : public SpeechConfig +{ +public: + /// + /// Creates an instance of the speech translation config with specified subscription key and region. + /// + /// The subscription key. + /// The region name (see the region page). + /// Shared pointer to the speech translation config instance. + static std::shared_ptr FromSubscription(const SPXSTRING& subscription, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_subscription(&hconfig, Utils::ToUTF8(subscription).c_str(), Utils::ToUTF8(region).c_str())); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Creates an instance of the speech translation config with specified authorization token and region. + /// + /// The authorization token. + /// The region name (see the region page). + /// Shared pointer to the speech translation config instance. + static std::shared_ptr FromAuthorizationToken(const SPXSTRING& authToken, const SPXSTRING& region) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_authorization_token(&hconfig, Utils::ToUTF8(authToken).c_str(), Utils::ToUTF8(region).c_str())); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + // + /// Creates an instance of the speech translation config with specified endpoint and subscription. + /// This method is intended only for users who use a non-standard service endpoint. 
+ /// Note: The query parameters specified in the endpoint URI are not changed, even if they are set by any other APIs. + /// For example, if the recognition language is defined in URI as query parameter "language=de-DE", and also set by SetSpeechRecognitionLanguage("en-US"), + /// the language setting in URI takes precedence, and the effective language is "de-DE". + /// Only the parameters that are not specified in the endpoint URI can be set by other APIs. + /// Note: To use an authorization token with FromEndpoint, please use FromEndpoint(const SPXSTRING&), + /// and then call SetAuthorizationToken() on the created SpeechTranslationConfig instance. + /// + /// The service endpoint to connect to. + /// The subscription key. + /// Shared pointer to the new SpeechTranslationConfig instance. + static std::shared_ptr FromEndpoint(const SPXSTRING& endpoint, const SPXSTRING& subscription) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_endpoint(&hconfig, Utils::ToUTF8(endpoint).c_str(), Utils::ToUTF8(subscription).c_str())); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Creates an instance of the speech translation config with specified endpoint. + /// This method is intended only for users who use a non-standard service endpoint. + /// Note: The query parameters specified in the endpoint URI are not changed, even if they are set by any other APIs. + /// For example, if the recognition language is defined in URI as query parameter "language=de-DE", and also set by SetSpeechRecognitionLanguage("en-US"), + /// the language setting in URI takes precedence, and the effective language is "de-DE". + /// Only the parameters that are not specified in the endpoint URI can be set by other APIs. + /// Note: if the endpoint requires a subscription key for authentication, please use FromEndpoint(const SPXSTRING&, const SPXSTRING&) to pass + /// the subscription key as parameter. 
+ /// To use an authorization token with FromEndpoint, use this method to create a SpeechTranslationConfig instance, and then + /// call SetAuthorizationToken() on the created SpeechTranslationConfig instance. + /// Note: Added in version 1.5.0. + /// + /// The service endpoint to connect to. + /// A shared pointer to the new SpeechTranslationConfig instance. + static std::shared_ptr FromEndpoint(const SPXSTRING& endpoint) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_endpoint(&hconfig, Utils::ToUTF8(endpoint).c_str(), nullptr)); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Creates an instance of the speech translation config with specified host and subscription. + /// This method is intended only for users who use a non-default service host. Standard resource path will be assumed. + /// For services with a non-standard resource path or no path at all, use FromEndpoint instead. + /// Note: Query parameters are not allowed in the host URI and must be set by other APIs. + /// Note: To use an authorization token with FromHost, use FromHost(const SPXSTRING&), + /// and then call SetAuthorizationToken() on the created SpeechTranslationConfig instance. + /// Note: Added in version 1.8.0. + /// + /// The service host to connect to. Format is "protocol://host:port" where ":port" is optional. + /// The subscription key. + /// Shared pointer to the new SpeechTranslationConfig instance. + static std::shared_ptr FromHost(const SPXSTRING& host, const SPXSTRING& subscription) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_host(&hconfig, Utils::ToUTF8(host).c_str(), Utils::ToUTF8(subscription).c_str())); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Creates an instance of the speech translation config with specified host. 
+ /// This method is intended only for users who use a non-default service host. Standard resource path will be assumed. + /// For services with a non-standard resource path or no path at all, use FromEndpoint instead. + /// Note: Query parameters are not allowed in the host URI and must be set by other APIs. + /// Note: If the host requires a subscription key for authentication, use FromHost(const SPXSTRING&, const SPXSTRING&) to pass + /// the subscription key as parameter. + /// To use an authorization token with FromHost, use this method to create a SpeechTranslationConfig instance, and then + /// call SetAuthorizationToken() on the created SpeechTranslationConfig instance. + /// Note: Added in version 1.8.0. + /// + /// The service host to connect to. Format is "protocol://host:port" where ":port" is optional. + /// A shared pointer to the new SpeechTranslationConfig instance. + static std::shared_ptr FromHost(const SPXSTRING& host) + { + SPXSPEECHCONFIGHANDLE hconfig = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(speech_translation_config_from_host(&hconfig, Utils::ToUTF8(host).c_str(), nullptr)); + return std::shared_ptr(new SpeechTranslationConfig(hconfig)); + } + + /// + /// Adds a target language for translation. + /// + /// Translation target language to add. + void AddTargetLanguage(const SPXSTRING& language) + { + SPX_THROW_ON_FAIL(speech_translation_config_add_target_language(m_hconfig, Utils::ToUTF8(language).c_str())); + } + + /// + /// Removes a target language for translation. + /// Added in release 1.7.0. + /// + /// Translation target language to remove. + void RemoveTargetLanguage(const SPXSTRING& language) + { + SPX_THROW_ON_FAIL(speech_translation_config_remove_target_language(m_hconfig, Utils::ToUTF8(language).c_str())); + } + + /// + /// Sets a Category Id that will be passed to service. Category Id is used to find the custom model. + /// + /// Category Id to set. 
+ void SetCustomModelCategoryId(const SPXSTRING& categoryId) + { + SPX_THROW_ON_FAIL(speech_translation_config_set_custom_model_category_id(m_hconfig, Utils::ToUTF8(categoryId).c_str())); + } + + /// + /// Gets target languages for translation. + /// + /// Vector of translation target languages. + std::vector GetTargetLanguages() const + { + std::vector result; + auto targetLanguages = Utils::ToUTF8(GetProperty(PropertyId::SpeechServiceConnection_TranslationToLanguages)); + if (targetLanguages.empty()) + return result; + + // Getting languages one by one. + std::stringstream languageStream(targetLanguages); + std::string token; + while (std::getline(languageStream, token, CommaDelim)) + { + result.push_back(Utils::ToSPXString(token)); + } + return result; + } + + /// + /// Sets output voice name. + /// + /// Voice name to set. + void SetVoiceName(const SPXSTRING& voice) + { + property_bag_set_string(m_propertybag, static_cast(PropertyId::SpeechServiceConnection_TranslationVoice), nullptr, Utils::ToUTF8(voice).c_str()); + } + + /// + /// Gets output voice name. + /// + /// Output voice name. + SPXSTRING GetVoiceName() const + { + return GetProperty(PropertyId::SpeechServiceConnection_TranslationVoice); + } + +private: + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + explicit SpeechTranslationConfig(SPXSPEECHCONFIGHANDLE hconfig) : SpeechConfig(hconfig) { } + + DISABLE_COPY_AND_MOVE(SpeechTranslationConfig); + +}; + +}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_translation_model.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_translation_model.h new file mode 100644 index 0000000..b94513d --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_speech_translation_model.h @@ -0,0 +1,120 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_speech_translation_model.h: Public API declarations for SpeechTranslationModel C++ class +// + +#pragma once +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Speech translation model information. +/// +class SpeechTranslationModel +{ +private: + + /// + /// Internal member variable that holds the model handle. + /// + SPXSPEECHRECOMODELHANDLE m_hmodel; + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Model handle. + explicit SpeechTranslationModel(SPXSPEECHRECOMODELHANDLE hmodel) : + m_hmodel(hmodel), + Name(m_name), + SourceLanguages(m_sourceLanguages), + TargetLanguages(m_targetLanguages), + Path(m_path), + Version(m_version) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + m_name = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_translation_model_get_name(m_hmodel))); + m_sourceLanguages = Utils::Split(Utils::CopyAndFreePropertyString(speech_translation_model_get_source_languages(m_hmodel)), '|'); + m_targetLanguages = Utils::Split(Utils::CopyAndFreePropertyString(speech_translation_model_get_target_languages(m_hmodel)), '|'); + m_path = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_translation_model_get_path(m_hmodel))); + m_version = Utils::ToSPXString(Utils::CopyAndFreePropertyString(speech_translation_model_get_version(m_hmodel))); + } + + /// + /// Explicit conversion operator. + /// + /// Model handle. + explicit operator SPXSPEECHRECOMODELHANDLE() { return m_hmodel; } + + /// + /// Destructor. + /// + ~SpeechTranslationModel() + { + speech_translation_model_handle_release(m_hmodel); + } + + /// + /// Model name. + /// + const SPXSTRING& Name; + + /// + /// Source languages that the model supports. + /// + const std::vector& SourceLanguages; + + /// + /// Target languages that the model supports. + /// + const std::vector& TargetLanguages; + + /// + /// Model path (only valid for offline models). 
+ /// + const SPXSTRING& Path; + + /// + /// Model version. + /// + const SPXSTRING& Version; + +private: + + DISABLE_DEFAULT_CTORS(SpeechTranslationModel); + + /// + /// Internal member variable that holds the model name. + /// + SPXSTRING m_name; + + /// + /// Internal member variable that holds the model source languages. + /// + std::vector m_sourceLanguages; + + /// + /// Internal member variable that holds the model target languages. + /// + std::vector m_targetLanguages; + + /// + /// Internal member variable that holds the model path. + /// + SPXSTRING m_path; + + /// + /// Internal member variable that holds the model version. + /// + SPXSTRING m_version; +}; + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_string_helpers.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_string_helpers.h new file mode 100644 index 0000000..4dcbb0e --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_string_helpers.h @@ -0,0 +1,137 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define SPXSTRING std::string +#define SPXSTRING_EMPTY std::string() + +namespace Microsoft{ +namespace CognitiveServices { +namespace Speech { +namespace Utils { + +namespace Details { + + inline std::string to_string(const std::wstring& value) + { + const auto size = pal_wstring_to_string(nullptr, value.c_str(), 0); + auto buffer = std::make_unique(size); + pal_wstring_to_string(buffer.get(), value.c_str(), size); + return std::string{ buffer.get() }; + } + + inline std::wstring to_string(const std::string& value) + { + const auto size = pal_string_to_wstring(nullptr, value.c_str(), 0); + auto buffer = std::make_unique(size); + pal_string_to_wstring(buffer.get(), value.c_str(), size); + return std::wstring{ buffer.get() }; + } +} + +inline std::string ToSPXString(const char* value) +{ + return value == nullptr ? "" : value; +} + +inline std::string ToSPXString(const std::string& value) +{ + return value; +} + +inline std::string ToUTF8(const std::wstring& value) +{ + return Details::to_string(value); +} + +inline std::string ToUTF8(const wchar_t* value) +{ + if (!value) + return ""; + return ToUTF8(std::wstring(value)); +} + +inline std::string ToUTF8(const std::string& value) +{ + return value; +} + +inline const char* ToUTF8(const char* value) +{ + return value; +} + +inline static std::string CopyAndFreePropertyString(const char* value) +{ + std::string copy = (value == nullptr) ? 
"" : value; + property_bag_free_string(value); + return copy; +} + +template +inline static size_t Find(const TCHAR* pStr, const size_t numChars, const TCHAR find, size_t startAt = 0) +{ + for (size_t i = startAt; i < numChars; i++) + { + TCHAR c = pStr[i]; + if (c == '\0') + { + break; + } + else if (c == find) + { + return i; + } + } + + return (std::numeric_limits::max)(); // weird syntax to avoid Windows min/max macros +} + +template +static std::vector> Split(const TCHAR* pStr, const size_t numChars, const TCHAR delim) +{ + std::vector> result; + if (pStr == nullptr) + { + return result; + } + + size_t start = 0; + size_t end = Find(pStr, numChars, delim, 0); + while (end != (std::numeric_limits::max)()) + { + result.push_back(std::basic_string(pStr + start, end - start)); + start = end + 1; + end = Find(pStr, numChars, delim, start); + } + + if (start < numChars) + { + result.push_back(std::basic_string(pStr + start, numChars - start)); + } + + return result; +} + +template +inline static std::vector> Split(const std::basic_string& str, const TCHAR delim) +{ + return Split(str.c_str(), str.size(), delim); +} + +}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_synthesis_voices_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_synthesis_voices_result.h new file mode 100644 index 0000000..cc287c3 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_synthesis_voices_result.h @@ -0,0 +1,165 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. 
+// +// speechapi_cxx_synthesis_voices_result.h: Public API declarations for SynthesisVoicesResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Contains information about result from voices list of speech synthesizers. +/// Added in version 1.16.0 +/// +class SynthesisVoicesResult +{ +private: + + /// + /// Internal member variable that holds the voices list result handle. + /// + SPXRESULTHANDLE m_hresult; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + synthesis_voices_result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the voices list result. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. 
+ explicit SynthesisVoicesResult(SPXRESULTHANDLE hresult) : + m_hresult(hresult), + m_properties(hresult), + Voices(m_voices), + ErrorDetails(m_errorDetails), + ResultId(m_resultId), + Reason(m_reason), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + uint32_t voiceNum; + SPX_THROW_ON_FAIL(::synthesis_voices_result_get_voice_num(hresult, &voiceNum)); + m_voices = std::vector>(voiceNum); + + for (uint32_t i = 0; i < voiceNum; ++i) + { + SPXRESULTHANDLE hVoice = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(::synthesis_voices_result_get_voice_info(m_hresult, i, &hVoice)); + m_voices[i] = std::make_shared(hVoice); + } + + const size_t maxCharCount = 1024; + char sz[maxCharCount + 1]; + SPX_THROW_ON_FAIL(synthesis_voices_result_get_result_id(hresult, sz, maxCharCount)); + m_resultId = Utils::ToSPXString(sz); + + Result_Reason resultReason = ResultReason_NoMatch; + SPX_THROW_ON_FAIL(synthesis_voices_result_get_reason(hresult, &resultReason)); + m_reason = static_cast(resultReason); + + m_errorDetails = m_properties.GetProperty(PropertyId::CancellationDetails_ReasonDetailedText); + } + + /// + /// Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + /// + /// Destructor. + /// + ~SynthesisVoicesResult() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + synthesizer_result_handle_release(m_hresult); + } + + /// + /// Retrieved voices. + /// + const std::vector>& Voices; + + /// + /// Error details. + /// + const SPXSTRING& ErrorDetails; + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Reason of the voices list result. + /// + const ResultReason& Reason; + + /// + /// Collection of additional SynthesisVoicesResult properties. + /// + const PropertyCollection& Properties; + +private: + + DISABLE_DEFAULT_CTORS(SynthesisVoicesResult); + + /// + /// Internal member variable that holds the result ID. 
+ /// + SPXSTRING m_resultId; + + /// + /// Internal member variable that holds the result reason. + /// + ResultReason m_reason; + + /// + /// Internal member variable that holds the voices list. + /// + std::vector> m_voices; + + /// + /// Internal member variable that holds the error details. + /// + SPXSTRING m_errorDetails; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_eventargs.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_eventargs.h new file mode 100644 index 0000000..42f54ef --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_eventargs.h @@ -0,0 +1,235 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// + +#pragma once +#include +#include +#include +#include +#include + + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Translation { + + +/// +/// Defines payload that is sent with the event or . +/// +class TranslationRecognitionEventArgs : public RecognitionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + + /// + /// It is intended for internal use only. It creates an instance of . + /// + /// The handle returned by recognizer in C-API. + explicit TranslationRecognitionEventArgs(SPXEVENTHANDLE hevent) : + RecognitionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(ResultHandleFromEventHandle(hevent))), + Result(m_result) + { + UNUSED(m_hevent); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + /// Destructs the instance. 
+ /// + virtual ~TranslationRecognitionEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + recognizer_event_handle_release(m_hevent); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Contains the translation recognition result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +protected: +#endif + + /*! \cond PROTECTED */ + + /// + /// Contains the translation text result. + /// + std::shared_ptr GetResult() const { return m_result; } + + /*! \endcond */ + +private: + DISABLE_DEFAULT_CTORS(TranslationRecognitionEventArgs); + + SPXRESULTHANDLE ResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + + +/// +/// Class for translation recognition canceled event arguments. +/// +class TranslationRecognitionCanceledEventArgs final : public TranslationRecognitionEventArgs +{ +private: + + std::shared_ptr m_cancellation; + CancellationReason m_cancellationReason; + CancellationErrorCode m_errorCode; + +public: + + /// + /// Constructor. 
+ /// + /// Event handle + explicit TranslationRecognitionCanceledEventArgs(SPXEVENTHANDLE hevent) : + TranslationRecognitionEventArgs(hevent), + m_cancellation(CancellationDetails::FromResult(GetResult())), + m_cancellationReason(m_cancellation->Reason), + m_errorCode(m_cancellation->ErrorCode), + Reason(m_cancellationReason), + ErrorCode(m_errorCode), + ErrorDetails(m_cancellation->ErrorDetails) + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + virtual ~TranslationRecognitionCanceledEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + /// + /// The reason the result was canceled. + /// + const CancellationReason& Reason; + + /// + /// The error code in case of an unsuccessful recognition ( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// Added in version 1.1.0. + /// + const CancellationErrorCode& ErrorCode; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + /// + /// The error message in case of an unsuccessful recognition ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// CancellationDetails. + /// + std::shared_ptr GetCancellationDetails() const { return m_cancellation; } + +private: + + DISABLE_DEFAULT_CTORS(TranslationRecognitionCanceledEventArgs); +}; + + + +/// +/// Defines payload that is sent with the event . +/// +class TranslationSynthesisEventArgs final : public SessionEventArgs +{ +private: + + SPXEVENTHANDLE m_hevent; + std::shared_ptr m_result; + +public: + /// + /// It is intended for internal use only. It creates an instance of . + /// + /// The handle returned by recognizer in C-API. 
+ explicit TranslationSynthesisEventArgs(SPXEVENTHANDLE hevent) : + SessionEventArgs(hevent), + m_hevent(hevent), + m_result(std::make_shared(SynthesisResultHandleFromEventHandle(hevent))), + Result(m_result) + { + UNUSED(m_hevent); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + }; + + /// + /// Destructs the instance. + /// + virtual ~TranslationSynthesisEventArgs() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)m_hevent); + recognizer_event_handle_release(m_hevent); + }; + +#if defined(BINDING_OBJECTIVE_C) +private: +#endif + /// + /// Contains the translation synthesis result. + /// + std::shared_ptr Result; + +#if defined(BINDING_OBJECTIVE_C) +public: +#else +private: +#endif + /// + /// Contains the translation synthesis result. + /// + std::shared_ptr GetResult() const { return m_result; } + +private: + + DISABLE_DEFAULT_CTORS(TranslationSynthesisEventArgs); + + SPXRESULTHANDLE SynthesisResultHandleFromEventHandle(SPXEVENTHANDLE hevent) + { + SPXRESULTHANDLE hresult = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(recognizer_recognition_event_get_result(hevent, &hresult)); + return hresult; + } +}; + +} } } } // Microsoft::CognitiveServices::Speech::Translation diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_recognizer.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_recognizer.h new file mode 100644 index 0000000..7627e23 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_recognizer.h @@ -0,0 +1,352 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_translation_recognizer.h: Public API declarations for translation recognizer in C++. 
+// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Translation { + +/// +/// Performs translation on the speech input. +/// +class TranslationRecognizer final : public AsyncRecognizer +{ +public: + /// + /// Create a translation recognizer from a speech config + /// + /// Speech configuration. + /// A smart pointer wrapped speech recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from an embedded speech config + /// + /// Embedded speech configuration. + /// A smart pointer wrapped translation recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from a hybrid speech config + /// + /// Hybrid speech configuration. + /// A smart pointer wrapped translation recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::nullptr_t) + { + SPXRECOHANDLE hreco; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(nullptr))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from a translation config and an audio config. + /// Users should use this function to create a translation recognizer. + /// + /// Speech translation config. 
+ /// Audio config. + /// The shared smart pointer of the created translation recognizer. + static std::shared_ptr FromConfig(std::shared_ptr speechconfig, std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco { SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from an embedded speech config and audio config. + /// + /// Embedded speech config. + /// Audio config. + /// A smart pointer wrapped translation recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE hreco{ SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioConfig))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from a hybrid speech config and audio config. + /// + /// Hybrid speech config. + /// Audio config. + /// A smart pointer wrapped translation recognizer pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig, std::shared_ptr audioConfig = nullptr) + { + SPXRECOHANDLE hreco{ SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(audioConfig))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from a translation config, auto detection source language config and an audio config. + /// Users should use this function to create a translation recognizer. + /// + /// Speech translation config. + /// Auto detection source language config. + /// Audio config. + /// The shared smart pointer of the created translation recognizer. 
+ static std::shared_ptr FromConfig( + std::shared_ptr speechconfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco { SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechconfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + /// + /// Create a translation recognizer from an embedded speech config, auto detection source language config and audio config. + /// + /// Embedded speech config. + /// Auto detection source language config. + /// Audio config. + /// The shared smart pointer of the created translation recognizer. + static std::shared_ptr FromConfig( + std::shared_ptr speechConfig, + std::shared_ptr autoDetectSourceLangConfig, + std::shared_ptr audioInput = nullptr) + { + SPXRECOHANDLE hreco{ SPXHANDLE_INVALID }; + SPX_THROW_ON_FAIL(::recognizer_create_translation_recognizer_from_auto_detect_source_lang_config( + &hreco, + HandleOrInvalid(speechConfig), + HandleOrInvalid(autoDetectSourceLangConfig), + HandleOrInvalid(audioInput))); + return std::make_shared(hreco); + } + + // The AsyncRecognizer only deals with events for translation text result. The audio output event + // is managed by OnTranslationSynthesisResult. + using BaseType = AsyncRecognizer; + + /// + /// It is intended for internal use only. It creates an instance of . + /// + /// + /// It is recommended to use SpeechTranslationConfig to create an instance of . This method is mainly + /// used in case where a recognizer handle has been created by methods via C-API. + /// + /// The handle of the recognizer that is returned by C-API. 
+ explicit TranslationRecognizer(SPXRECOHANDLE hreco) : + BaseType(hreco), + Properties(m_properties), + Synthesizing(GetTranslationAudioEventConnectionsChangedCallback()) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Deconstruct the instance. + /// + ~TranslationRecognizer() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + Synthesizing.DisconnectAll(); + TermRecognizer(); + } + + /// + /// Starts translation recognition, and returns after a single utterance is recognized. The end of a + /// single utterance is determined by listening for silence at the end or until a maximum of about 30 + /// seconds of audio is processed. The task returns the recognized text as well as the translation. + /// Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single + /// shot recognition like command or query. + /// For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. + /// + /// An asynchronous operation representing the recognition. It returns a value of as result. + std::future> RecognizeOnceAsync() override + { + return BaseType::RecognizeOnceAsyncInternal(); + } + + /// + /// Starts translation on a continous audio stream, until StopContinuousRecognitionAsync() is called. + /// User must subscribe to events to receive recognition results. + /// + /// An asynchronous operation that starts the translation. + std::future StartContinuousRecognitionAsync() override + { + return BaseType::StartContinuousRecognitionAsyncInternal(); + } + + /// + /// Stops continuous translation. + /// + /// A task representing the asynchronous operation that stops the translation. + std::future StopContinuousRecognitionAsync() override { return BaseType::StopContinuousRecognitionAsyncInternal(); } + + /// + /// Starts keyword recognition on a continuous audio stream, until StopKeywordRecognitionAsync() is called. + /// + /// Specifies the keyword model to be used. 
+ /// An asynchronous operation that starts the keyword recognition. + std::future StartKeywordRecognitionAsync(std::shared_ptr model) override + { + return BaseType::StartKeywordRecognitionAsyncInternal(model); + }; + + /// + /// Stops continuous keyword recognition. + /// + /// A task representing the asynchronous operation that stops the keyword recognition. + std::future StopKeywordRecognitionAsync() override + { + return BaseType::StopKeywordRecognitionAsyncInternal(); + }; + + /// + /// Sets the authorization token that will be used for connecting to the service. + /// Note: The caller needs to ensure that the authorization token is valid. Before the authorization token + /// expires, the caller needs to refresh it by calling this setter with a new valid token. + /// Otherwise, the recognizer will encounter errors during recognition. + /// + /// A string that represents the endpoint id. + void SetAuthorizationToken(const SPXSTRING& token) + { + Properties.SetProperty(PropertyId::SpeechServiceAuthorization_Token, token); + } + + /// + /// Gets the authorization token. + /// + /// Authorization token + SPXSTRING GetAuthorizationToken() + { + return Properties.GetProperty(PropertyId::SpeechServiceAuthorization_Token, SPXSTRING()); + } + + /// + /// Adds a target language for translation. + /// Added in version 1.7.0. + /// + /// Translation target language to add. + void AddTargetLanguage(const SPXSTRING& language) + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hreco == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::translator_add_target_language(m_hreco, Utils::ToUTF8(language).c_str())); + } + + /// + /// Removes a target language for translation. + /// Added in version 1.7.0. + /// + /// Translation target language to remove. 
+ void RemoveTargetLanguage(const SPXSTRING& language) + { + SPX_THROW_HR_IF(SPXERR_INVALID_HANDLE, m_hreco == SPXHANDLE_INVALID); + SPX_THROW_ON_FAIL(::translator_remove_target_language(m_hreco, Utils::ToUTF8(language).c_str())); + } + + /// + /// Gets target languages for translation. + /// Added in version 1.7.0. + /// + /// Vector of translation target languages. + std::vector GetTargetLanguages() const + { + std::vector result; + auto targetLanguages = Utils::ToUTF8(Properties.GetProperty(PropertyId::SpeechServiceConnection_TranslationToLanguages)); + if (targetLanguages.empty()) + return result; + + // Getting languages one by one. + std::stringstream languageStream(targetLanguages); + std::string token; + while (std::getline(languageStream, token, CommaDelim)) + { + result.push_back(Utils::ToSPXString(token)); + } + return result; + } + + /// + /// The collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// The event signals that a translation synthesis result is received. + /// + EventSignal Synthesizing; + +private: + + DISABLE_DEFAULT_CTORS(TranslationRecognizer); + + friend class Microsoft::CognitiveServices::Speech::Session; + + std::function&)> GetTranslationAudioEventConnectionsChangedCallback() + { + return [=](const EventSignal& audioEvent) { + if (&audioEvent == &Synthesizing) + { + translator_synthesizing_audio_set_callback(m_hreco, Synthesizing.IsConnected() ? 
FireEvent_TranslationSynthesisResult : nullptr, this); + } + }; + } + + static void FireEvent_TranslationSynthesisResult(SPXRECOHANDLE hreco, SPXEVENTHANDLE hevent, void* pvContext) + { + UNUSED(hreco); + std::unique_ptr recoEvent{ new TranslationSynthesisEventArgs(hevent) }; + + auto pThis = static_cast(pvContext); + auto keepAlive = pThis->shared_from_this(); + pThis->Synthesizing.Signal(*recoEvent.get()); + } +}; +} } } } // Microsoft::CognitiveServices::Speech::Translation diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_result.h new file mode 100644 index 0000000..bbabcd6 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_translation_result.h @@ -0,0 +1,175 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_translation_result.h: Public API declarations for TranslationResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Translation { + +/// +/// Defines the translation text result. +/// +class TranslationRecognitionResult : public RecognitionResult +{ +private: + + std::map m_translations; + +public: + /// + /// It is intended for internal use only. It creates an instance of . + /// + /// The handle of the result returned by recognizer in C-API. + explicit TranslationRecognitionResult(SPXRESULTHANDLE resultHandle) : + RecognitionResult(resultHandle), + Translations(m_translations) + { + PopulateResultFields(resultHandle); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) -- resultid=%s.", __FUNCTION__, (void*)this, (void*)Handle, ResultId.c_str()); + }; + + /// + /// Destructs the instance. 
+ /// + virtual ~TranslationRecognitionResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p)", __FUNCTION__, (void*)this, (void*)Handle); + } + + /// + /// Presents the translation results. Each item in the map is a key value pair, where key is the language tag of the translated text, + /// and value is the translation text in that language. + /// + const std::map& Translations; + +private: + void PopulateResultFields(SPXRESULTHANDLE resultHandle) + { + SPX_INIT_HR(hr); + + size_t count = 0; + hr = translation_text_result_get_translation_count(resultHandle, &count); + SPX_THROW_ON_FAIL(hr); + + size_t maxLanguageSize = 0; + size_t maxTextSize = 0; + + for (size_t i = 0; i < count; i++) + { + size_t languageSize = 0; + size_t textSize = 0; + + hr = translation_text_result_get_translation(resultHandle, i, nullptr, nullptr, &languageSize, &textSize); + SPX_THROW_ON_FAIL(hr); + + maxLanguageSize = (std::max)(maxLanguageSize, languageSize); + maxTextSize = (std::max)(maxTextSize, textSize); + } + + auto targetLanguage = std::make_unique(maxLanguageSize); + auto translationText = std::make_unique(maxTextSize); + for (size_t i = 0; i < count; i++) + { + hr = translation_text_result_get_translation(resultHandle, i, targetLanguage.get(), translationText.get(), &maxLanguageSize, &maxTextSize); + SPX_THROW_ON_FAIL(hr); + m_translations[Utils::ToSPXString(targetLanguage.get())] = Utils::ToSPXString(translationText.get()); + } + + SPX_DBG_TRACE_VERBOSE("Translation phrases: numberentries: %d", (int)m_translations.size()); +#ifdef _DEBUG + for (const auto& cf : m_translations) + { + (void)(cf); // prevent warning for cf when compiling release builds + SPX_DBG_TRACE_VERBOSE(" phrase for %s: %s", cf.first.c_str(), cf.second.c_str()); + } +#endif + }; + + DISABLE_DEFAULT_CTORS(TranslationRecognitionResult); +}; + + +/// +/// Defines the translation synthesis result, i.e. the voice output of the translated text in the target language. 
+/// +class TranslationSynthesisResult +{ +private: + + ResultReason m_reason; + std::vector m_audioData; + +public: + /// + /// It is intended for internal use only. It creates an instance of + /// + /// The handle of the result returned by recognizer in C-API. + explicit TranslationSynthesisResult(SPXRESULTHANDLE resultHandle) : + Reason(m_reason), + Audio(m_audioData) + { + PopulateResultFields(resultHandle); + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p, handle=0x%p) reason=0x%x", __FUNCTION__, (void*)this, (void*)resultHandle, Reason); + }; + + /// + /// Destructs the instance. + /// + virtual ~TranslationSynthesisResult() + { + SPX_DBG_TRACE_VERBOSE("%s (this=0x%p)", __FUNCTION__, (void*)this); + }; + + /// + /// Recognition reason. + /// + const ResultReason& Reason; + + /// + /// The voice output of the translated text in the target language. + /// + const std::vector& Audio; + + +private: + + DISABLE_DEFAULT_CTORS(TranslationSynthesisResult); + + void PopulateResultFields(SPXRESULTHANDLE resultHandle) + { + SPX_INIT_HR(hr); + + Result_Reason resultReason = ResultReason_NoMatch; + SPX_THROW_ON_FAIL(hr = result_get_reason(resultHandle, &resultReason)); + m_reason = (ResultReason)resultReason; + + size_t bufLen = 0; + hr = translation_synthesis_result_get_audio_data(resultHandle, nullptr, &bufLen); + if (hr == SPXERR_BUFFER_TOO_SMALL) + { + m_audioData.resize(bufLen); + hr = translation_synthesis_result_get_audio_data(resultHandle, m_audioData.data(), &bufLen); + } + SPX_THROW_ON_FAIL(hr); + + SPX_DBG_TRACE_VERBOSE("Translation synthesis: audio length: %zu, vector size: %zu", bufLen, m_audioData.size()); + }; +}; + + +} } } } // Microsoft::CognitiveServices::Speech::Translation diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_user.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_user.h new file mode 100644 index 0000000..0f0a597 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_user.h @@ -0,0 +1,77 @@ 
+// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_user.h: Public API declarations for User C++ class +// + +#pragma once + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Transcription { + +constexpr size_t MAX_USER_ID_LEN = 1024; + +/// +/// Represents a user in a conversation. +/// Added in version 1.5.0. +/// +class User +{ +public: + + /// + /// Create a user with identification string. + /// + /// A user id. + /// A user object + static std::shared_ptr FromUserId(const SPXSTRING& userId) + { + SPXUSERHANDLE m_huser = SPXHANDLE_INVALID; + SPX_THROW_ON_FAIL(user_create_from_id(Utils::ToUTF8(userId).c_str(), &m_huser)); + return std::make_shared(m_huser); + } + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// A user handle. + explicit User(SPXUSERHANDLE huser = SPXHANDLE_INVALID) : m_huser(huser) { } + + /// + /// Virtual destructor. + /// + virtual ~User() { user_release_handle(m_huser); } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXUSERHANDLE() const { return m_huser; } + + /// + /// Get user's id. + /// + /// user's id. + SPXSTRING GetId() const + { + char user_id[MAX_USER_ID_LEN+1]; + std::memset(user_id, 0, MAX_USER_ID_LEN+1); + SPX_THROW_ON_FAIL(user_get_id(m_huser, user_id, MAX_USER_ID_LEN)); + + return user_id; + } + +private: + + DISABLE_COPY_AND_MOVE(User); + + SPXUSERHANDLE m_huser; + +}; + +}}}} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_utils.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_utils.h new file mode 100644 index 0000000..21b4a98 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_utils.h @@ -0,0 +1,312 @@ +// +// Copyright (c) Microsoft. All rights reserved. 
+// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_utils.h: General utility classes and functions. +// + +#pragma once + +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Utils { + +/// +/// Base class that disables the copy constructor +/// +struct NonCopyable +{ + /// + /// Default destructor. + /// + NonCopyable() = default; + + /// + /// Virtual destructor. + /// + virtual ~NonCopyable() = default; + + /// + /// Disable copy constructor. + /// + NonCopyable(const NonCopyable&) = delete; + + /// + /// Disable copy assignment operator. + /// + /// Reference to the object. + NonCopyable& operator=(const NonCopyable &) = delete; +}; + +/// +/// Base class that disables the move constructor +/// +struct NonMovable +{ + /// + /// Default destructor. + /// + NonMovable() = default; + + /// + /// Virtual destructor. + /// + virtual ~NonMovable() = default; + + /// + /// Disable move constructor. + /// + NonMovable(NonMovable &&) = delete; + + /// + /// Disable move assignment operator. + /// + /// Reference to the object. + NonMovable& operator=(NonMovable &&) = delete; +}; + +template +SPXHANDLE CallFactoryMethodRight(F method, Args&&... args) +{ + SPXHANDLE handle; + auto hr = method(std::forward(args)..., &handle); + SPX_THROW_ON_FAIL(hr); + return handle; +} + +template +SPXHANDLE CallFactoryMethodLeft(F method, Args&&... args) +{ + SPXHANDLE handle; + auto hr = method(&handle, std::forward(args)...); + SPX_THROW_ON_FAIL(hr); + return handle; +} + +/// +/// Helper class implementing the scope guard idiom. +/// (The given function will be executed on destruction) +/// +template +class ScopeGuard +{ +public: + ScopeGuard(ScopeGuard&&) = default; + ScopeGuard(const ScopeGuard&) = delete; + + explicit ScopeGuard(F f): m_fn{ f } + {} + + ~ScopeGuard() + { + m_fn(); + } + +private: + F m_fn; +}; + +/// +/// Creates a scope guard with the given function. 
+/// +template +ScopeGuard MakeScopeGuard(F fn) +{ + return ScopeGuard{ fn }; +} + +/// +/// A wrapper around ABI handles that simplifies resource cleanup on exit +/// +/// The type of the ABI handle +/// The default value to set the handle to when initialising or after destroying +/// The return type of the free function +/// The signature of the free function called to release the ABI handle +template< + typename THandle, + typename TRet = AZACHR, + typename TFreeFunc = TRet(AZAC_API_CALLTYPE*)(THandle)> +class AbiHandleWrapper : public NonCopyable +{ +private: + THandle m_handle; + TFreeFunc m_free; + bool m_isValid; + +public: + /// + /// The signature of the free function + /// + using FreeFunc = TFreeFunc; + + /// + /// Creates and ABI handle wrapper for SPXHANDLE types initializing the handle + /// to be SPXHANDLE_INVALID + /// + /// The function used to release the ABI handle + template< + typename IsHandle = THandle, + std::enable_if_t::value, bool> = true + > + AbiHandleWrapper(TFreeFunc freeFunc) : + m_handle{ SPXHANDLE_INVALID }, + m_free{ freeFunc }, + m_isValid{ false } + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, freeFunc == nullptr); + } + + /// + /// Creates an ABI handle wrapper + /// + /// The function used to release the ABI handle + template< + typename IsHandle = THandle, + std::enable_if_t::value, bool> = true + > + AbiHandleWrapper(TFreeFunc freeFunc) : + m_handle{ nullptr }, + m_free{ freeFunc }, + m_isValid{ false } + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, freeFunc == nullptr); + } + + /// + /// Creates an ABI handle wrapper + /// + /// The function used to release the ABI handle + /// The initial ABI handle value + AbiHandleWrapper(TFreeFunc freeFunc, THandle&& handle) : + m_handle{ std::move(handle) }, + m_free{ freeFunc }, + m_isValid{ true } + { + SPX_THROW_HR_IF(SPXERR_INVALID_ARG, freeFunc == nullptr); + } + + /// + /// Destructor + /// + ~AbiHandleWrapper() { Destroy(); } + + /// + /// Move constructor + /// + /// The other item being 
moved + AbiHandleWrapper(AbiHandleWrapper&& other) : + m_handle{ other.m_handle }, + m_free{ other.m_free }, + m_isValid{ other.m_isValid } + { + other.m_handle = THandle{}; + other.m_free = nullptr; + other.m_isValid = false; + } + + /// + /// Move assignment operator + /// + /// The item being moved + /// Reference to ABI handle + AbiHandleWrapper& operator=(AbiHandleWrapper&& other) + { + if (this != &other) + { + Destroy(); + + m_handle = std::move(other.m_handle); + m_free = other.m_free; + m_isValid = other.m_isValid; + + other.m_free = nullptr; + other.m_isValid = false; + } + + return *this; + } + + /// + /// Helper to simplify assigning a new ABI handle value to this wrapper + /// + /// The handle to assign + /// Reference to assigned handle + THandle& operator=(const THandle& other) + { + Destroy(); + + m_handle = other; + return m_handle; + } + + /// + /// Gets the address of the ABI handle. This is useful when calling ABI functions that set the value + /// + THandle* operator&() { return &m_handle; } + + /// + /// Gets the ABI handle value + /// + operator THandle() const { return m_handle; } + +private: + void Destroy() + { + if (m_isValid) + { + m_isValid = false; + if (m_free != nullptr) + { + m_free(m_handle); + } + } + } +}; + +/// +/// A wrapper around ABI handles +/// +using AbiHandle = AbiHandleWrapper; + +/// +/// A wrapper around strings allocated in the ABI layer +/// +using AbiStringHandle = AbiHandleWrapper; + +/// +/// Function that converts a handle to its underlying type. +/// +/// Handle type. +/// Object type. +/// Object from which to get the handle. +template +inline Handle HandleOrInvalid(std::shared_ptr obj) +{ + return obj == nullptr + ? static_cast(SPXHANDLE_INVALID) + : static_cast(*obj.get()); +} + + +template +struct TypeList {}; + +template class F, typename L> +struct TypeListIfAny; + +template class F> +struct TypeListIfAny> +{ + static constexpr bool value{ false }; +}; + +template class F, typename U, typename... 
Us> +struct TypeListIfAny> +{ + static constexpr bool value = F::value || Microsoft::CognitiveServices::Speech::Utils::TypeListIfAny>::value; +}; + +} } } } diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_info.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_info.h new file mode 100644 index 0000000..bcbe99f --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_info.h @@ -0,0 +1,214 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_voice_info.h: Public API declarations for VoiceInfo C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { + +/// +/// Contains information about synthesis voice info +/// Updated in version 1.17.0 +/// +class VoiceInfo +{ +private: + + /// + /// Internal member variable that holds the voice info handle. + /// + SPXRESULTHANDLE m_hresult; + + /*! \cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + voice_info_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + /// + /// Internal member variable that holds the properties associating to the voice info. + /// + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. 
+ explicit VoiceInfo(SPXRESULTHANDLE hresult) : + m_hresult(hresult), + m_properties(hresult), + Name(m_name), + Locale(m_locale), + ShortName(m_shortName), + LocalName(m_localName), + Gender(m_gender), + VoiceType(m_voiceType), + StyleList(m_styleList), + VoicePath(m_voicePath), + Status(m_status), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + m_name = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_name(m_hresult))); + m_locale = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_locale(m_hresult))); + m_shortName = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_short_name(m_hresult))); + m_localName = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_local_name(m_hresult))); + m_styleList = Utils::Split(Utils::CopyAndFreePropertyString(voice_info_get_style_list(m_hresult)), '|'); + Synthesis_VoiceType voiceType; + SPX_THROW_ON_FAIL(voice_info_get_voice_type(hresult, &voiceType)); + m_voiceType = static_cast(voiceType); + m_voicePath = Utils::ToSPXString(Utils::CopyAndFreePropertyString(voice_info_get_voice_path(m_hresult))); + auto gender = Properties.GetProperty("Gender"); + m_gender = gender == "Female" ? SynthesisVoiceGender::Female : gender == "Male" ? SynthesisVoiceGender::Male + : gender == "Neutral" ? SynthesisVoiceGender::Neutral + : SynthesisVoiceGender::Unknown; + auto status = Properties.GetProperty("Status"); + m_status = status == "GA" ? SynthesisVoiceStatus::GeneralAvailability + : status == "Preview" ? SynthesisVoiceStatus::Preview + : status == "Deprecated" ? SynthesisVoiceStatus::Deprecated + : SynthesisVoiceStatus::Unknown; + } + + /// + /// Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + + /// + /// Destructor. + /// + ~VoiceInfo() + { + voice_info_handle_release(m_hresult); + } + + /// + /// Voice name. + /// + const SPXSTRING& Name; + + /// + /// Locale of the voice. 
+ /// + const SPXSTRING& Locale; + + /// + /// Short name. + /// + const SPXSTRING& ShortName; + + /// + /// Local name. + /// + const SPXSTRING& LocalName; + + /// + /// Gender. + /// Added in version 1.17.0 + /// + const SynthesisVoiceGender& Gender; + + /// + /// Local name. + /// + const SynthesisVoiceType& VoiceType; + + /// + /// Style list + /// + const std::vector& StyleList; + + /// + /// Voice path, only valid for offline voices. + /// + const SPXSTRING& VoicePath; + + /// + /// Status of the voice. + /// + const SynthesisVoiceStatus& Status; + + /// + /// Collection of additional VoiceInfo properties. + /// + const PropertyCollection& Properties; + +private: + + DISABLE_DEFAULT_CTORS(VoiceInfo); + + /// + /// Internal member variable that holds the name. + /// + SPXSTRING m_name; + + /// + /// Internal member variable that holds the locale. + /// + SPXSTRING m_locale; + + /// + /// Internal member variable that holds the short name. + /// + SPXSTRING m_shortName; + + /// + /// Internal member variable that holds the local name. + /// + SPXSTRING m_localName; + + /// + /// Internal member variable that holds the gender. + /// + SynthesisVoiceGender m_gender; + + /// + /// Internal member variable that holds the voice type. + /// + SynthesisVoiceType m_voiceType; + + /// + /// Internal member variable that holds the style list. + /// + std::vector m_styleList; + + /// + /// Internal member variable that holds the voice path. + /// + SPXSTRING m_voicePath; + + /// + /// Internal member variable that holds the status. + /// + SynthesisVoiceStatus m_status; +}; + + +} } } // Microsoft::CognitiveServices::Speech diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile.h new file mode 100644 index 0000000..78c8c38 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile.h @@ -0,0 +1,109 @@ +// +// Copyright (c) Microsoft. 
All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_voice_profile.h: Public API declarations for VoiceProfile C++ class +// + +#pragma once +#include + +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +// Forward declaration for friends. +class VoiceProfileClient; + +/// +/// Class for VoiceProfile. +/// Added in version 1.12.0 +/// +class VoiceProfile : public std::enable_shared_from_this +{ +public: + + static std::shared_ptr FromId(const SPXSTRING& Id, VoiceProfileType voiceProfileType = VoiceProfileType::TextIndependentIdentification) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + SPXVOICEPROFILEHANDLE hVoiceProfile; + SPX_THROW_ON_FAIL(::create_voice_profile_from_id_and_type(&hVoiceProfile,Utils::ToUTF8(Id).c_str(), static_cast(voiceProfileType))); + return std::shared_ptr { new VoiceProfile(hVoiceProfile) }; + } + + /// + /// Destructor. + /// + virtual ~VoiceProfile() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + ::voice_profile_release_handle(m_hVoiceProfile); + m_hVoiceProfile = SPXHANDLE_INVALID; + } + + /// + /// Get a voice profile id. + /// + /// the voice profile id. + const SPXSTRING GetId() const + { + // query the string length + uint32_t length = 0; + SPX_THROW_ON_FAIL(voice_profile_get_id(m_hVoiceProfile, nullptr, &length)); + + // retrieve the string + std::unique_ptr buffer(new char[length]); + SPX_THROW_ON_FAIL(voice_profile_get_id(m_hVoiceProfile, buffer.get(), &length)); + return Utils::ToSPXString(buffer.get()); + } + + /// + /// Get the VoiceProfileType from the VoiceProfile. + /// + /// + VoiceProfileType GetType() const + { + int type = -1; + SPX_THROW_ON_FAIL(voice_profile_get_type(m_hVoiceProfile, &type)); + return static_cast(type); + } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. 
+ explicit operator SPXVOICEPROFILEHANDLE() { return m_hVoiceProfile; } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Voice Profile handle. + explicit VoiceProfile(SPXVOICEPROFILEHANDLE hVoiceProfile) : + m_hVoiceProfile(hVoiceProfile) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /*! \endcond */ + +private: + + /*! \cond PRIVATE */ + friend Microsoft::CognitiveServices::Speech::Speaker::VoiceProfileClient; + DISABLE_DEFAULT_CTORS(VoiceProfile); + + SPXVOICEPROFILEHANDLE m_hVoiceProfile; + + /*! \endcond */ +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_client.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_client.h new file mode 100644 index 0000000..17b5dcd --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_client.h @@ -0,0 +1,262 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_voice_profile_client.h: Public API declarations for VoiceProfileClient C++ class +// + +#pragma once +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Class for VoiceProfileClient. +/// This class creates voice profile client for creating, doing enrollment, deleting and reseting a voice profile. +/// Added in version 1.12.0 +/// +class VoiceProfileClient : public std::enable_shared_from_this +{ +private: + + /*! 
\cond PRIVATE */ + + SPXVOICEPROFILECLIENTHANDLE m_hVoiceProfileClient; + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXVOICEPROFILECLIENTHANDLE hclient) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + voice_profile_client_get_property_bag(hclient, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + + /// + /// Create a Voice Profile Client from a speech config + /// + /// Speech configuration. + /// A smart pointer wrapped voice profile client pointer. + static std::shared_ptr FromConfig(std::shared_ptr speechConfig) + { + SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient; + SPX_THROW_ON_FAIL(::create_voice_profile_client_from_config(&hVoiceProfileClient, Utils::HandleOrInvalid(speechConfig))); + return std::shared_ptr{ new VoiceProfileClient(hVoiceProfileClient)}; + } + + /// + /// Destructor. + /// + virtual ~VoiceProfileClient() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + ::voice_profile_client_release_handle(m_hVoiceProfileClient); + m_hVoiceProfileClient = SPXHANDLE_INVALID; + } + + /// + /// Create a Voice Profile. + /// + /// a VoiceProfile type. + /// a locale, e.g "en-us" + /// A smart pointer wrapped voice profile client object. + std::future> CreateProfileAsync(VoiceProfileType profileType, const SPXSTRING& locale) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [profileType, locale, this, keepAlive]() -> std::shared_ptr { + SPXVOICEPROFILEHANDLE hVoiceProfileHandle; + SPX_THROW_ON_FAIL(::create_voice_profile(m_hVoiceProfileClient, static_cast(profileType), Utils::ToUTF8(locale).c_str(), &hVoiceProfileHandle)); + return std::shared_ptr { new VoiceProfile(hVoiceProfileHandle) }; + }); + + return future; + } + + /// + /// Enroll a Voice Profile. + /// + /// a voice profile object. + /// an audio Input. 
+ /// A smart pointer wrapped voice profile enrollment result object. + std::future> EnrollProfileAsync(std::shared_ptr profile, std::shared_ptr audioInput = nullptr) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [profile, audioInput, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hresult; + SPX_THROW_ON_FAIL(::enroll_voice_profile(m_hVoiceProfileClient, + Utils::HandleOrInvalid(profile), + Utils::HandleOrInvalid(audioInput), + &hresult)); + return std::make_shared(hresult); + }); + return future; + } + + /// + /// Delete a Voice Profile. + /// + /// a voice profile object. + /// A smart pointer wrapped voice profile result object. + std::future> DeleteProfileAsync(std::shared_ptr profile) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [profile, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hResultHandle; + SPX_THROW_ON_FAIL(::delete_voice_profile(m_hVoiceProfileClient, + Utils::HandleOrInvalid(profile), + &hResultHandle)); + return std::make_shared(hResultHandle); + }); + return future; + } + + /// + /// Reset a Voice Profile. + /// + /// a voice profile object. + /// A smart pointer wrapped voice profile result object. + std::future> ResetProfileAsync(std::shared_ptr profile) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [profile, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hResultHandle; + SPX_THROW_ON_FAIL(::reset_voice_profile(m_hVoiceProfileClient, + Utils::HandleOrInvalid(profile), + &hResultHandle)); + return std::make_shared(hResultHandle); + }); + return future; + } + + /// + /// Retrieve an enrollment result given the id and type of the Voice Profile. + /// + /// The VoiceProfile Id. + /// The VoiceProfileType. + /// A future of the retrieved VoiceProfileEnrollmentResult. 
+ std::future> RetrieveEnrollmentResultAsync(const SPXSTRING& voiceProfileId, VoiceProfileType voiceProfileType) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [voiceProfileId, voiceProfileType, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE hResultHandle; + SPX_THROW_ON_FAIL(::retrieve_enrollment_result(m_hVoiceProfileClient, Utils::ToUTF8(voiceProfileId).c_str(), static_cast(voiceProfileType), &hResultHandle)); + return std::make_shared(hResultHandle); + }); + return future; + } + + /// + /// Retrieve an enrollment result given the Voice Profile. + /// + /// a voice profile object. + /// + std::future> RetrieveEnrollmentResultAsync(const VoiceProfile& voiceProfile) + { + return RetrieveEnrollmentResultAsync(voiceProfile.GetId(), voiceProfile.GetType()); + } + + /// + /// Get all profiles having the given type. + /// + /// The VoiceProfileType. + /// A future of a vector of extant VoiceProfiles. + std::future>> GetAllProfilesAsync(VoiceProfileType voiceProfileType) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [voiceProfileType, this, keepAlive]() -> std::vector> + { + std::vector> list; + + size_t numChars = 0; + char* json = nullptr; + auto deleteJsonOnEixt = Utils::MakeScopeGuard([&json]() { + ::property_bag_free_string(json); + }); + + SPX_THROW_ON_FAIL(::get_profiles_json(m_hVoiceProfileClient, static_cast(voiceProfileType), &json, &numChars)); + + auto profileList = Utils::Split(json, numChars, '|'); + for (auto& profile: profileList) + { + list.push_back(VoiceProfile::FromId(Utils::ToSPXString(profile), voiceProfileType)); + } + + return list; + }); + return future; + } + + std::future> GetActivationPhrasesAsync(VoiceProfileType voiceProfileType, const SPXSTRING& locale) + { + auto keepAlive = this->shared_from_this(); + auto future = std::async(std::launch::async, [voiceProfileType, locale, this, keepAlive]() -> std::shared_ptr { + SPXRESULTHANDLE 
hresult; + SPX_THROW_ON_FAIL(::get_activation_phrases(m_hVoiceProfileClient, + Utils::ToUTF8(locale).c_str(), + static_cast(voiceProfileType), + &hresult)); + return std::make_shared(hresult); + }); + return future; + } + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXVOICEPROFILECLIENTHANDLE() { return m_hVoiceProfileClient; } + +protected: + + /*! \cond PROTECTED */ + + /// + /// Internal constructor. Creates a new instance using the provided handle. + /// + /// Recognizer handle. + explicit VoiceProfileClient(SPXVOICEPROFILECLIENTHANDLE hVoiceProfileClient) : + m_hVoiceProfileClient(hVoiceProfileClient), + m_properties(hVoiceProfileClient), + Properties(m_properties) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /*! \endcond */ + +private: + + DISABLE_DEFAULT_CTORS(VoiceProfileClient); +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_enrollment_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_enrollment_result.h new file mode 100644 index 0000000..22decb5 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_enrollment_result.h @@ -0,0 +1,242 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_voice_profile_enrollment_result.h: Public API declarations for VoiceProfileEnrollmentResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// A enum that represents the timing information of an enrollment. +/// Added in version 1.12.0. 
+/// +enum class EnrollmentInfoType +{ + /// + /// Number of enrollment audios accepted for this profile. + /// + EnrollmentsCount = 0, + + /// + /// Total length of enrollment audios accepted for this profile. + /// + EnrollmentsLength = 1, + + /// + /// Summation of pure speech(which is the amount of audio after removing silence and non - speech segments) across all profile enrollments. + /// + EnrollmentsSpeechLength = 2, + + /// + /// Amount of pure speech (which is the amount of audio after removing silence and non-speech segments) needed to complete profile enrollment. + /// + RemainingEnrollmentsSpeechLength = 3, + + /// + /// Number of enrollment audios needed to complete profile enrollment. + /// + RemainingEnrollmentsCount = 4, + + /// + /// This enrollment audio length in hundred nanoseconds. + /// + AudioLength = 5, + + /// + /// This enrollment audio pure speech(which is the amount of audio after removing silence and non - speech segments) length in hundred nanoseconds. + /// + AudioSpeechLength = 6 +}; + +/// +/// Represents the result of an enrollment. +/// Added in version 1.12.0. +/// +class VoiceProfileEnrollmentResult final : public RecognitionResult +{ +private: + + SPXSTRING m_profileId; + const int enrollmentsCount; + const uint64_t enrollmentsLength; + const uint64_t enrollmentsSpeechLength; + const int remainingEnrollmentsCount; + const uint64_t remainingEnrollmentsSpeechLength; + const uint64_t audioLength; + const uint64_t audioSpeechLength; + const SPXSTRING createdDateTime; + const SPXSTRING lastUpdatedDateTime; + +public: + + /// + /// Creates a new instance using the provided handle. + /// + /// Result handle. 
+ explicit VoiceProfileEnrollmentResult(SPXRESULTHANDLE hresult) : + RecognitionResult(hresult), + m_profileId(Properties.GetProperty("enrollment.profileId", "")), + enrollmentsCount(std::stoi(Properties.GetProperty("enrollment.enrollmentsCount", "0"))), + enrollmentsLength(static_cast(std::stoll(Properties.GetProperty("enrollment.enrollmentsLengthInSec", "0")))), + enrollmentsSpeechLength(static_cast(std::stoll(Properties.GetProperty("enrollment.enrollmentsSpeechLengthInSec", "0")))), + remainingEnrollmentsCount(std::stoi(Properties.GetProperty("enrollment.remainingEnrollmentsCount", "0"))), + remainingEnrollmentsSpeechLength(std::stoll(Properties.GetProperty("enrollment.remainingEnrollmentsSpeechLengthInSec", "0"))), + audioLength(static_cast(std::stoll(Properties.GetProperty("enrollment.audioLengthInSec", "0")))), + audioSpeechLength(static_cast(std::stoll(Properties.GetProperty("enrollment.audioSpeechLengthInSec", "0")))), + createdDateTime(Properties.GetProperty("enrollment.createdDateTime", "")), + lastUpdatedDateTime(Properties.GetProperty("enrollment.lastUpdatedDateTime", "")), + ProfileId(m_profileId) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// Destructor. + /// + virtual ~VoiceProfileEnrollmentResult() + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + } + + /// + /// The profile id of the speaker in the enrollment. + /// + const SPXSTRING& ProfileId; + + /// + /// Retrieve a textual representation of the created time of the voice profile. + /// + /// + const SPXSTRING& GetCreatedTime() const + { + return createdDateTime; + } + + /// + /// Retrieve a textual representation of the last updated time of the voice profile. + /// + /// + const SPXSTRING& GetLastUpdatedDateTime() const + { + return lastUpdatedDateTime; + } + + /// + /// Enrollment information in ticks. + /// A single tick represents one hundred nanoseconds or one ten-millionth of a second. + /// + /// an enum of EnrollmentInfoType. 
+ /// Duration of recognized speech in ticks. + uint64_t GetEnrollmentInfo(EnrollmentInfoType type) const + { + switch (type) + { + case EnrollmentInfoType::EnrollmentsCount: + return static_cast(enrollmentsCount); + + case EnrollmentInfoType::EnrollmentsLength: + return enrollmentsLength; + + case EnrollmentInfoType::EnrollmentsSpeechLength: + return enrollmentsSpeechLength; + + case EnrollmentInfoType::RemainingEnrollmentsCount: + return static_cast(remainingEnrollmentsCount); + + case EnrollmentInfoType::RemainingEnrollmentsSpeechLength: + return remainingEnrollmentsSpeechLength; + + case EnrollmentInfoType::AudioLength: + return audioLength; + + case EnrollmentInfoType::AudioSpeechLength: + return audioSpeechLength; + + default: + throw std::runtime_error("Invalid enrollmentInfoType!"); + } + } + +private: + + DISABLE_DEFAULT_CTORS(VoiceProfileEnrollmentResult); + +}; + +/// +/// Represents the cancellation details of a result of an enrollment. +/// Added in version 1.12.0. +/// +class VoiceProfileEnrollmentCancellationDetails +{ +private: + + CancellationErrorCode m_errorCode; + +public: + + /// + /// Create an object that represents the details of a canceled enrollment result. + /// + /// a voice profile enrollment result object. + /// a smart pointer of voice profile enrollment cancellation details object. + static std::shared_ptr FromResult(std::shared_ptr result) + { + return std::shared_ptr { new VoiceProfileEnrollmentCancellationDetails(result.get()) }; + } + + /// + /// The error code in case of an unsuccessful enrollment ( is set to Error). + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful enrollment ( is set to Error). + /// + const SPXSTRING ErrorDetails; + +protected: + + /*! 
\cond PROTECTED */ + + VoiceProfileEnrollmentCancellationDetails(VoiceProfileEnrollmentResult* result) : + m_errorCode(GetCancellationErrorCode(result)), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! \endcond */ + +private: + DISABLE_DEFAULT_CTORS(VoiceProfileEnrollmentCancellationDetails); + + /*! \cond PRIVATE */ + + CancellationErrorCode GetCancellationErrorCode(VoiceProfileEnrollmentResult* result) + { + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return static_cast(errorCode); + } + + /*! \endcond */ +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_phrase_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_phrase_result.h new file mode 100644 index 0000000..5695413 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_phrase_result.h @@ -0,0 +1,194 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_voice_profile_phrase_result.h: Public API declarations for VoiceProfilePhraseResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace Microsoft { + namespace CognitiveServices { + namespace Speech { + namespace Speaker { + + /// + /// Class for VoiceProfilePhraseResult. + /// This class represents the result of requesting valid activation phrases for speaker recognition. + /// Added in version 1.18.0 + /// + class VoiceProfilePhraseResult + { + private: + + /*! 
\cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + /*! \endcond */ + + public: + explicit VoiceProfilePhraseResult(SPXRESULTHANDLE hresult) : + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + Properties(m_properties), + m_phrases(std::make_shared>(Utils::Split(m_properties.GetProperty("speakerrecognition.phrases", ""), '|'))), + m_hresult(hresult) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + PopulateResultFields(hresult, &m_resultId, &m_reason); + } + + virtual ~VoiceProfilePhraseResult() + { + ::recognizer_result_handle_release(m_hresult); + m_hresult = SPXHANDLE_INVALID; + } + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Voice profile result reason. + /// + const ResultReason& Reason; + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Gets the activation phrases. + /// + /// Vector of phrases in string form + std::shared_ptr> GetPhrases() + { + return m_phrases; + } + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. 
+ explicit operator SPXRESULTHANDLE() { return m_hresult; } + + private: + DISABLE_DEFAULT_CTORS(VoiceProfilePhraseResult); + + void PopulateResultFields(SPXRESULTHANDLE hresult, SPXSTRING* resultId, Speech::ResultReason* reason) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 2048; + char sz[maxCharCount + 1] = {}; + + if (resultId != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_result_id(hresult, sz, maxCharCount)); + *resultId = Utils::ToSPXString(sz); + } + + if (reason != nullptr) + { + Result_Reason resultReason; + SPX_THROW_ON_FAIL(hr = result_get_reason(hresult, &resultReason)); + *reason = (Speech::ResultReason)resultReason; + } + } + + ResultReason m_reason; + SPXSTRING m_resultId; + std::shared_ptr> m_phrases; + SPXRESULTHANDLE m_hresult; + }; + + /// + /// Class for VoiceProfilePhraseCancellationDetails. + /// This class represents error details of a voice profile result. + /// + class VoiceProfilePhraseCancellationDetails + { + private: + CancellationErrorCode m_errorCode; + + public: + + /// + /// Creates an instance of VoiceProfilePhraseCancellationDetails object for the canceled VoiceProfile. + /// + /// The result that was canceled. + /// A shared pointer to VoiceProfilePhraseCancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + return std::shared_ptr { new VoiceProfilePhraseCancellationDetails(result.get()) }; + } + + /// + /// The error code in case of an unsuccessful voice profile action( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful voice profile action( is set to Error). + /// + const SPXSTRING ErrorDetails; + + protected: + + /*! 
\cond PROTECTED */ + + VoiceProfilePhraseCancellationDetails(VoiceProfilePhraseResult* result) : + m_errorCode(GetCancellationErrorCode(result)), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! \endcond */ + + private: + DISABLE_DEFAULT_CTORS(VoiceProfilePhraseCancellationDetails); + + + CancellationErrorCode GetCancellationErrorCode(VoiceProfilePhraseResult* result) + { + UNUSED(result); + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return (CancellationErrorCode)errorCode; + } + }; + + } + } + } +} diff --git a/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_result.h b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_result.h new file mode 100644 index 0000000..83d0db1 --- /dev/null +++ b/third_party/azure_speech_sdk/include/cxx_api/speechapi_cxx_voice_profile_result.h @@ -0,0 +1,180 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// See https://aka.ms/csspeech/license for the full license information. +// +// speechapi_cxx_speech_voice_profile_result.h: Public API declarations for VoiceProfileResult C++ class +// + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace Microsoft { +namespace CognitiveServices { +namespace Speech { +namespace Speaker { + +/// +/// Class for VoiceProfileResult. +/// This class represents the result of processing voice profiles. +/// Added in version 1.12.0 +/// +class VoiceProfileResult +{ +private: + + /*! 
\cond PRIVATE */ + + class PrivatePropertyCollection : public PropertyCollection + { + public: + PrivatePropertyCollection(SPXRESULTHANDLE hresult) : + PropertyCollection( + [=]() { + SPXPROPERTYBAGHANDLE hpropbag = SPXHANDLE_INVALID; + result_get_property_bag(hresult, &hpropbag); + return hpropbag; + }()) + { + } + }; + + PrivatePropertyCollection m_properties; + + /*! \endcond */ + +public: + explicit VoiceProfileResult(SPXRESULTHANDLE hresult) : + m_properties(hresult), + ResultId(m_resultId), + Reason(m_reason), + Properties(m_properties), + m_hresult(hresult) + { + SPX_DBG_TRACE_SCOPE(__FUNCTION__, __FUNCTION__); + + PopulateResultFields(hresult, &m_resultId, &m_reason); + } + + virtual ~VoiceProfileResult() + { + ::recognizer_result_handle_release(m_hresult); + m_hresult = SPXHANDLE_INVALID; + } + + /// + /// Unique result id. + /// + const SPXSTRING& ResultId; + + /// + /// Voice profile result reason. + /// + const ResultReason& Reason; + + /// + /// A collection of properties and their values defined for this . + /// + PropertyCollection& Properties; + + /// + /// Internal. Explicit conversion operator. + /// + /// A handle. + explicit operator SPXRESULTHANDLE() { return m_hresult; } + +private: + DISABLE_DEFAULT_CTORS(VoiceProfileResult); + + void PopulateResultFields(SPXRESULTHANDLE hresult, SPXSTRING* resultId, Speech::ResultReason* reason) + { + SPX_INIT_HR(hr); + + const size_t maxCharCount = 2048; + char sz[maxCharCount + 1] = {}; + + if (resultId != nullptr) + { + SPX_THROW_ON_FAIL(hr = result_get_result_id(hresult, sz, maxCharCount)); + *resultId = Utils::ToSPXString(sz); + } + + if (reason != nullptr) + { + Result_Reason resultReason; + SPX_THROW_ON_FAIL(hr = result_get_reason(hresult, &resultReason)); + *reason = (Speech::ResultReason)resultReason; + } + } + + ResultReason m_reason; + SPXSTRING m_resultId; + SPXRESULTHANDLE m_hresult; +}; + +/// +/// Class for VoiceProfileCancellationDetails. 
+/// This class represents error details of a voice profile result. +/// +class VoiceProfileCancellationDetails +{ +private: + CancellationErrorCode m_errorCode; + +public: + + /// + /// Creates an instance of VoiceProfileCancellationDetails object for the canceled VoiceProfile. + /// + /// The result that was canceled. + /// A shared pointer to VoiceProfileCancellationDetails. + static std::shared_ptr FromResult(std::shared_ptr result) + { + return std::shared_ptr { new VoiceProfileCancellationDetails(result.get()) }; + } + + /// + /// The error code in case of an unsuccessful voice profile action( is set to Error). + /// If Reason is not Error, ErrorCode is set to NoError. + /// + const CancellationErrorCode& ErrorCode; + + /// + /// The error message in case of an unsuccessful voice profile action( is set to Error). + /// + const SPXSTRING ErrorDetails; + +protected: + + /*! \cond PROTECTED */ + + VoiceProfileCancellationDetails(VoiceProfileResult* result) : + m_errorCode(GetCancellationErrorCode(result)), + ErrorCode(m_errorCode), + ErrorDetails(result->Properties.GetProperty(PropertyId::SpeechServiceResponse_JsonErrorDetails)) + { + } + + /*! \endcond */ + +private: + DISABLE_DEFAULT_CTORS(VoiceProfileCancellationDetails); + + + CancellationErrorCode GetCancellationErrorCode(VoiceProfileResult* result) + { + UNUSED(result); + Result_CancellationErrorCode errorCode = CancellationErrorCode_NoError; + + SPXRESULTHANDLE hresult = (SPXRESULTHANDLE)(*result); + SPX_IFFAILED_THROW_HR(result_get_canceled_error_code(hresult, &errorCode)); + + return (CancellationErrorCode)errorCode; + } +}; + +} } } } // Microsoft::CognitiveServices::Speech::Speaker diff --git a/third_party/azure_speech_sdk/lib/Microsoft.CognitiveServices.Speech.core.lib b/third_party/azure_speech_sdk/lib/Microsoft.CognitiveServices.Speech.core.lib new file mode 100644 index 0000000..e82663d Binary files /dev/null and b/third_party/azure_speech_sdk/lib/Microsoft.CognitiveServices.Speech.core.lib differ