From b478d07bbaee1021b000fafceba0d2d0d8c6c8b9 Mon Sep 17 00:00:00 2001 From: Howard Huang Date: Tue, 3 Feb 2026 07:07:31 -0500 Subject: [PATCH 1/2] Updating Azure Translation to support new 2025 preview endpoints. --- python/AzureTranslation/README.md | 79 +++- .../acs_translation_component.py | 393 ++++++++++++++++-- .../plugin-files/descriptor/descriptor.json | 30 ++ .../tests/data/language-detect-mn-Cyrl.json | 17 + .../tests/data/language-detect-mn-Mong.json | 17 + .../tests/data/language-detect-zh_chs.json | 17 + .../tests/data/language-detect-zh_cht.json | 17 + .../tests/test_acs_translation.py | 335 ++++++++++++++- 8 files changed, 846 insertions(+), 59 deletions(-) create mode 100644 python/AzureTranslation/tests/data/language-detect-mn-Cyrl.json create mode 100644 python/AzureTranslation/tests/data/language-detect-mn-Mong.json create mode 100644 python/AzureTranslation/tests/data/language-detect-zh_chs.json create mode 100644 python/AzureTranslation/tests/data/language-detect-zh_cht.json diff --git a/python/AzureTranslation/README.md b/python/AzureTranslation/README.md index d12a81f80..e1eefe4b6 100644 --- a/python/AzureTranslation/README.md +++ b/python/AzureTranslation/README.md @@ -1,17 +1,26 @@ # Overview This repository contains source code for the OpenMPF Azure Cognitive Services -Translation Component. This component utilizes the [Azure Cognitive Services -Translator REST endpoint](https://docs.microsoft.com/en-us/azure/cognitive-services/translator/reference/v3-0-translate) -to translate the content of detection properties. It has been tested against v3.0 of the -API. +Translation Component. This component utilizes the Azure Cognitive Services +Translator REST APIs to translate the content of detection properties. 
+ +The component supports both: + +- The legacy Translator Text v3.0 API, and +- The newer Translator preview API (`api-version=2025-10-01-preview`), with + optional integration to the Azure Language "Analyze Text" API + (`api-version=2025-11-15-preview`) for language + script detection. + +By default, the component uses the latest preview translation API. Legacy +v3.0 behavior can be enabled via the `TRANSLATION_API_VERSION` job property +(see **Primary Job Properties** below). This component translates the content of existing detection properties, so it only makes sense to use it with [feed forward](https://openmpf.github.io/docs/site/Feed-Forward-Guide) and when it isn't the first element of a pipeline. -When a detection property is translated, the translation is put in to a new +When a detection property is translated, the translation is put into a new detection property named `TRANSLATION`. The original detection property is not modified. A property named `TRANSLATION TO LANGUAGE` containing the BCP-47 language code of the translated text will also be added. If the language @@ -25,16 +34,29 @@ translate one of the languages. For example, translating "你叫什么名字? ¿Cómo te llamas?" to English results in "What is your name? The Cómo te llamas?". +For languages that have multiple scripts (for example, Mongolian or Serbian), +the component will attempt to use script-aware language tags (such as `mn-Cyrl`, +`mn-Mong`, `sr-Cyrl`, `sr-Latn`) when script information is available from the +language detection endpoint or from job properties. + # Required Job Properties In order for the component to process any jobs, the job properties listed below must be provided. Neither has a default value. - `ACS_URL`: Base URL for the Azure Cognitive Services Translator Endpoint. - e.g. `https://api.cognitive.microsofttranslator.com` or - `https:///translator/text/v3.0`. The URL should - not end with `/translate` because two separate endpoints are - used. 
`ACS_URL + '/translate'` is used for translation. + For example: + - Legacy v3.0: + - `https://api.cognitive.microsofttranslator.com` + - `https:///translator/text/v3.0` + - Preview API: + - `https://.cognitiveservices.azure.com/translator` + + The URL should **not** end with `/translate` because the component appends + the appropriate path internally. `ACS_URL + '/translate'` is used for + translation, and `ACS_URL + '/detect'` may be used when falling back to + the legacy detection endpoint. + This property can also be configured using an environment variable named `MPF_PROP_ACS_URL`. @@ -48,6 +70,10 @@ must be provided. Neither has a default value. - `TO_LANGUAGE`: The BCP-47 language code for the language that the properties should be translated to. +- `LANGUAGE_DETECTION_ENDPOINT`: Optional base URL for the Azure Language + "Analyze Text" API used for language and script detection. If this parameter is not provided, + the component falls back to the legacy /detect behavior on `ACS_URL` (`v3.0`). + - `FEED_FORWARD_PROP_TO_PROCESS`: Comma-separated list of property names indicating which properties in the feed-forward track or detection to consider translating. For example, `TEXT,TRANSCRIPT`. If the first property listed is @@ -82,6 +108,41 @@ must be provided. Neither has a default value. for some Azure deployments. If provided, will be set in the 'Ocp-Apim-Subscription-Region' request header. +- `TRANSLATION_API_VERSION`: Selects which Azure Translator API flavor to use. + This controls both the URL structure and the request/response format. + + Supported values: + + - `LEGACY` or `3.0` – Uses the original Translator Text v3.0 API. + - `ACS_URL` should point to a v3.0-compatible endpoint such as + `https://api.cognitive.microsofttranslator.com`. + + - `LATEST` or `PREVIEW_2025` – Uses the latest preview Translator API (`api-version=2025-10-01-preview`). 
+    - `ACS_URL` should point to a preview-compatible Translator endpoint, such
+      as `https://<your-resource-name>.cognitiveservices.azure.com/translator`.
+
+  If this property is not set, the component defaults to `LATEST` (`2025-10-01-preview`).
+
+- `SUGGESTED_FROM_SCRIPT`: Optional ISO 15924 script code for the source
+  text. Only used when script information is not returned by the
+  detection endpoint. For example, you can set `SUGGESTED_FROM_SCRIPT=Mong`
+  to suggest that Mongolian input text is in traditional Mongolian script when
+  language detection is unable to identify the proper script for the given text.
+
+
+- `DEPLOYMENT_NAME`: Optional preview-only setting that indicates which
+  Translator deployment to use for the target. When provided and
+  `TRANSLATION_API_VERSION` is `LATEST`, the deployment name is
+  passed in the `deploymentName` field for each target, allowing you to route
+  translations to a specific model of interest.
+
+- `TRANSLATION_TONE`: Optional preview-only setting that configures the tone
+  for generated translations. Options are `formal`, `informal`, and `neutral` (default).
+
+- `TRANSLATION_GENDER`: Optional preview-only setting that configures the
+  preferred grammatical gender in translations. Options are `male`, `female`, and `neutral` (default).
+ + # Text Splitter Job Properties The following settings control the behavior of dividing input text into acceptable chunks diff --git a/python/AzureTranslation/acs_translation_component/acs_translation_component.py b/python/AzureTranslation/acs_translation_component/acs_translation_component.py index 6f89c0503..520d12b70 100644 --- a/python/AzureTranslation/acs_translation_component/acs_translation_component.py +++ b/python/AzureTranslation/acs_translation_component/acs_translation_component.py @@ -139,7 +139,8 @@ class DetectResult(NamedTuple): primary_language_confidence: float alternative_language: Optional[str] = None alternative_language_confidence: Optional[float] = None - + script_name: Optional[str] = None + script_code: Optional[str] = None class TranslationResult(NamedTuple): translated_text: str @@ -158,10 +159,116 @@ class UnsupportedSourceLanguage(Exception): NO_SPACE_LANGS = ('JA', # Japanese 'YUE', # Cantonese (Traditional) + 'ZH', # Chinese (General) 'ZH-HANS', # Chinese Simplified 'ZH-HANT') # Chinese Traditional +# Mappings needed as Language Detection and Translation endpoints differ on these codes. +DETECTION_LANG_CODE_OVERRIDES: Dict[str, str] = { + 'DGO': 'doi', # Dogri + 'NO': 'nb', # Norwegian (Bokmål) + 'ZH_CHS': 'zh-Hans', # Chinese Simplified + 'ZH_CHT': 'zh-Hant', # Chinese Traditional +} + + +# The *first* entry for each base language is used as the default when +# we don't have a usable script code. 
+SCRIPT_SPLIT_LANG_VARIANTS: Dict[str, Dict[str, str]] = { + 'MN': { # Mongolian + 'Cyrl': 'mn-Cyrl', + 'Mong': 'mn-Mong', + }, + 'SR': { # Serbian + 'Cyrl': 'sr-Cyrl', + 'Latn': 'sr-Latn', + }, +} + + +class AzureLanguageDetectionClient: + def __init__(self, + endpoint: str, + subscription_key: str, + region: Optional[str] = None, + http_retry: Optional[mpf_util.HttpRetry] = None): + self._endpoint = endpoint.rstrip('/') + self._subscription_key = subscription_key + self._region = region + self._http_retry = http_retry + + def detect_language(self, text: str, country_hint: str = "") -> DetectResult: + url = f"{self._endpoint}/language/:analyze-text?api-version=2025-11-15-preview" + + headers: Dict[str, str] = { + 'Ocp-Apim-Subscription-Key': self._subscription_key, + 'Content-type': 'application/json; charset=UTF-8', + 'X-ClientTraceId': str(uuid.uuid4()) + } + if self._region: + headers['Ocp-Apim-Subscription-Region'] = self._region + + body = { + "kind": "LanguageDetection", + "parameters": { + "modelVersion": "latest" + }, + "analysisInput": { + "documents": [ + { + "id": "1", + "text": text, + "countryHint": country_hint + } + ] + } + } + + encoded_body = json.dumps(body).encode('utf-8') + request = urllib.request.Request(url, encoded_body, headers) + + log.info(f'Sending language detection request to {url}') + log_json(body) + + if self._http_retry: + with self._http_retry.urlopen(request) as response: + result = json.load(response) + else: + with urllib.request.urlopen(request) as response: + result = json.load(response) + + log.info(f'Received response from {url}') + log_json(result) + + try: + doc = result["results"]["documents"][0] + detected = doc["detectedLanguage"] + except (KeyError, IndexError): + raise mpf.DetectionError.DETECTION_FAILED.exception( + 'Language detection request did not return any document results.' 
+ ) + + iso_name = detected.get("iso6391Name") + confidence = detected.get("confidenceScore", 0.0) + script_name = detected.get("scriptName") + script_code = detected.get("scriptIso15924Code") + + if iso_name == "(Unknown)": + raise mpf.DetectionError.DETECTION_FAILED.exception( + 'The language detection service returned "(Unknown)".' + ) + + return DetectResult( + primary_language=iso_name, + primary_language_confidence=confidence, + alternative_language=None, + alternative_language_confidence=None, + script_name=script_name, + script_code=script_code, + ) + + class TranslationClient: # ACS limits the number of characters that can be translated in a single /translate call. # Taken from @@ -169,16 +276,48 @@ class TranslationClient: DETECT_MAX_CHARS = 50_000 def __init__(self, job_properties: Mapping[str, str], sentence_model: TextSplitterModel): + # Decide whether to use legacy (v3.0) or latest (preview) translation API. + api_version = job_properties.get('TRANSLATION_API_VERSION', 'LATEST').upper() + if api_version in ('LEGACY', '3.0'): + self._use_legacy_api = True + elif api_version in ('LATEST', 'PREVIEW_2025'): + self._use_legacy_api = False + else: + log.warning(f'Unknown TRANSLATION_API_VERSION "{api_version}". Defaulting to LATEST.', ) + self._use_legacy_api = False + self._api_version_raw = api_version + + # Optional endpoint for language detection (Azure Language Analyze Text). + self._language_detection_endpoint = job_properties.get( + 'LANGUAGE_DETECTION_ENDPOINT', '' + ).strip() + self._subscription_key = get_required_property('ACS_SUBSCRIPTION_KEY', job_properties) self._subscription_region = job_properties.get('ACS_SUBSCRIPTION_REGION', '') + # Optional translation parameters for the translation preview API. 
+ self._deployment_name = job_properties.get('DEPLOYMENT_NAME', '').strip() + self._translation_tone = job_properties.get('TRANSLATION_TONE', '').strip().lower() + self._translation_gender = job_properties.get('TRANSLATION_GENDER', '').strip().lower() + + self._to_script = self._normalize_script_code( + job_properties.get('TO_SCRIPT', '').strip() + ) + self._suggested_from_script = self._normalize_script_code( + job_properties.get('SUGGESTED_FROM_SCRIPT', '').strip() + ) + self._http_retry = mpf_util.HttpRetry.from_properties(job_properties, log.warning) url_builder = AcsTranslateUrlBuilder(job_properties) self._translate_url = url_builder.url self._to_language = url_builder.to_language.upper() self._provided_from_language = url_builder.from_language - # Need to know the language's word separator in case the text needs to be split up in to + + acs_url = get_required_property('ACS_URL', job_properties) + self._detect_url = create_url(acs_url, 'detect', {}) + + # Need to know the language's word separator in case the text needs to be split up into # multiple translation requests. In that case we will need to combine the results from # each request. 
self._to_lang_word_separator = '' if self._to_language in NO_SPACE_LANGS else ' ' @@ -191,9 +330,6 @@ def __init__(self, job_properties: Mapping[str, str], sentence_model: TextSplitt self._detect_before_translate = mpf_util.get_property(job_properties, 'DETECT_BEFORE_TRANSLATE', True) - acs_url = get_required_property('ACS_URL', job_properties) - self._detect_url = create_url(acs_url, 'detect', {}) - self._sentence_splitter = SentenceSplitter(job_properties, sentence_model) prop_names = job_properties.get('FEED_FORWARD_PROP_TO_PROCESS', 'TEXT,TRANSCRIPT') @@ -233,6 +369,9 @@ def add_translations(self, detection_properties: Dict[str, str]) -> None: detection_properties['TRANSLATION SOURCE LANGUAGE CONFIDENCE'] \ = source_lang_confidence + if detect_result.script_code: + detection_properties['TRANSLATION SOURCE SCRIPT'] = detect_result.script_code + if translation_result.skipped: detection_properties['SKIPPED TRANSLATION'] = 'TRUE' if translation_result.language_not_supported: @@ -249,6 +388,51 @@ def add_translations(self, detection_properties: Dict[str, str]) -> None: return # Only process first matched property. + def _normalize_source_language_for_translation( + self, + lang_code: Optional[str], + script_code: Optional[str] + ) -> Optional[str]: + """ + Normalize a detected / upstream language code into the form expected + by the translation endpoint. + """ + if not lang_code: + return None + + code = lang_code.strip() + if not code: + return None + + upper = code.upper() + + if upper in DETECTION_LANG_CODE_OVERRIDES: + mapped = DETECTION_LANG_CODE_OVERRIDES[upper] + log.info(f'Mapping detected language code "{lang_code}" to translation code "{mapped}".') + return mapped + + # Cases where translation requires a script-specific code. 
+ if upper in SCRIPT_SPLIT_LANG_VARIANTS: + script_map = SCRIPT_SPLIT_LANG_VARIANTS[upper] + norm_script = self._normalize_script_code(script_code) if script_code else None + + if norm_script and norm_script in script_map: + # Exact match of script code -> translation language code. + return script_map[norm_script] + + # No script or unexpected script – fall back. + default_script, default_lang_tag = next(iter(script_map.items())) + log.warning( + f'Detected language "{lang_code}" without a usable script code (got {script_code}). ' + f'Defaulting to translation language "{default_lang_tag}" (script "{default_script}"). If possible, ' + 'configure LANGUAGE_DETECTION_ENDPOINT to use the Azure "analyze-text" ' + 'API so scriptIso15924Code is always returned.' + ) + return default_lang_tag + + return code + + def _translate_text(self, text: str, detection_properties: Dict[str, str]) -> TranslationResult: """ Translates the given text. If the text is longer than ACS allows, we will split up the @@ -275,34 +459,65 @@ def _translate_text(self, text: str, detection_properties: Dict[str, str]) -> Tr detect_result = None from_lang = from_lang_confidence = None - - if from_lang and from_lang.casefold() == self._to_language.casefold(): + if from_lang: + script_for_mapping: Optional[str] = None + if detect_result: + script_for_mapping = detect_result.script_code + normalized_from_lang = self._normalize_source_language_for_translation( + from_lang, + script_for_mapping + ) + else: + normalized_from_lang = None + + from_script: Optional[str] = None + if detect_result and detect_result.script_code: + from_script = detect_result.script_code + elif self._suggested_from_script: + # Fallback script. + from_script = self._suggested_from_script + + # If the source and target languages are the same (after normalization), + # we can skip translation and just annotate metadata. 
+ if normalized_from_lang and normalized_from_lang.casefold() == self._to_language.casefold(): assert from_lang_confidence is not None - translation_info = TranslationResult( - text, DetectResult(from_lang, from_lang_confidence), skipped=True) + # Transfer script info if also detected. + if detect_result is None: + detect = DetectResult(normalized_from_lang, from_lang_confidence) + else: + detect = detect_result + translation_info = TranslationResult(text, detect, skipped=True) + else: - text_replaced_newlines = self._newline_behavior(text, from_lang) + newline_lang_hint = normalized_from_lang or from_lang + text_replaced_newlines = self._newline_behavior(text, newline_lang_hint) grouped_sentences = self._sentence_splitter.split_input_text( - text_replaced_newlines, from_lang, from_lang_confidence) + text_replaced_newlines, + normalized_from_lang, + from_lang_confidence + ) + if not detect_result and grouped_sentences.detected_language: assert grouped_sentences.detected_language_confidence is not None detect_result = DetectResult(grouped_sentences.detected_language, grouped_sentences.detected_language_confidence) - translation_info = self._translate_chunks(grouped_sentences, detect_result) + + translation_info = self._translate_chunks(grouped_sentences, detect_result, from_script) self._translation_cache[text] = translation_info return translation_info def _translate_chunks(self, chunks: SplitTextResult, - detect_result: Optional[DetectResult]) -> TranslationResult: + detect_result: Optional[DetectResult], + from_script: Optional[str]) -> TranslationResult: translated_text_chunks = [] detected_lang = chunks.detected_language detected_lang_confidence = chunks.detected_language_confidence for chunk in chunks.chunks: try: - response_body = self._send_translation_request(chunk, detected_lang) + response_body = self._send_translation_request(chunk, detected_lang, from_script) except UnsupportedSourceLanguage: assert detect_result is not None return TranslationResult('', 
detect_result, True, True) @@ -322,27 +537,98 @@ def _translate_chunks(self, chunks: SplitTextResult, DetectResult(detected_lang, detected_lang_confidence)) - def _send_translation_request(self, text: str, - from_language: Optional[str]) -> AcsResponses.Translate: - if from_language: - url = set_query_params(self._translate_url, {'from': from_language}) + def _send_translation_request(self, + text: str, + from_language: Optional[str], + from_script: Optional[str]) -> AcsResponses.Translate: + headers = get_acs_headers(self._subscription_key, self._subscription_region) + + if self._use_legacy_api: + + # Original v3.0 API behavior. + if from_language: + url = set_query_params(self._translate_url, {'from': from_language}) + else: + url = self._translate_url + + request_body = [ + {'Text': text} + ] else: + # Preview 2025-10-01 translation API. url = self._translate_url - request_body = [ - {'Text': text} - ] + + target: Dict[str, str] = {"language": self._to_language} + if self._deployment_name: + target["deploymentName"] = self._deployment_name + if self._translation_tone: + target["tone"] = self._translation_tone + if self._translation_gender: + target["gender"] = self._translation_gender + if self._to_script: + target["script"] = self._to_script + + input_obj: Dict[str, object] = { + "text": text, + "targets": [target] + } + if from_language: + # Explicitly specify source language if we know it. + input_obj["language"] = from_language + if from_script: + # ISO 15924 script code for the source text. 
+ input_obj["script"] = from_script + + request_body = { + "inputs": [input_obj] + } + encoded_body = json.dumps(request_body).encode('utf-8') - request = urllib.request.Request(url, encoded_body, - get_acs_headers(self._subscription_key, self._subscription_region)) + request = urllib.request.Request(url, encoded_body, headers) log.info(f'Sending POST to {url}') log_json(request_body) with self._http_retry.urlopen( - request, - should_retry=self._prevent_retry_when_unsupported_language) as response: - response_body: AcsResponses.Translate = json.load(response) - log.info(f'Received response from {url}.') - log_json(response_body) - return response_body + request, + should_retry=self._prevent_retry_when_unsupported_language) as response: + raw_body = json.load(response) + + log.info(f'Received response from {url}.') + log_json(raw_body) + + if self._use_legacy_api: + response_body: AcsResponses.Translate = raw_body + else: + # Normalize preview response to the legacy AcsResponses.Translate shape so the + # rest of the code (e.g. _translate_chunks) does not need to change. + response_body_list: List[Dict[str, object]] = [] + value_items = raw_body.get('value', []) + for item in value_items: + translations = [] + for trans in item.get('translations', []): + translations.append({ + 'text': trans['text'], + 'to': trans.get('language', self._to_language) + }) + response_body_list.append({ + 'translations': translations, + 'detectedLanguage': item.get('detectedLanguage') + }) + + response_body = response_body_list + return response_body + + @staticmethod + def _normalize_script_code(script: str) -> Optional[str]: + """ + Normalize user-provided script codes to the ISO 15924 canonical form + Azure expects, e.g. 'latn' -> 'Latn'. If blank, return None. 
+ """ + if not script: + return None + s = script.strip() + if len(s) == 4: + return s[0].upper() + s[1:].lower() + return s @staticmethod def _prevent_retry_when_unsupported_language(url: str, exception: urllib.error.URLError, @@ -355,6 +641,17 @@ def _prevent_retry_when_unsupported_language(url: str, exception: urllib.error.U return True def _detect_language(self, text: str) -> DetectResult: + # Prefer the new Azure Language Analyze Text endpoint if provided. + if self._language_detection_endpoint: + detection_client = AzureLanguageDetectionClient( + self._language_detection_endpoint, + self._subscription_key, + self._subscription_region, + self._http_retry + ) + return detection_client.detect_language(text) + + # Fallback to legacy/detect behavior. response = self._send_detect_request(text) primary_language = response[0]['language'] primary_language_score = response[0]['score'] @@ -584,28 +881,42 @@ def __init__(self, job_properties: Mapping[str, str]): url_parts = urllib.parse.urlparse(base_url) query_dict: Dict[str, List[str]] = urllib.parse.parse_qs(url_parts.query) - query_dict.setdefault('api-version', ['3.0']) self.to_language = (job_properties.get('TO_LANGUAGE') or self.from_query_dict('to', query_dict) or 'en') - query_dict['to'] = [self.to_language] self.from_language = (job_properties.get('FROM_LANGUAGE') or self.from_query_dict('from', query_dict)) - if self.from_language: - query_dict['from'] = [self.from_language] - if suggested_from := job_properties.get('SUGGESTED_FROM_LANGUAGE'): - query_dict['suggestedFrom'] = [suggested_from] + api_version = job_properties.get('TRANSLATION_API_VERSION', 'LATEST').upper() + use_legacy = api_version in ('LEGACY', '3.0') + + if use_legacy: + # Setup v3.0 translate URL. 
+ query_dict.setdefault('api-version', ['3.0']) + query_dict['to'] = [self.to_language] + if self.from_language: + query_dict['from'] = [self.from_language] - if category := job_properties.get('CATEGORY'): - query_dict['category'] = [category] + if suggested_from := job_properties.get('SUGGESTED_FROM_LANGUAGE'): + query_dict['suggestedFrom'] = [suggested_from] - query_string = urllib.parse.urlencode(query_dict, doseq=True) - path = url_parts.path + '/translate' - replaced_parts = url_parts._replace(path=path, query=query_string) - self.url = urllib.parse.urlunparse(replaced_parts) + if category := job_properties.get('CATEGORY'): + query_dict['category'] = [category] + + query_string = urllib.parse.urlencode(query_dict, doseq=True) + path = url_parts.path + '/translate' + replaced_parts = url_parts._replace(path=path, query=query_string) + self.url = urllib.parse.urlunparse(replaced_parts) + else: + # Setup preview translate URL: + # {ACS_URL}/translate?api-version=2025-10-01-preview + base_path = url_parts.path.rstrip('/') + path = base_path + '/translate' + query_string = urllib.parse.urlencode({'api-version': '2025-10-01-preview'}) + replaced_parts = url_parts._replace(path=path, query=query_string) + self.url = urllib.parse.urlunparse(replaced_parts) @staticmethod def from_query_dict(key: str, query_dict: Dict[str, List[str]]) -> Optional[str]: diff --git a/python/AzureTranslation/plugin-files/descriptor/descriptor.json b/python/AzureTranslation/plugin-files/descriptor/descriptor.json index 8754bfad1..9cd46bedc 100644 --- a/python/AzureTranslation/plugin-files/descriptor/descriptor.json +++ b/python/AzureTranslation/plugin-files/descriptor/descriptor.json @@ -25,6 +25,18 @@ "name": "ACS_URL", "description": "Base URL for the Azure Cognitive Services Endpoint. e.g. https://api.cognitive.microsofttranslator.com . Can also be provided by an environment variable with the same name. If both are provided, the job property is used. 
Note that the component was only tested against v3.0 of the API.", "type": "STRING", + "defaultValue": "https://api.cognitive.microsofttranslator.com" + }, + { + "name": "TRANSLATION_API_VERSION", + "description": "Specifies Azure API Version. Current options are `LEGACY` or `3.0` for older version 3.0 of the API and `LATEST` or `PREVIEW_2025` for the upcoming 2025-2026 API. Defaults to newest API", + "type": "STRING", + "defaultValue": "LATEST" + }, + { + "name": "LANGUAGE_DETECTION_ENDPOINT", + "description": "Optional Azure Language API endpoint for language detection. If provided, new detection API is used. If omitted, legacy detection is used.", + "type": "STRING", "defaultValue": "" }, { @@ -116,6 +128,24 @@ "description": "Specifies whether to pass input language to sentence splitter algorithm. Currently, only WtP supports model adjustments by input language.", "type": "BOOLEAN", "defaultValue": "TRUE" + }, + { + "name": "DEPLOYMENT_NAME", + "description": "Optional Azure deployment name for specific translation model.", + "type": "STRING", + "defaultValue": "" + }, + { + "name": "TRANSLATION_TONE", + "description": "Optional tone for translation (`formal`, `informal`, `neutral`).", + "type": "STRING", + "defaultValue": "" + }, + { + "name": "TRANSLATION_GENDER", + "description": "Optional gender for translation (`male`, `female`, `neutral`).", + "type": "STRING", + "defaultValue": "" } ] } diff --git a/python/AzureTranslation/tests/data/language-detect-mn-Cyrl.json b/python/AzureTranslation/tests/data/language-detect-mn-Cyrl.json new file mode 100644 index 000000000..cef93baad --- /dev/null +++ b/python/AzureTranslation/tests/data/language-detect-mn-Cyrl.json @@ -0,0 +1,17 @@ +{ + "results": { + "documents": [ + { + "id": "1", + "detectedLanguage": { + "iso6391Name": "mn", + "confidenceScore": 0.99, + "scriptName": "Cyrillic", + "scriptIso15924Code": "Cyrl" + } + } + ], + "errors": [], + "modelVersion": "latest" + } +} diff --git 
a/python/AzureTranslation/tests/data/language-detect-mn-Mong.json b/python/AzureTranslation/tests/data/language-detect-mn-Mong.json new file mode 100644 index 000000000..ba8fecad7 --- /dev/null +++ b/python/AzureTranslation/tests/data/language-detect-mn-Mong.json @@ -0,0 +1,17 @@ +{ + "results": { + "documents": [ + { + "id": "1", + "detectedLanguage": { + "iso6391Name": "mn", + "confidenceScore": 0.99, + "scriptName": "Mongolian", + "scriptIso15924Code": "Mong" + } + } + ], + "errors": [], + "modelVersion": "latest" + } +} diff --git a/python/AzureTranslation/tests/data/language-detect-zh_chs.json b/python/AzureTranslation/tests/data/language-detect-zh_chs.json new file mode 100644 index 000000000..61df23166 --- /dev/null +++ b/python/AzureTranslation/tests/data/language-detect-zh_chs.json @@ -0,0 +1,17 @@ +{ + "results": { + "documents": [ + { + "id": "1", + "detectedLanguage": { + "iso6391Name": "zh_chs", + "confidenceScore": 0.99, + "scriptName": "Simplified", + "scriptIso15924Code": "Hans" + } + } + ], + "errors": [], + "modelVersion": "latest" + } +} diff --git a/python/AzureTranslation/tests/data/language-detect-zh_cht.json b/python/AzureTranslation/tests/data/language-detect-zh_cht.json new file mode 100644 index 000000000..9f4deba0c --- /dev/null +++ b/python/AzureTranslation/tests/data/language-detect-zh_cht.json @@ -0,0 +1,17 @@ +{ + "results": { + "documents": [ + { + "id": "1", + "detectedLanguage": { + "iso6391Name": "zh_cht", + "confidenceScore": 0.99, + "scriptName": "Traditional", + "scriptIso15924Code": "Hant" + } + } + ], + "errors": [], + "modelVersion": "latest" + } +} diff --git a/python/AzureTranslation/tests/test_acs_translation.py b/python/AzureTranslation/tests/test_acs_translation.py index d2297f717..8cef382c7 100644 --- a/python/AzureTranslation/tests/test_acs_translation.py +++ b/python/AzureTranslation/tests/test_acs_translation.py @@ -45,7 +45,7 @@ from nlp_text_splitter import TextSplitterModel from 
acs_translation_component.acs_translation_component import (AcsTranslationComponent, get_azure_char_count, TranslationClient, NewLineBehavior, ChineseAndJapaneseCodePoints, - AcsTranslateUrlBuilder, get_n_azure_chars) + AcsTranslateUrlBuilder, AzureLanguageDetectionClient, DetectResult, get_n_azure_chars) from acs_translation_component.convert_language_code import iso_to_bcp @@ -417,6 +417,204 @@ def test_translation_cache(self): self.get_request_body() + def test_chinese_alias_codes_mapped_for_translation(self): + with self.subTest('Simplified Chinese via updated detection/translation endpoint'): + self.set_results_file('language-detect-zh_chs.json') + self.set_results_file('preview-translate-single.json') + + props = get_test_properties( + LANGUAGE_DETECTION_ENDPOINT='http://localhost:10670', + TRANSLATION_API_VERSION='LATEST', # use preview translate + ) + client = TranslationClient(props, self.wtp_model) + + detection_props = dict(TEXT=CHINESE_SAMPLE_TEXT) + client.add_translations(detection_props) + + # The component should preserve what detection returns as metadata + self.assertEqual('zh_chs', detection_props['TRANSLATION SOURCE LANGUAGE']) + + # 1) Language detection request (new endpoint) + detect_url, detect_body = self.get_request() + self.assertIn('/language/:analyze-text', detect_url) + self.assertEqual('LanguageDetection', detect_body['kind']) + docs = detect_body['analysisInput']['documents'] + self.assertEqual(1, len(docs)) + self.assertEqual(CHINESE_SAMPLE_TEXT, docs[0]['text']) + + # 2) Preview translate request – from-language must be normalized + translate_url, translate_body = self.get_request() + parsed = urllib.parse.urlparse(translate_url) + qd = urllib.parse.parse_qs(parsed.query) + + # Preview API: only api-version in query string + self.assertEqual(['2025-10-01-preview'], qd.get('api-version')) + self.assertNotIn('from', qd) + self.assertNotIn('to', qd) + + # Body must be in "inputs" shape; source language normalized to zh-Hans + inputs = 
translate_body['inputs'] + self.assertEqual(1, len(inputs)) + first = inputs[0] + self.assertEqual(CHINESE_SAMPLE_TEXT, first['text']) + self.assertEqual('zh-Hans', first['language']) + + targets = first['targets'] + self.assertEqual(1, len(targets)) + # _to_language is uppercased in TranslationClient, so expect EN. + self.assertEqual('EN', targets[0]['language']) + + with self.subTest('Traditional Chinese via updated detection/translation endpoint'): + self.set_results_file('language-detect-zh_cht.json') + self.set_results_file('preview-translate-single.json') + + props = get_test_properties( + LANGUAGE_DETECTION_ENDPOINT='http://localhost:10670', + TRANSLATION_API_VERSION='LATEST', + ) + client = TranslationClient(props, self.wtp_model) + + detection_props = dict(TEXT=CHINESE_SAMPLE_TEXT) + client.add_translations(detection_props) + + self.assertEqual('zh_cht', detection_props['TRANSLATION SOURCE LANGUAGE']) + + detect_url, detect_body = self.get_request() + self.assertIn('/language/:analyze-text', detect_url) + docs = detect_body['analysisInput']['documents'] + self.assertEqual(1, len(docs)) + self.assertEqual(CHINESE_SAMPLE_TEXT, docs[0]['text']) + + translate_url, translate_body = self.get_request() + parsed = urllib.parse.urlparse(translate_url) + qd = urllib.parse.parse_qs(parsed.query) + self.assertEqual(['2025-10-01-preview'], qd.get('api-version')) + self.assertNotIn('from', qd) + self.assertNotIn('to', qd) + + inputs = translate_body['inputs'] + self.assertEqual(1, len(inputs)) + first = inputs[0] + self.assertEqual(CHINESE_SAMPLE_TEXT, first['text']) + self.assertEqual('zh-Hant', first['language']) + + targets = first['targets'] + self.assertEqual(1, len(targets)) + self.assertEqual('EN', targets[0]['language']) + + def test_mongolian_script_mapping_preview_translate_endpoint(self): + mongolian_text = 'Монгол хэлний туршилт' + + with self.subTest('Mongolian (Cyrillic) via updated detection/translation endpoint'): + 
self.set_results_file('language-detect-mn-Cyrl.json') + self.set_results_file('preview-translate-single.json') + + props = get_test_properties( + LANGUAGE_DETECTION_ENDPOINT='http://localhost:10670', + TRANSLATION_API_VERSION='LATEST', + ) + client = TranslationClient(props, self.wtp_model) + + detection_props = dict(TEXT=mongolian_text) + client.add_translations(detection_props) + + # Detection metadata keeps the base language code. + self.assertEqual('mn', detection_props['TRANSLATION SOURCE LANGUAGE']) + + # 1) Language detection endpoint call + detect_url, detect_body = self.get_request() + self.assertIn('/language/:analyze-text', detect_url) + docs = detect_body['analysisInput']['documents'] + self.assertEqual(1, len(docs)) + self.assertEqual(mongolian_text, docs[0]['text']) + + # 2) Preview translate call – language should be mn-Cyrl + translate_url, translate_body = self.get_request() + parsed = urllib.parse.urlparse(translate_url) + qd = urllib.parse.parse_qs(parsed.query) + self.assertEqual(['2025-10-01-preview'], qd.get('api-version')) + self.assertNotIn('from', qd) + self.assertNotIn('to', qd) + + inputs = translate_body['inputs'] + self.assertEqual(1, len(inputs)) + first = inputs[0] + self.assertEqual(mongolian_text, first['text']) + self.assertEqual('mn-Cyrl', first['language']) + + targets = first['targets'] + self.assertEqual(1, len(targets)) + self.assertEqual('EN', targets[0]['language']) + + with self.subTest('Mongolian (Traditional) via updated detection/translation endpoint'): + self.set_results_file('language-detect-mn-Mong.json') + self.set_results_file('preview-translate-single.json') + + props = get_test_properties( + LANGUAGE_DETECTION_ENDPOINT='http://localhost:10670', + TRANSLATION_API_VERSION='LATEST', + ) + client = TranslationClient(props, self.wtp_model) + + detection_props = dict(TEXT=mongolian_text) + client.add_translations(detection_props) + self.assertEqual('mn', detection_props['TRANSLATION SOURCE LANGUAGE']) + + detect_url, 
detect_body = self.get_request() + self.assertIn('/language/:analyze-text', detect_url) + docs = detect_body['analysisInput']['documents'] + self.assertEqual(1, len(docs)) + self.assertEqual(mongolian_text, docs[0]['text']) + + translate_url, translate_body = self.get_request() + parsed = urllib.parse.urlparse(translate_url) + qd = urllib.parse.parse_qs(parsed.query) + self.assertEqual(['2025-10-01-preview'], qd.get('api-version')) + self.assertNotIn('from', qd) + self.assertNotIn('to', qd) + + inputs = translate_body['inputs'] + self.assertEqual(1, len(inputs)) + first = inputs[0] + self.assertEqual(mongolian_text, first['text']) + self.assertEqual('mn-Mong', first['language']) + + targets = first['targets'] + self.assertEqual(1, len(targets)) + self.assertEqual('EN', targets[0]['language']) + + def test_language_detection_endpoint_client(self): + # Use the same mock server, but talk directly to the language detection endpoint + # instead of going through TranslationClient. + self.set_results_file('language-detect-mn-Cyrl.json') + + client = AzureLanguageDetectionClient( + endpoint='http://localhost:10670', + subscription_key='test_key', + region='test-region', + http_retry=None, # Use urllib.request directly + ) + + text = 'Монгол хэлний туршилт' + result = client.detect_language(text, country_hint='MN') + + # Parsed fields from JSON + self.assertEqual('mn', result.primary_language) + self.assertAlmostEqual(0.99, result.primary_language_confidence) + self.assertEqual('Cyrillic', result.script_name) + self.assertEqual('Cyrl', result.script_code) + + # Verify the request we sent + detect_url, detect_body = self.get_request() + self.assertIn('/language/:analyze-text', detect_url) + self.assertEqual('LanguageDetection', detect_body['kind']) + docs = detect_body['analysisInput']['documents'] + self.assertEqual(1, len(docs)) + self.assertEqual('MN', docs[0]['countryHint']) + self.assertEqual(text, docs[0]['text']) + + + def test_different_to_language(self): 
self.set_results_file('eng-detect-result.json') self.set_results_file('results-eng-to-russian.json') @@ -445,6 +643,10 @@ def test_different_to_language(self): def test_url_formation(self): def assert_expected_url(job_properties, expected_to, expected_from, expected_query): + job_properties = { + 'TRANSLATION_API_VERSION': '3.0', + **job_properties, + } builder = AcsTranslateUrlBuilder(job_properties) url_parts = urllib.parse.urlparse(builder.url) self.assertEqual('/test/translate', url_parts.path) @@ -1040,10 +1242,51 @@ def test_azure_char_count(self): self.assertEqual(20, get_azure_char_count('😀' * 5 + '👍' * 5)) + def test_source_language_normalization_overrides_and_scripts(self): + client = TranslationClient(get_test_properties(), self.wtp_model) + self.assertEqual( + 'doi', + client._normalize_source_language_for_translation('dgo', None) + ) + self.assertEqual( + 'nb', + client._normalize_source_language_for_translation('NO', None) + ) + self.assertEqual( + 'zh-Hans', + client._normalize_source_language_for_translation('zh_chs', None) + ) + self.assertEqual( + 'zh-Hant', + client._normalize_source_language_for_translation('ZH_CHT', None) + ) + + # Mongolian (based on script detected) + self.assertEqual( + 'mn-Cyrl', + client._normalize_source_language_for_translation('mn', 'Cyrl') + ) + self.assertEqual( + 'mn-Mong', + client._normalize_source_language_for_translation('MN', 'Mong') + ) + + # Serbian (based on script detected) + self.assertEqual( + 'sr-Cyrl', + client._normalize_source_language_for_translation('sr', 'Cyrl') + ) + self.assertEqual( + 'sr-Latn', + client._normalize_source_language_for_translation('sr', 'latn') + ) + + def get_test_properties(**extra_properties): return { 'ACS_URL': os.getenv('ACS_URL', 'http://localhost:10670/translator'), 'ACS_SUBSCRIPTION_KEY': os.getenv('ACS_SUBSCRIPTION_KEY', 'test_key'), + 'TRANSLATION_API_VERSION': os.getenv('TRANSLATION_API_VERSION', '3.0'), **extra_properties } @@ -1107,24 +1350,49 @@ class 
MockRequestHandler(http.server.BaseHTTPRequestHandler): def do_POST(self): url_parts = urllib.parse.urlparse(self.path) + path = url_parts.path + query_dict = urllib.parse.parse_qs(url_parts.query) + api_version = query_dict.get('api-version', [''])[0] - is_detect = url_parts.path == '/translator/detect' - is_translate = url_parts.path == '/translator/translate' - if not is_detect and not is_translate: + is_translator_detect = (path == '/translator/detect') + is_translator_translate = (path == '/translator/translate') + is_lang_detect = (path == '/language/:analyze-text') + + if not (is_translator_detect or is_translator_translate or is_lang_detect): self._send_error(404, 000, 'NOT FOUND') return + # For supported paths, headers must be valid. self._validate_headers() - self._validate_query_string(url_parts.query, is_translate) - max_chars = TranslationClient.DETECT_MAX_CHARS - self._validate_body(max_chars) + + if is_lang_detect: + # New Azure Language detection endpoint + self._validate_lang_detect_query_string(url_parts.query) + self._validate_lang_detect_body() + + elif is_translator_translate: + # Two API variants for translate: legacy v3.0 and preview 2025-10-01 + if api_version == '3.0': + self._validate_query_string(url_parts.query, is_translate=True) + max_chars = TranslationClient.DETECT_MAX_CHARS + self._validate_body(max_chars) + elif api_version == '2025-10-01-preview': + self._validate_preview_translate_query_string(query_dict) + self._validate_preview_translate_body() + else: + self._send_error(400, 21, 'The API version parameter is missing or invalid.') + return + + else: + self._validate_query_string(url_parts.query, is_translate=False) + max_chars = TranslationClient.DETECT_MAX_CHARS + self._validate_body(max_chars) self.send_response(200) self.end_headers() with self.server.get_results_path().open('rb') as f: shutil.copyfileobj(f, self.wfile) - def _validate_headers(self) -> None: if self.headers['Ocp-Apim-Subscription-Key'] != 'test_key': 
self._send_error( @@ -1156,7 +1424,6 @@ def _validate_query_string(self, query_string, is_translate) -> None: if from_lang[0] == 'si': self._send_error(400, 35, 'The source language is not valid.') - def _validate_body(self, max_chars) -> None: content_len = int(self.headers['Content-Length']) body = json.loads(self.rfile.read(content_len)) @@ -1174,6 +1441,56 @@ def _validate_body(self, max_chars) -> None: self._send_error(429, 0, 'The server rejected the request because the client has ' 'exceeded request limits.') + def _validate_lang_detect_query_string(self, query_string: str) -> None: + query_dict = urllib.parse.parse_qs(query_string) + api_version = query_dict.get('api-version', [''])[0] + if api_version != '2025-11-15-preview': + self._send_error(400, 21, 'The API version parameter is missing or invalid.') + + def _validate_lang_detect_body(self) -> None: + content_len = int(self.headers['Content-Length']) + raw = self.rfile.read(content_len) + body = json.loads(raw) + + full_url = f'http://{self.server.server_name}:{self.server.server_port}{self.path}' + self.server.set_request_info(Request(full_url, body)) + + if body.get('kind') != 'LanguageDetection': + self._send_error(430, 0, 'Language detection request "kind" must be "LanguageDetection".') + + docs = body.get('analysisInput', {}).get('documents', []) + if not docs: + self._send_error(430, 0, 'Language detection request missing "documents".') + + first_doc = docs[0] + if not first_doc.get('text'): + self._send_error(430, 0, 'Language detection request did not contain text.') + + + def _validate_preview_translate_query_string(self, query_dict) -> None: + api_version = query_dict.get('api-version', [''])[0] + if api_version != '2025-10-01-preview': + self._send_error(400, 21, 'The API version parameter is missing or invalid.') + + def _validate_preview_translate_body(self) -> None: + content_len = int(self.headers['Content-Length']) + raw = self.rfile.read(content_len) + body = json.loads(raw) + + 
full_url = f'http://{self.server.server_name}:{self.server.server_port}{self.path}' + self.server.set_request_info(Request(full_url, body)) + + inputs = body.get('inputs', []) + if not inputs: + self._send_error(430, 0, 'Preview translate request missing "inputs".') + + first = inputs[0] + if not first.get('text'): + self._send_error(430, 0, 'Preview translate request text is missing.') + + targets = first.get('targets', []) + if not targets: + self._send_error(430, 0, 'Preview translate request missing "targets".') def _send_error(self, http_status, subcode, message): error_body = { From cf05428d2a494937b268d6abf7cab23bef492d08 Mon Sep 17 00:00:00 2001 From: Howard Huang Date: Mon, 16 Feb 2026 06:35:31 -0500 Subject: [PATCH 2/2] Minor test case update. --- .../acs_translation_component.py | 4 +- .../plugin-files/descriptor/descriptor.json | 66 +++++++++++++++++++ .../tests/test_acs_translation.py | 50 ++++++++++++++ 3 files changed, 119 insertions(+), 1 deletion(-) diff --git a/python/AzureTranslation/acs_translation_component/acs_translation_component.py b/python/AzureTranslation/acs_translation_component/acs_translation_component.py index 520d12b70..77b510249 100644 --- a/python/AzureTranslation/acs_translation_component/acs_translation_component.py +++ b/python/AzureTranslation/acs_translation_component/acs_translation_component.py @@ -461,8 +461,10 @@ def _translate_text(self, text: str, detection_properties: Dict[str, str]) -> Tr if from_lang: script_for_mapping: Optional[str] = None - if detect_result: + if detect_result and detect_result.script_code: script_for_mapping = detect_result.script_code + elif self._suggested_from_script: + script_for_mapping = self._suggested_from_script normalized_from_lang = self._normalize_source_language_for_translation( from_lang, script_for_mapping diff --git a/python/AzureTranslation/plugin-files/descriptor/descriptor.json b/python/AzureTranslation/plugin-files/descriptor/descriptor.json index 9cd46bedc..342c19d0f 100644 
--- a/python/AzureTranslation/plugin-files/descriptor/descriptor.json +++ b/python/AzureTranslation/plugin-files/descriptor/descriptor.json @@ -171,6 +171,44 @@ "description": "Uses Azure Cognitive Services to perform translation on a plain text file.", "algorithm": "AZURETRANSLATION", "properties": [] + }, + { + "name": "AZURE TRANSLATION LEGACY (WITH FF REGION) ACTION", + "description": "Uses Azure Cognitive Services v3.0 translation API on feed-forward tracks and detections.", + "algorithm": "AZURETRANSLATION", + "properties": [ + { + "name": "FEED_FORWARD_TYPE", + "value": "REGION" + }, + { + "name": "IS_ANNOTATOR", + "value": "TRUE" + }, + { + "name": "TRANSLATION_API_VERSION", + "value": "3.0" + }, + { + "name": "LANGUAGE_DETECTION_ENDPOINT", + "value": "" + } + ] + }, + { + "name": "AZURE TRANSLATION LEGACY TEXT FILE ACTION", + "description": "Uses Azure Cognitive Services v3.0 translation API on a plain text file.", + "algorithm": "AZURETRANSLATION", + "properties": [ + { + "name": "TRANSLATION_API_VERSION", + "value": "3.0" + }, + { + "name": "LANGUAGE_DETECTION_ENDPOINT", + "value": "" + } + ] } ], "tasks": [ @@ -187,6 +225,20 @@ "actions": [ "AZURE TRANSLATION TEXT FILE ACTION" ] + }, + { + "name": "AZURE TRANSLATION LEGACY (WITH FF REGION) TASK", + "description": "Uses Azure Cognitive Services v3.0 translation API on feed-forward tracks and detections.", + "actions": [ + "AZURE TRANSLATION LEGACY (WITH FF REGION) ACTION" + ] + }, + { + "name": "AZURE TRANSLATION LEGACY TEXT FILE TASK", + "description": "Uses Azure Cognitive Services v3.0 translation API on a plain text file.", + "actions": [ + "AZURE TRANSLATION LEGACY TEXT FILE ACTION" + ] } ], "pipelines": [ @@ -196,6 +248,20 @@ "tasks": [ "AZURE TRANSLATION TEXT FILE TASK" ] + }, + { + "name": "AZURE TRANSLATION LEGACY (WITH FF REGION) PIPELINE", + "description": "Uses Azure Cognitive Services v3.0 translation API on feed-forward tracks and detections.", + "tasks": [ + "AZURE TRANSLATION LEGACY (WITH 
FF REGION) TASK"
+            ]
+        },
+        {
+            "name": "AZURE TRANSLATION LEGACY TEXT FILE PIPELINE",
+            "description": "Uses Azure Cognitive Services v3.0 translation API on a plain text file.",
+            "tasks": [
+                "AZURE TRANSLATION LEGACY TEXT FILE TASK"
+            ]
+        }
     ]
 }
\ No newline at end of file
diff --git a/python/AzureTranslation/tests/test_acs_translation.py b/python/AzureTranslation/tests/test_acs_translation.py
index 8cef382c7..4c91dac54 100644
--- a/python/AzureTranslation/tests/test_acs_translation.py
+++ b/python/AzureTranslation/tests/test_acs_translation.py
@@ -613,6 +613,57 @@ def test_language_detection_endpoint_client(self):
         self.assertEqual('MN', docs[0]['countryHint'])
         self.assertEqual(text, docs[0]['text'])
 
+    def test_serbian_preview_translate_uses_suggested_script(self):
+
+        serbian_text = 'Ово је тест' # This is a test.
+        self.set_results_file('preview-translate-single.json')
+
+        # By default, when language detection is not used,
+        # Serbian defaults to the Cyrillic script.
+        props = get_test_properties(
+            TRANSLATION_API_VERSION='LATEST',
+            FROM_LANGUAGE='sr',
+            SUGGESTED_FROM_SCRIPT='')
+        client = TranslationClient(props, self.wtp_model)
+
+        detection_props = dict(TEXT=serbian_text)
+        client.add_translations(detection_props)
+
+        translate_url, translate_body = self.get_request()
+        parsed = urllib.parse.urlparse(translate_url)
+        qd = urllib.parse.parse_qs(parsed.query)
+
+        inputs = translate_body['inputs']
+        self.assertEqual(1, len(inputs))
+        result = inputs[0]
+
+        self.assertEqual(serbian_text, result['text'])
+        self.assertEqual('sr-Cyrl', result['language'])
+
+        self.set_results_file('preview-translate-single.json')
+
+        # Shift to the Latin variant:
+        props = get_test_properties(
+            TRANSLATION_API_VERSION='LATEST',
+            FROM_LANGUAGE='sr',
+            SUGGESTED_FROM_SCRIPT='Latn')
+        client = TranslationClient(props, self.wtp_model)
+
+        detection_props = dict(TEXT=serbian_text)
+        client.add_translations(detection_props)
+
+        translate_url, translate_body = self.get_request()
+        parsed = urllib.parse.urlparse(translate_url)
+        qd = 
urllib.parse.parse_qs(parsed.query) + + inputs = translate_body['inputs'] + self.assertEqual(1, len(inputs)) + result = inputs[0] + + self.assertEqual(serbian_text, result['text']) + self.assertEqual('sr-Latn', result['language']) + self.assertEqual('Latn', result['script']) + def test_different_to_language(self):