diff --git a/llm_web_kit/model/lang_id.py b/llm_web_kit/model/lang_id.py index 92121af7..03945638 100644 --- a/llm_web_kit/model/lang_id.py +++ b/llm_web_kit/model/lang_id.py @@ -234,7 +234,7 @@ def decide_language_func(content_str: str, lang_detect: LanguageIdentification) # return "empty" if the content string is empty if len(content_str.strip()) == 0: - return {'language': 'empty', 'language_details': None} + return {'language': 'empty', 'language_details': 'empty'} if lang_detect.version not in LANG_ID_SUPPORTED_VERSIONS: raise ValueError(f'Unsupported version: {lang_detect.version}. Supported versions: {LANG_ID_SUPPORTED_VERSIONS}') @@ -242,12 +242,13 @@ def decide_language_func(content_str: str, lang_detect: LanguageIdentification) predictions, probabilities = lang_detect.predict(content_str) language = decide_language_by_prob_v176(predictions, probabilities) - language_details = None if lang_detect.version == '218.bin': first_pred = predictions[0] # Extract the full label (e.g., __label__eng_Latn -> eng_Latn) if first_pred.startswith('__label__'): language_details = first_pred.replace('__label__', '') + else: + language_details = 'not_defined' return { 'language': language, diff --git a/tests/llm_web_kit/model/test_lang_id.py b/tests/llm_web_kit/model/test_lang_id.py index 2c1b1f96..5e1a3023 100644 --- a/tests/llm_web_kit/model/test_lang_id.py +++ b/tests/llm_web_kit/model/test_lang_id.py @@ -125,7 +125,7 @@ def test_decide_language_func(): lang_detect.version = '176.bin' lang_detect.predict.return_value = (['__label__en', '__label__zh'], [0.6, 0.4]) result = decide_language_func('test text', lang_detect) - assert result == {'language': 'en', 'language_details': None} + assert result == {'language': 'en', 'language_details': 'not_defined'} # Test for 218.bin version lang_detect.version = '218.bin' @@ -135,7 +135,7 @@ def test_decide_language_func(): # Test for empty string result = decide_language_func('', lang_detect) - assert result == {'language': 'empty', 'language_details': None} + assert result == {'language': 'empty', 'language_details': 'empty'} def test_update_language_by_str():