From 2d3d323f3a2384f91cdde1205c8fd90ef95fb06e Mon Sep 17 00:00:00 2001 From: qiujiantao Date: Thu, 13 Mar 2025 12:22:39 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E7=A9=BA=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E8=BF=94=E5=9B=9E=E7=9A=84=E8=AF=AD=E8=A8=80=E8=AF=A6?= =?UTF-8?q?=E6=83=85=EF=BC=8C=E5=A2=9E=E5=8A=A0=E6=9C=AA=E5=AE=9A=E4=B9=89?= =?UTF-8?q?=E8=AF=AD=E8=A8=80=E7=9A=84=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- llm_web_kit/model/lang_id.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llm_web_kit/model/lang_id.py b/llm_web_kit/model/lang_id.py index 92121af7..03945638 100644 --- a/llm_web_kit/model/lang_id.py +++ b/llm_web_kit/model/lang_id.py @@ -234,7 +234,7 @@ def decide_language_func(content_str: str, lang_detect: LanguageIdentification) # return "empty" if the content string is empty if len(content_str.strip()) == 0: - return {'language': 'empty', 'language_details': None} + return {'language': 'empty', 'language_details': 'empty'} if lang_detect.version not in LANG_ID_SUPPORTED_VERSIONS: raise ValueError(f'Unsupported version: {lang_detect.version}. Supported versions: {LANG_ID_SUPPORTED_VERSIONS}') @@ -242,12 +242,13 @@ def decide_language_func(content_str: str, lang_detect: LanguageIdentification) predictions, probabilities = lang_detect.predict(content_str) language = decide_language_by_prob_v176(predictions, probabilities) - language_details = None if lang_detect.version == '218.bin': first_pred = predictions[0] # Extract the full label (e.g., __label__eng_Latn -> eng_Latn) if first_pred.startswith('__label__'): language_details = first_pred.replace('__label__', '') + else: + language_details = 'not_defined' return { 'language': language, From 726efbd04603a4334783e6fded6323e6f210ade8 Mon Sep 17 00:00:00 2001 From: qiujiantao Date: Thu, 13 Mar 2025 12:33:32 +0800 Subject: [PATCH 2/2] update test_lang_id:test_decide_language_func --- tests/llm_web_kit/model/test_lang_id.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/llm_web_kit/model/test_lang_id.py b/tests/llm_web_kit/model/test_lang_id.py index 2c1b1f96..5e1a3023 100644 --- a/tests/llm_web_kit/model/test_lang_id.py +++ b/tests/llm_web_kit/model/test_lang_id.py @@ -125,7 +125,7 @@ def test_decide_language_func(): lang_detect.version = '176.bin' lang_detect.predict.return_value = (['__label__en', '__label__zh'], [0.6, 0.4]) result = decide_language_func('test text', lang_detect) - assert result == {'language': 'en', 'language_details': None} + assert result == {'language': 'en', 'language_details': 'not_defined'} # Test for 218.bin version lang_detect.version = '218.bin' @@ -135,7 +135,7 @@ def test_decide_language_func(): # Test for empty string result = decide_language_func('', lang_detect) - assert result == {'language': 'empty', 'language_details': None} + assert result == {'language': 'empty', 'language_details': 'empty'} def test_update_language_by_str():