From 59864e7d6ea461f744116d18bf573b3bcdf07421 Mon Sep 17 00:00:00 2001 From: Yanggq <1041206149@qq.com> Date: Mon, 25 Aug 2025 17:10:44 +0800 Subject: [PATCH] fix: unescaped dollar signs --- llm_web_kit/input/datajson.py | 2 ++ tests/llm_web_kit/extractor/test_extractor_chain.py | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/llm_web_kit/input/datajson.py b/llm_web_kit/input/datajson.py index 3ded4c2b..d1bfda07 100644 --- a/llm_web_kit/input/datajson.py +++ b/llm_web_kit/input/datajson.py @@ -221,6 +221,7 @@ def __process_nested_list(self, items, list_attribute, indent_level=0, exclude_i continue # 如果不是dict也不是list,跳过该项 item_text = item.get('c', '') + item_text = self.__escape_md_special_chars(item_text) # 创建列表项行 item_line = f'{indent}{list_prefix} {item_text}' @@ -320,6 +321,7 @@ def __content_lst_node_2_md(self, content_lst_node: dict, exclude_inline_types: if not title_content: return '' level = content_lst_node['content']['level'] + title_content = self.__escape_md_special_chars(title_content) md_title_level = '#' * int(level) md_title = f'{md_title_level} {title_content}' return md_title diff --git a/tests/llm_web_kit/extractor/test_extractor_chain.py b/tests/llm_web_kit/extractor/test_extractor_chain.py index a5f68540..38cec891 100644 --- a/tests/llm_web_kit/extractor/test_extractor_chain.py +++ b/tests/llm_web_kit/extractor/test_extractor_chain.py @@ -208,7 +208,9 @@ def test_mathlab_html_to_md(self): self.assertEqual(result['track_id'], 'mathlab_code') md_content = result.get_content_list().to_nlp_md() self.assertIn('### Use Integers for Index Variables', md_content) - self.assertIn('### Limit Use of `assert` Statements', md_content) + self.assertIn('### Limit Use of', md_content) + self.assertIn('assert', md_content) + self.assertIn('Statements', md_content) def test_list_to_md(self): """测试第三个数据:这个数据会丢失一些文本信息.""" @@ -230,7 +232,9 @@ def test_code_mix_in_list(self): input_data = DataJson(test_data) result = chain.extract(input_data) md_content = result.get_content_list().to_nlp_md() - self.assertIn('The descendant of `StandardizerActionRunner` interface has to provide', md_content) + self.assertIn('The descendant of ', md_content) + self.assertIn('StandardizerActionRunner', md_content) + self.assertIn('interface has to provide', md_content) def test_code_pre_mixed(self): chain = ExtractSimpleFactory.create(self.config)