From 595e673e73f2c60c637a17c5d98601db4b9a417e Mon Sep 17 00:00:00 2001 From: liukaiwen Date: Mon, 25 Aug 2025 17:28:32 +0800 Subject: [PATCH 1/2] : fix element dict layer key error where html has deeper layer than the template --- llm_web_kit/main_html_parser/parser/layout_batch_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llm_web_kit/main_html_parser/parser/layout_batch_parser.py b/llm_web_kit/main_html_parser/parser/layout_batch_parser.py index 616d72e9..468a5c52 100644 --- a/llm_web_kit/main_html_parser/parser/layout_batch_parser.py +++ b/llm_web_kit/main_html_parser/parser/layout_batch_parser.py @@ -151,7 +151,9 @@ def find_blocks_drop(self, element, depth, element_dict, parent_keyy, parent_lab length_tail = len(element.tail.strip()) idd = element.get('id') tag = element.tag - layer_nodes = element_dict[depth] + layer_nodes = element_dict.get(depth, []) + if len(layer_nodes) == 0: + return class_tag = element.get('class') ori_keyy = (tag, class_tag, idd) if idd and idd.strip(): From e409d66fb42d3df02a4b6b6d9406d3410cb37148 Mon Sep 17 00:00:00 2001 From: liukaiwen Date: Mon, 25 Aug 2025 19:28:10 +0800 Subject: [PATCH 2/2] : fix element dict layer key error where html has deeper layer than the template --- llm_web_kit/main_html_parser/parser/layout_batch_parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llm_web_kit/main_html_parser/parser/layout_batch_parser.py b/llm_web_kit/main_html_parser/parser/layout_batch_parser.py index 468a5c52..5259cea3 100644 --- a/llm_web_kit/main_html_parser/parser/layout_batch_parser.py +++ b/llm_web_kit/main_html_parser/parser/layout_batch_parser.py @@ -151,9 +151,7 @@ def find_blocks_drop(self, element, depth, element_dict, parent_keyy, parent_lab length_tail = len(element.tail.strip()) idd = element.get('id') tag = element.tag - layer_nodes = element_dict.get(depth, []) - if len(layer_nodes) == 0: - return + layer_nodes = element_dict.get(depth, {}) class_tag = element.get('class') ori_keyy = (tag, class_tag, idd) if idd and idd.strip():