diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..97015be
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,8 @@
+# LLM Configuration
+LLM_BASE_URL=https://api.openai.com/v1
+LLM_API_KEY=your-api-key-here
+LLM_MODEL=gpt-4o
+USE_LLM=True
+
+# Debug
+METRICS_DEBUG=False
diff --git a/.gitignore b/.gitignore
index 27dc5a1..1badaeb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,6 +47,9 @@ coverage.xml
webmainbench.egg-info/*
+# cache files
+webmainbench/.cache/
+
# PyPI packaging
build/
dist/
diff --git a/README.md b/README.md
index ca863b6..56e0bf7 100644
--- a/README.md
+++ b/README.md
@@ -148,31 +148,28 @@ hf_hub_download(
)
```
+### Configure LLM (Optional)
+
+LLM-enhanced content splitting improves formula/table/code extraction accuracy. To enable it, copy `.env.example` to `.env` and fill in your API credentials:
+
+```bash
+cp .env.example .env
+# Edit .env and set LLM_BASE_URL, LLM_API_KEY, LLM_MODEL
+```
+
### Run an Evaluation
```python
from webmainbench import DataLoader, Evaluator, ExtractorFactory
dataset = DataLoader.load_jsonl("data/WebMainBench_545.jsonl")
-extractor = ExtractorFactory.create("trafilatura")
+result = Evaluator().evaluate(dataset, ExtractorFactory.create("trafilatura"))
-evaluator = Evaluator(llm_config={
- "use_llm": True,
- "llm_base_url": "https://api.openai.com/v1",
- "llm_api_key": "sk-xxxxxxxxxxxx",
- "llm_model": "gpt-4o",
-})
-result = evaluator.evaluate(dataset, extractor)
+# result.overall_metrics also contains per-category scores (text_edit, table_TEDS, ...)
print(f"Overall Score: {result.overall_metrics['overall']:.4f}")
```
-If you don't need LLM-enhanced content splitting (for formula/table/code extraction), disable it explicitly:
-
-```python
-evaluator = Evaluator(llm_config={"use_llm": False})
-```
-
### Compare Multiple Extractors
```python
diff --git a/README_zh.md b/README_zh.md
index 3694d3b..19e225c 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -148,31 +148,28 @@ hf_hub_download(
)
```
+### 配置 LLM(可选)
+
+LLM 增强内容拆分可提升公式/表格/代码的抽取精度。如需启用,将 `.env.example` 复制为 `.env` 并填写 API 信息:
+
+```bash
+cp .env.example .env
+# 编辑 .env,设置 LLM_BASE_URL、LLM_API_KEY、LLM_MODEL
+```
+
### 运行评测
```python
from webmainbench import DataLoader, Evaluator, ExtractorFactory
dataset = DataLoader.load_jsonl("data/WebMainBench_545.jsonl")
-extractor = ExtractorFactory.create("trafilatura")
+result = Evaluator().evaluate(dataset, ExtractorFactory.create("trafilatura"))
-evaluator = Evaluator(llm_config={
- "use_llm": True,
- "llm_base_url": "https://api.openai.com/v1",
- "llm_api_key": "sk-xxxxxxxxxxxx",
- "llm_model": "gpt-4o",
-})
-result = evaluator.evaluate(dataset, extractor)
+# result.overall_metrics 中还包含各分项指标(text_edit、table_TEDS 等)
print(f"Overall Score: {result.overall_metrics['overall']:.4f}")
```
-如不需要 LLM 增强内容拆分(用于公式/表格/代码抽取),可显式关闭:
-
-```python
-evaluator = Evaluator(llm_config={"use_llm": False})
-```
-
### 多抽取器对比
```python
diff --git a/examples/basic_usage.py b/examples/basic_usage.py
index 9e5c786..93af060 100755
--- a/examples/basic_usage.py
+++ b/examples/basic_usage.py
@@ -1,12 +1,12 @@
#!/usr/bin/env python3
"""
-WebMainBench 基本使用示例
+WebMainBench Basic Usage Example
"""
import json
from pathlib import Path
-# 导入 WebMainBench 模块
+# Import WebMainBench modules
from webmainbench import (
DataLoader, DataSaver, BenchmarkDataset, DataSample,
ExtractorFactory, Evaluator,
@@ -15,9 +15,9 @@
def create_sample_dataset():
- """创建示例数据集"""
-
- # 创建示例数据 - 包含多种内容类型(代码、公式、表格等)
+ """Create a sample dataset"""
+
+ # Create sample data - includes multiple content types (code, formulas, tables, etc.)
samples = [
{
"track_id": "sample-001-programming-tutorial",
@@ -280,8 +280,8 @@ def quicksort(arr):
}
]
- # 创建数据集
- dataset = BenchmarkDataset(name="sample_dataset", description="示例评测数据集")
+ # Create dataset
+ dataset = BenchmarkDataset(name="sample_dataset", description="Sample evaluation dataset")
for sample_data in samples:
sample = DataSample.from_dict(sample_data)
@@ -291,121 +291,121 @@ def quicksort(arr):
def demo_basic_mock_evaluation():
- """演示基本评测流程"""
-
- print("=== WebMainBench 基本使用示例 ===\n")
-
- # 设置日志
+ """Demonstrate the basic evaluation workflow"""
+
+ print("=== WebMainBench Basic Usage Example ===\n")
+
+ # Set up logging
setup_logging(level="INFO")
-
- # 1. 创建或加载数据集
- print("1. 创建示例数据集...")
+
+ # 1. Create or load dataset
+ print("1. Creating sample dataset...")
dataset = create_sample_dataset()
- print(f"数据集包含 {len(dataset)} 个样本")
- print(f"数据集统计: {dataset.get_statistics()}\n")
-
- # 2. 保存数据集到文件
+ print(f"Dataset contains {len(dataset)} samples")
+ print(f"Dataset statistics: {dataset.get_statistics()}\n")
+
+ # 2. Save dataset to file
data_dir = Path("data")
data_dir.mkdir(exist_ok=True)
-
+
dataset_path = data_dir / "sample_dataset.jsonl"
DataSaver.save_jsonl(dataset, dataset_path, include_results=False)
- print(f"数据集已保存到: {dataset_path}\n")
-
- # 3. 重新加载数据集
- print("2. 重新加载数据集...")
+ print(f"Dataset saved to: {dataset_path}\n")
+
+ # 3. Reload dataset
+ print("2. Reloading dataset...")
loaded_dataset = DataLoader.load_jsonl(dataset_path)
- print(f"加载的数据集包含 {len(loaded_dataset)} 个样本\n")
-
- # 4. 列出可用的抽取器
- print("3. 可用的抽取器:")
+ print(f"Loaded dataset contains {len(loaded_dataset)} samples\n")
+
+ # 4. List available extractors
+ print("3. Available extractors:")
available_extractors = ExtractorFactory.list_available()
for extractor_name in available_extractors:
print(f" - {extractor_name}")
print()
-
- # 5. 创建评测器
- print("4. 创建评测器...")
+
+ # 5. Create evaluator
+ print("4. Creating evaluator...")
evaluator = Evaluator()
- print(f"可用的评测指标: {evaluator.metric_calculator.list_available_metrics()}\n")
-
- # 6. 创建一个模拟抽取器进行演示
- print("5. 创建模拟抽取器...")
-
+ print(f"Available evaluation metrics: {evaluator.metric_calculator.list_available_metrics()}\n")
+
+ # 6. Create a mock extractor for demonstration
+ print("5. Creating mock extractor...")
+
from webmainbench.extractors import BaseExtractor, ExtractionResult
-
+
class MockExtractor(BaseExtractor):
- """模拟抽取器,用于演示"""
-
+ """Mock extractor for demonstration"""
+
def _setup(self):
pass
-
+
def _extract_content(self, html, url=None):
- # 简单的模拟抽取逻辑
- if "标题" in html:
- content = "# 提取的标题\n\n提取的正文内容。"
+ # Simple mock extraction logic
+ if "heading" in html.lower() or "title" in html.lower():
+ content = "# Extracted Title\n\nExtracted body content."
content_list = [
- {"type": "heading", "content": "提取的标题", "level": 1},
- {"type": "paragraph", "content": "提取的正文内容。"}
+ {"type": "heading", "content": "Extracted Title", "level": 1},
+ {"type": "paragraph", "content": "Extracted body content."}
]
else:
- content = "提取的内容"
- content_list = [{"type": "paragraph", "content": "提取的内容"}]
-
+ content = "Extracted content"
+ content_list = [{"type": "paragraph", "content": "Extracted content"}]
+
return ExtractionResult(
content=content,
content_list=content_list,
success=True,
confidence_score=0.85
)
-
- # 注册模拟抽取器
+
+ # Register mock extractor
ExtractorFactory.register("mock", MockExtractor)
mock_extractor = ExtractorFactory.create("mock")
- print("模拟抽取器已创建\n")
-
- # 7. 运行评测
- print("6. 运行评测...")
+ print("Mock extractor created\n")
+
+ # 7. Run evaluation
+ print("6. Running evaluation...")
result = evaluator.evaluate(
dataset=loaded_dataset,
extractor=mock_extractor,
- max_samples=2 # 限制样本数量用于演示
+ max_samples=2 # Limit sample count for demonstration
)
-
- # 8. 显示结果
- print("\n7. 评测结果:")
+
+ # 8. Display results
+ print("\n7. Evaluation results:")
print("=" * 50)
formatted_results = format_results(result.to_dict())
print(formatted_results)
-
- # 9. 保存结果
+
+ # 9. Save results
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)
-
+
results_path = results_dir / "mock_evaluation_results.json"
DataSaver.save_evaluation_results(result, results_path)
- print(f"\n结果已保存到: {results_path}")
-
- # 10. 生成报告
+ print(f"\nResults saved to: {results_path}")
+
+ # 10. Generate report
report_path = results_dir / "mock_evaluation_report.csv"
DataSaver.save_summary_report(result, report_path)
- print(f"报告已保存到: {report_path}")
+ print(f"Report saved to: {report_path}")
def demo_llm_webkit_evaluation():
- """演示LLM-WebKit抽取器的6项指标评测"""
-
- print("=== LLM-WebKit Extractor 6项指标评测示例 ===\n")
-
- # 设置日志
+ """Demonstrate 6-metric evaluation with LLM-WebKit extractor"""
+
+ print("=== LLM-WebKit Extractor 6-Metric Evaluation Example ===\n")
+
+ # Set up logging
setup_logging(level="INFO")
-
- # 1. 创建包含各种内容类型的测试数据集
- print("1. 创建包含多种内容类型的测试数据集...")
-
+
+ # 1. Create test dataset with various content types
+ print("1. Creating test dataset with multiple content types...")
+
samples = []
-
- # 样本1: 包含文本和代码
+
+ # Sample 1: text and code
samples.append(DataSample(
id="text_code_sample",
html="""
@@ -440,7 +440,7 @@ def hello_world():
{"type": "text", "content": "以上代码展示了一个简单的Python函数。"}
]
))
-
+
# 样本2: 包含表格
samples.append(DataSample(
id="table_sample",
@@ -483,7 +483,7 @@ def hello_world():
{"type": "table", "content": "| 产品 | 销量 | 收入 |\n|------|------|------|\n| 产品A | 100 | 1000 |\n| 产品B | 200 | 3000 |"}
]
))
-
+
# 样本3: 包含公式
samples.append(DataSample(
id="formula_sample",
@@ -511,238 +511,237 @@ def hello_world():
{"type": "formula", "content": "\\int_{-\\infty}^{\\infty} e^{-x^2} dx = \\sqrt{\\pi}"}
]
))
-
- # 创建数据集并添加样本
- dataset = BenchmarkDataset(name="llm_webkit_test", description="LLM-WebKit 6项指标测试数据集")
+
+ # Create dataset and add samples
+ dataset = BenchmarkDataset(name="llm_webkit_test", description="LLM-WebKit 6-metric test dataset")
for sample in samples:
dataset.add_sample(sample)
-
- print(f"测试数据集包含 {len(dataset)} 个样本")
- print(f"样本类型: 文本+代码, 表格, 公式\n")
-
- # 2. 创建LLM-WebKit抽取器
- print("2. 创建LLM-WebKit抽取器...")
-
- # 显示所有可用的抽取器
+
+ print(f"Test dataset contains {len(dataset)} samples")
+    print("Sample types: text+code, table, formula\n")
+
+ # 2. Create LLM-WebKit extractor
+ print("2. Creating LLM-WebKit extractor...")
+
+ # Show all available extractors
available_extractors = ExtractorFactory.list_available()
- print(f"可用的抽取器: {available_extractors}")
-
- # 直接创建LLM-WebKit抽取器,设置模型路径
+ print(f"Available extractors: {available_extractors}")
+
+ # Create LLM-WebKit extractor directly with model path
config = {
"model_path": "/Users/chupei/model/checkpoint-3296"
}
extractor = ExtractorFactory.create("llm-webkit", config=config)
- print(f"✅ LLM-WebKit抽取器创建成功,模型路径: {config['model_path']}")
-
+ print(f"LLM-WebKit extractor created successfully, model path: {config['model_path']}")
+
print()
-
- # 3. 创建评测器并显示所有可用指标
- print("3. 创建评测器...")
+
+ # 3. Create evaluator and show all available metrics
+ print("3. Creating evaluator...")
evaluator = Evaluator()
available_metrics = evaluator.metric_calculator.list_available_metrics()
- print(f"✅ 可用的评测指标 ({len(available_metrics)}项):")
-
- # 按照6项指标分类显示
+ print(f"Available evaluation metrics ({len(available_metrics)} total):")
+
+ # Display by the 6 metric categories
target_metrics = ["overall", "text_edit", "code_edit", "table_edit", "table_TEDS", "formula_edit"]
-
+
for metric in target_metrics:
if metric in available_metrics:
- print(f" ✅ {metric}")
+ print(f" {metric}")
else:
- print(f" ❌ {metric} (未注册)")
-
+ print(f" {metric} (not registered)")
+
print()
-
- # 4. 运行评测
- print("4. 开始评测...")
+
+ # 4. Run evaluation
+ print("4. Starting evaluation...")
print("=" * 60)
-
+
result = evaluator.evaluate(
dataset=dataset,
extractor=extractor,
- max_samples=None # 评测所有样本
+ max_samples=None # Evaluate all samples
)
-
- # 5. 显示详细的6项指标结果
- print("\n5. 📊 6项指标详细评测结果:")
+
+ # 5. Display detailed 6-metric results
+ print("\n5. 6-metric detailed evaluation results:")
print("=" * 60)
-
+
results_dict = result.to_dict()
-
- # 从overall_metrics中提取指标结果
+
+ # Extract metric results from overall_metrics
metrics = results_dict.get('overall_metrics', {})
-
- # 按照指标分类显示
- print(f"\n🏆 综合指标:")
+
+ # Display by metric category
+    print("\nOverall metrics:")
if 'overall' in metrics:
- print(f" overall (综合得分): {metrics['overall']:.4f}")
+ print(f" overall (combined score): {metrics['overall']:.4f}")
else:
- print(" overall: 未计算")
-
- print(f"\n📝 文本相关指标:")
+ print(" overall: not calculated")
+
+    print("\nText-related metrics:")
if 'text_edit' in metrics:
- print(f" text_edit (文本编辑距离): {metrics['text_edit']:.4f}")
+ print(f" text_edit (text edit distance): {metrics['text_edit']:.4f}")
else:
- print(" text_edit: 未计算")
+ print(" text_edit: not calculated")
if 'code_edit' in metrics:
- print(f" code_edit (代码编辑距离): {metrics['code_edit']:.4f}")
+ print(f" code_edit (code edit distance): {metrics['code_edit']:.4f}")
else:
- print(" code_edit: 未计算")
-
- print(f"\n📊 表格相关指标:")
+ print(" code_edit: not calculated")
+
+    print("\nTable-related metrics:")
if 'table_edit' in metrics:
- print(f" table_edit (表格编辑距离): {metrics['table_edit']:.4f}")
+ print(f" table_edit (table edit distance): {metrics['table_edit']:.4f}")
else:
- print(" table_edit: 未计算")
+ print(" table_edit: not calculated")
if 'table_TEDS' in metrics:
- print(f" table_TEDS (表格结构相似度): {metrics['table_TEDS']:.4f}")
+ print(f" table_TEDS (table structure similarity): {metrics['table_TEDS']:.4f}")
else:
- print(" table_TEDS: 未计算")
-
- print(f"\n🧮 公式相关指标:")
+ print(" table_TEDS: not calculated")
+
+    print("\nFormula-related metrics:")
if 'formula_edit' in metrics:
- print(f" formula_edit (公式编辑距离): {metrics['formula_edit']:.4f}")
+ print(f" formula_edit (formula edit distance): {metrics['formula_edit']:.4f}")
else:
- print(" formula_edit: 未计算")
-
- print(f"\n📈 详细统计:")
- print(f" 总样本数: {len(dataset)}")
+ print(" formula_edit: not calculated")
+
+    print("\nDetailed statistics:")
+ print(f" Total samples: {len(dataset)}")
success_count = len([s for s in results_dict.get('sample_results', []) if s.get('extraction_success', False)])
failure_count = len(dataset) - success_count
- print(f" 成功样本数: {success_count}")
- print(f" 失败样本数: {failure_count}")
-
- # 6. 保存结果到文件
+ print(f" Successful samples: {success_count}")
+ print(f" Failed samples: {failure_count}")
+
+ # 6. Save results to file
print("\n" + "=" * 60)
- print("6. 保存评测结果...")
-
+ print("6. Saving evaluation results...")
+
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)
-
- # 保存详细结果
+
+ # Save detailed results
results_path = results_dir / "llm_webkit_evaluation_results.json"
- DataSaver.save_evaluation_results(result, results_path) # 直接传递result对象
- print(f"✅ 详细结果已保存到: {results_path}")
-
- # 生成CSV报告
+ DataSaver.save_evaluation_results(result, results_path) # Pass result object directly
+ print(f"Detailed results saved to: {results_path}")
+
+ # Generate CSV report
report_path = results_dir / "llm_webkit_evaluation_report.csv"
- DataSaver.save_summary_report(result, report_path) # 直接传递result对象
- print(f"✅ CSV报告已保存到: {report_path}")
-
+ DataSaver.save_summary_report(result, report_path) # Pass result object directly
+ print(f"CSV report saved to: {report_path}")
+
print("\n" + "=" * 60)
- print("✅ LLM-WebKit 6项指标评测完成!")
+ print("LLM-WebKit 6-metric evaluation complete!")
def demo_dataset_with_extraction():
- """演示保存带有抽取内容的数据集"""
- print("=== 演示:保存带有抽取内容的数据集 ===")
-
+ """Demonstrate saving a dataset with extracted content"""
+ print("=== Demo: Saving a Dataset with Extracted Content ===")
+
from webmainbench import DataLoader, DataSaver, Evaluator, ExtractorFactory
from pathlib import Path
-
- # 配置文件路径
+
+ # Configure file paths
data_dir = Path("data")
dataset_path = data_dir / "sample_dataset.jsonl"
# dataset_path = "/Users/chupei/Downloads/WebMainBench_dataset_merge_2549.jsonl"
-
- print(f"📂 数据集文件: {dataset_path}")
-
- # 🔧 创建llm-webkit抽取器(统一使用)
+
+ print(f"Dataset file: {dataset_path}")
+
+ # Create llm-webkit extractor (used uniformly)
extractor_config = {"model_path": "/Users/chupei/model/checkpoint-3296"}
extractor = ExtractorFactory.create("llm-webkit", config=extractor_config)
- print(f"🤖 使用抽取器: {extractor.name}")
-
- # 创建评测器
+ print(f"Using extractor: {extractor.name}")
+
+ # Create evaluator
evaluator = Evaluator()
-
- # 🔧 选择评测模式:内存模式 vs 批处理模式
- USE_BATCHED_MODE = True # 设置为True使用批处理模式(适用于大数据集)
-
+
+ # Choose evaluation mode: in-memory mode vs batched mode
+ USE_BATCHED_MODE = True # Set to True to use batched mode (suitable for large datasets)
+
if USE_BATCHED_MODE:
- print("🔄 使用批处理模式(内存优化)")
-
- # 🚀 批处理评测(适用于大数据集)
+ print("Using batched mode (memory-optimized)")
+
+ # Batched evaluation (suitable for large datasets)
result = evaluator.evaluate_batched(
jsonl_file_path=dataset_path,
- extractor=extractor, # 直接传递extractor对象
- batch_size=10, # 小批次
- max_samples=20 # 演示用
+ extractor=extractor, # Pass extractor object directly
+ batch_size=10, # Small batch size
+ max_samples=20 # For demonstration
)
- print(f"✅ 批处理评测完成,总体得分: {result.overall_metrics.get('overall', 0):.4f}")
-
- # 为了保存带有抽取内容的数据集,需要重新加载原始数据集
- # 注:这里只是短暂加载用于保存,不影响前面的内存优化评测
+ print(f"Batched evaluation complete, overall score: {result.overall_metrics.get('overall', 0):.4f}")
+
+ # To save the dataset with extraction content, reload the original dataset temporarily
+ # Note: this is only a brief load for saving and does not affect the memory-optimized evaluation above
dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
dataset.name = result.dataset_name
-
+
else:
- print("🔄 使用传统内存模式")
-
- # 从文件加载数据集
- print(f"📂 从文件加载数据集: {dataset_path}")
+ print("Using traditional in-memory mode")
+
+ # Load dataset from file
+ print(f"Loading dataset from file: {dataset_path}")
dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
dataset.name = "WebMainBench_with_extraction"
- dataset.description = "演示抽取内容保存的测试数据集"
-
- print(f"📊 加载数据集完成,包含 {len(dataset.samples)} 个样本")
-
- # 运行评测
+ dataset.description = "Test dataset demonstrating extraction content saving"
+
+ print(f"Dataset loaded, contains {len(dataset.samples)} samples")
+
+ # Run evaluation
result = evaluator.evaluate(dataset, extractor)
-
- print(f"✅ 评测完成,总体得分: {result.overall_metrics.get('overall', 0):.4f}")
-
- # 保存带有抽取内容的数据集
+
+ print(f"Evaluation complete, overall score: {result.overall_metrics.get('overall', 0):.4f}")
+
+ # Save dataset with extracted content
results_dir = Path("results")
enriched_dataset_path = results_dir / f"{dataset.name}_with_{extractor.name}_extraction.jsonl"
-
+
DataSaver.save_dataset_with_extraction(
results=result,
- dataset=dataset,
+ dataset=dataset,
file_path=enriched_dataset_path,
extractor_name=extractor.name
)
-
- print(f"💾 已保存带有抽取内容的数据集到: {enriched_dataset_path}")
-
- # 保存评测结果和摘要报告
+
+ print(f"Dataset with extracted content saved to: {enriched_dataset_path}")
+
+ # Save evaluation results and summary report
evaluation_results_path = results_dir / f"{dataset.name}_{extractor.name}_evaluation_results.json"
summary_report_path = results_dir / f"{dataset.name}_{extractor.name}_evaluation_report.csv"
-
+
DataSaver.save_evaluation_results(result, evaluation_results_path)
DataSaver.save_summary_report(result, summary_report_path)
-
- print(f"📊 已保存评测结果到: {evaluation_results_path}")
- print(f"📈 已保存摘要报告到: {summary_report_path}")
-
- # 显示保存的字段信息
- print("\n📋 保存的新字段包括:")
- print(f" - {extractor.name}_content: 抽取的内容")
- print(f" - {extractor.name}_content_list: 抽取的结构化内容列表")
- print(f" - {extractor.name}_success: 抽取是否成功")
- print(f" - {extractor.name}_time: 抽取耗时")
- print(f" - {extractor.name}_*_score: 各项指标分数")
+
+ print(f"Evaluation results saved to: {evaluation_results_path}")
+ print(f"Summary report saved to: {summary_report_path}")
+
+ # Display saved field info
+ print("\nNewly saved fields include:")
+ print(f" - {extractor.name}_content: extracted content")
+ print(f" - {extractor.name}_content_list: extracted structured content list")
+ print(f" - {extractor.name}_success: whether extraction succeeded")
+ print(f" - {extractor.name}_time: extraction time")
+ print(f" - {extractor.name}_*_score: metric scores")
def demo_multi_extraction():
- """演示保存带有多个抽取器抽取内容的数据集(支持批处理模式)"""
- print("=== 演示:保存带有多个抽取器抽取内容的数据集 ===")
+ """Demonstrate saving a dataset with content from multiple extractors (supports batched mode)"""
+ print("=== Demo: Saving a Dataset with Multiple Extractor Results ===")
from webmainbench import DataLoader, DataSaver, Evaluator, ExtractorFactory
from pathlib import Path
import time
-
- # 设置日志
+ # Set up logging
setup_logging(level="INFO")
- # 配置文件路径
+ # Configure file paths
data_dir = Path("../data")
# dataset_path = data_dir / "sample_dataset.jsonl"
dataset_path = "/home/lulindong/Pycharm_projects/cc/WebMainBench_1904_v1_WebMainBench_dataset_merge_with_llm_webkit.jsonl"
- print(f"📂 数据集文件: {dataset_path}")
+ print(f"Dataset file: {dataset_path}")
- # 🔧 定义要使用的抽取器列表及配置
+ # Define list of extractors and their configurations
extractors_info = [
{"name": "resiliparse", "config": {
"main_content": True,
@@ -755,68 +754,68 @@ def demo_multi_extraction():
{"name": "magic-html", "config": {}},
]
- # 🔧 选择评测模式:内存模式 vs 批处理模式
- USE_BATCHED_MODE = True # 大数据集建议设为True
- BATCH_SIZE = 10 # 批处理大小
- MAX_SAMPLES = None # 演示用(全量评测可设为None)
+ # Choose evaluation mode: in-memory mode vs batched mode
+ USE_BATCHED_MODE = True # Recommended True for large datasets
+ BATCH_SIZE = 10 # Batch size
+ MAX_SAMPLES = None # For demonstration (set None for full evaluation)
- # 创建结果目录
+ # Create results directory
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)
- # 存储所有抽取器的评测结果和性能数据
+ # Store evaluation results and performance data for all extractors
all_results = []
extractor_performance = []
- # 为每个抽取器运行评测
+ # Run evaluation for each extractor
for info in extractors_info:
extractor_name = info["name"]
config = info["config"]
try:
- # 创建抽取器实例
+ # Create extractor instance
extractor = ExtractorFactory.create(extractor_name, config=config)
- print(f"\n🤖 使用抽取器: {extractor.name}")
+ print(f"\nUsing extractor: {extractor.name}")
except Exception as e:
- print(f"⚠️ {extractor_name} 抽取器创建失败: {e}")
+ print(f"Failed to create extractor {extractor_name}: {e}")
continue
- # 记录总耗时
+ # Record total elapsed time
start_time = time.time()
- # 初始化评测器
+ # Initialize evaluator
evaluator = Evaluator()
- # 选择批处理模式或传统模式
+ # Choose batched or traditional mode
if USE_BATCHED_MODE:
- print(f"🔄 使用批处理模式(批大小: {BATCH_SIZE},最大样本: {MAX_SAMPLES or '全部'})")
- # 批处理评测(内存优化)
+ print(f"Using batched mode (batch size: {BATCH_SIZE}, max samples: {MAX_SAMPLES or 'all'})")
+ # Batched evaluation (memory-optimized)
result = evaluator.evaluate_batched(
jsonl_file_path=dataset_path,
extractor=extractor,
batch_size=BATCH_SIZE,
max_samples=MAX_SAMPLES
)
- # 为保存数据集,临时加载原始数据(不影响内存优化)
+ # Temporarily load original data for saving (does not affect memory-optimized evaluation)
dataset = DataLoader.load_jsonl(dataset_path, include_results=False, max_samples=MAX_SAMPLES)
dataset.name = result.dataset_name
else:
- print("🔄 使用传统内存模式")
- # 加载完整数据集到内存
+ print("Using traditional in-memory mode")
+ # Load full dataset into memory
dataset = DataLoader.load_jsonl(dataset_path, include_results=False, max_samples=MAX_SAMPLES)
dataset.name = "WebMainBench_with_multi_extraction"
- dataset.description = "多抽取器内容保存演示数据集"
- print(f"📊 加载数据集完成,包含 {len(dataset.samples)} 个样本")
+ dataset.description = "Multi-extractor content saving demo dataset"
+ print(f"Dataset loaded, contains {len(dataset.samples)} samples")
- # 传统模式评测
+ # Traditional mode evaluation
result = evaluator.evaluate(dataset, extractor)
- # 计算耗时指标
+ # Calculate elapsed time metrics
total_time = time.time() - start_time
total_samples = len(dataset.samples)
avg_time_per_sample = total_time / total_samples if total_samples else 0
- # 保存性能数据
+ # Save performance data
extractor_performance.append({
"name": extractor_name,
"total_samples": total_samples,
@@ -824,19 +823,19 @@ def demo_multi_extraction():
"avg_time_per_sample": avg_time_per_sample
})
- # 输出评测结果
- print(f"⏱️ 总耗时: {total_time:.4f}秒(单样本平均: {avg_time_per_sample:.4f}秒)")
- print(f"📊 核心指标:")
+ # Output evaluation results
+ print(f"Total time: {total_time:.4f}s (avg per sample: {avg_time_per_sample:.4f}s)")
+    print("Core metrics:")
print(f" code_edit: {result.overall_metrics.get('code_edit', 0):.4f}")
print(f" formula_edit: {result.overall_metrics.get('formula_edit', 0):.4f}")
print(f" table_TEDS: {result.overall_metrics.get('table_TEDS', 0):.4f}")
print(f" table_edit: {result.overall_metrics.get('table_edit', 0):.4f}")
print(f" text_edit: {result.overall_metrics.get('text_edit', 0):.4f}")
- print(f"✅ 总体得分: {result.overall_metrics.get('overall', 0):.4f}")
+ print(f"Overall score: {result.overall_metrics.get('overall', 0):.4f}")
all_results.append(result)
- # 保存带有当前抽取器内容的数据集
+ # Save dataset with current extractor's content
enriched_dataset_path = results_dir / f"{dataset.name}_{extractor.name}_extraction_infer.jsonl"
DataSaver.save_dataset_with_extraction(
results=result,
@@ -844,153 +843,153 @@ def demo_multi_extraction():
file_path=enriched_dataset_path,
extractor_name=extractor.name
)
- print(f"💾 已保存抽取内容到: {enriched_dataset_path}")
+ print(f"Extracted content saved to: {enriched_dataset_path}")
- # 保存单个抽取器的评测结果
+ # Save individual extractor evaluation results
eval_results_path = results_dir / f"{dataset.name}_{extractor.name}_evaluation_results.json"
DataSaver.save_evaluation_results(result, eval_results_path)
- print(f"📋 已保存评测结果到: {eval_results_path}")
+ print(f"Evaluation results saved to: {eval_results_path}")
- # 保存所有抽取器的汇总报告
+ # Save summary report for all extractors
if all_results:
summary_path = results_dir / f"{dataset.name}_multi_extractors_summary_report.csv"
DataSaver.save_summary_report(all_results, summary_path)
- print(f"\n📈 已保存汇总报告到: {summary_path}")
+ print(f"\nSummary report saved to: {summary_path}")
- # 展示性能对比
+ # Display performance comparison
if extractor_performance:
- print("\n⚡ 抽取器性能对比:")
+ print("\nExtractor performance comparison:")
for perf in extractor_performance:
print(f" {perf['name']}:")
- print(f" 样本数: {perf['total_samples']}")
- print(f" 总耗时: {perf['total_time']:.4f}秒")
- print(f" 单样本耗时: {perf['avg_time_per_sample']:.4f}秒")
- print(f" 效率: {1 / perf['avg_time_per_sample']:.2f}样本/秒")
+ print(f" Samples: {perf['total_samples']}")
+ print(f" Total time: {perf['total_time']:.4f}s")
+ print(f" Time per sample: {perf['avg_time_per_sample']:.4f}s")
+ print(f" Throughput: {1 / perf['avg_time_per_sample']:.2f} samples/s")
- # 展示保存的字段信息
- print("\n📋 保存的新字段说明:")
+ # Display saved field information
+ print("\nSaved new field descriptions:")
for info in extractors_info:
name = info["name"]
- print(f" {name}相关字段:")
- print(f" - {name}_content: 抽取的原始内容")
- print(f" - {name}_content_list: 结构化内容列表(含type字段)")
- print(f" - {name}_success: 抽取是否成功(布尔值)")
- print(f" - {name}_time: 单样本抽取耗时(秒)")
- print(f" - {name}_*_score: 各指标得分(如{name}_text_edit)")
+ print(f" {name} related fields:")
+ print(f" - {name}_content: extracted raw content")
+ print(f" - {name}_content_list: structured content list (with type field)")
+ print(f" - {name}_success: whether extraction succeeded (boolean)")
+ print(f" - {name}_time: per-sample extraction time (seconds)")
+ print(f" - {name}_*_score: metric scores (e.g. {name}_text_edit)")
def demo_llm_webkit_with_preprocessed_html_evaluation():
- """演示LLM-WebKit预处理HTML功能的评测"""
-
- print("\n=== LLM-WebKit 预处理HTML功能演示 ===\n")
-
- # 设置日志
+ """Demonstrate evaluation of LLM-WebKit preprocessed HTML feature"""
+
+ print("\n=== LLM-WebKit Preprocessed HTML Feature Demo ===\n")
+
+ # Set up logging
setup_logging(level="INFO")
-
- # 1. 从真实数据集加载包含预处理HTML的数据
- print("1. 从真实数据集加载预处理HTML数据...")
+
+ # 1. Load preprocessed HTML data from the real dataset
+ print("1. Loading preprocessed HTML data from the real dataset...")
dataset_path = Path("data/track_id_diff_result_56.jsonl")
- print(f"📂 数据集文件: {dataset_path}")
-
- # 加载数据集
+ print(f"Dataset file: {dataset_path}")
+
+ # Load dataset
dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
dataset.name = "real_preprocessed_html_test"
- dataset.description = "基于真实数据的预处理HTML功能测试"
-
- print(f"✅ 真实数据集加载成功,包含 {len(dataset)} 个样本")
- print("📋 真实数据样本包含:")
- print(" - html: 原始网页HTML")
- print(" - llm_webkit_html: LLM预处理后的简化HTML(包含_item_id标记)")
- print(" - groundtruth_content: 人工标注的标准答案")
- print(" - llm_webkit_md: LLM提取的markdown内容")
-
-
- # 2. 创建预处理HTML模式的LLM-WebKit抽取器
- print("2. 创建预处理HTML模式的LLM-WebKit抽取器...")
-
+ dataset.description = "Preprocessed HTML feature test based on real data"
+
+ print(f"Real dataset loaded successfully, contains {len(dataset)} samples")
+ print("Real data samples include:")
+ print(" - html: raw web page HTML")
+ print(" - llm_webkit_html: LLM-preprocessed simplified HTML (with _item_id markers)")
+ print(" - groundtruth_content: manually annotated ground truth")
+ print(" - llm_webkit_md: LLM-extracted markdown content")
+
+
+ # 2. Create LLM-WebKit extractor in preprocessed HTML mode
+ print("2. Creating LLM-WebKit extractor in preprocessed HTML mode...")
+
config = {
- "use_preprocessed_html": True, # 🔑 关键配置:启用预处理HTML模式
- "preprocessed_html_field": "llm_webkit_html" # 指定预处理HTML字段名
+ "use_preprocessed_html": True, # Key config: enable preprocessed HTML mode
+ "preprocessed_html_field": "llm_webkit_html" # Specify preprocessed HTML field name
}
-
+
extractor = ExtractorFactory.create("llm-webkit", config=config)
-
- # 4. 运行评测
- print("4. 开始评测...")
+
+ # 4. Run evaluation
+ print("4. Starting evaluation...")
print("=" * 50)
-
+
evaluator = Evaluator()
result = evaluator.evaluate(
dataset=dataset,
extractor=extractor,
max_samples=None
)
-
- # 5. 显示评测结果
- print("\n5. 📊 预处理HTML模式评测结果:")
+
+ # 5. Display evaluation results
+ print("\n5. Preprocessed HTML mode evaluation results:")
print("=" * 50)
-
+
results_dict = result.to_dict()
metrics = results_dict.get('overall_metrics', {})
-
- # 显示关键指标
- print(f"\n🏆 综合指标:")
+
+ # Display key metrics
+    print("\nOverall metrics:")
print(f" overall: {metrics.get('overall', 0):.4f}")
-
- print(f"\n📝 内容提取质量:")
+
+    print("\nContent extraction quality:")
print(f" text_edit: {metrics.get('text_edit', 0):.4f}")
print(f" code_edit: {metrics.get('code_edit', 0):.4f}")
print(f" table_edit: {metrics.get('table_edit', 0):.4f}")
print(f" table_TEDS: {metrics.get('table_TEDS', 0):.4f}")
-
- print(f"\n⚡ 性能统计:")
+
+    print("\nPerformance statistics:")
sample_results = results_dict.get('sample_results', [])
if sample_results:
extraction_times = [s.get('extraction_time', 0) for s in sample_results if s.get('extraction_success')]
if extraction_times:
avg_time = sum(extraction_times) / len(extraction_times)
- print(f" 平均提取时间: {avg_time:.3f}秒")
- print(f" 处理速度: {1/avg_time:.1f}样本/秒")
-
+ print(f" Average extraction time: {avg_time:.3f}s")
+ print(f" Processing speed: {1/avg_time:.1f} samples/s")
+
success_count = len([s for s in sample_results if s.get('extraction_success', False)])
- print(f" 成功样本数: {success_count}/{len(dataset)}")
-
- # 7. 保存结果
- print(f"\n7. 💾 保存评测结果...")
-
+ print(f" Successful samples: {success_count}/{len(dataset)}")
+
+ # 7. Save results
+    print("\n7. Saving evaluation results...")
+
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)
- # 新增:保存带抽取结果的增强数据集(JSONL格式)
+ # Save enhanced dataset with extraction results (JSONL format)
jsonl_dataset_path = results_dir / f"{extractor.name}_preprocessed_html_dataset_with_results.jsonl"
DataSaver.save_dataset_with_extraction(
results=result,
- dataset=dataset, # 原始数据集对象
+ dataset=dataset, # Original dataset object
file_path=jsonl_dataset_path,
- extractor_name="llm-webkit" # 抽取器名称前缀
+ extractor_name="llm-webkit" # Extractor name prefix
)
- print(f"✅ 带抽取结果的JSONL数据集已保存到: {jsonl_dataset_path}")
+ print(f"JSONL dataset with extraction results saved to: {jsonl_dataset_path}")
results_path = results_dir / f"{extractor.name}_preprocessed_html_evaluation_results.json"
report_path = results_dir / f"{extractor.name}_preprocessed_html_evaluation_report.csv"
-
+
DataSaver.save_evaluation_results(result, results_path)
DataSaver.save_summary_report(result, report_path)
-
- print(f"✅ 详细结果已保存到: {results_path}")
- print(f"✅ CSV报告已保存到: {report_path}")
+
+ print(f"Detailed results saved to: {results_path}")
+ print(f"CSV report saved to: {report_path}")
if __name__ == "__main__":
try:
# demo_basic_mock_evaluation()
- # demo_llm_webkit_evaluation() # 使用LLM-WebKit评测示例
+ # demo_llm_webkit_evaluation() # LLM-WebKit evaluation example
demo_llm_webkit_with_preprocessed_html_evaluation()
# demo_extractor_comparison()
- # demo_dataset_with_extraction() # 演示保存带有抽取内容的数据集
- # demo_multi_extraction() # 演示多个抽取器同时评测
- print("\n✅ 示例运行完成!")
-
+ # demo_dataset_with_extraction() # Demo saving dataset with extracted content
+ # demo_multi_extraction() # Demo evaluating with multiple extractors simultaneously
+ print("\nExample completed!")
+
except Exception as e:
- print(f"\n❌ 运行出错: {e}")
+ print(f"\nRuntime error: {e}")
import traceback
traceback.print_exc()
\ No newline at end of file
diff --git a/examples/demo.py b/examples/demo.py
index b460ad5..0c016bd 100644
--- a/examples/demo.py
+++ b/examples/demo.py
@@ -1,15 +1,15 @@
from webmainbench import DataLoader, Evaluator, ExtractorFactory
from pathlib import Path
-# 1. 加载评测数据集
+# 1. Load evaluation dataset
dataset = DataLoader.load_jsonl(Path("data/sample_dataset.jsonl"))
-# 2. 创建抽取器
+# 2. Create extractor
extractor = ExtractorFactory.create("llm-webkit")
-# 3. 运行评测
+# 3. Run evaluation
evaluator = Evaluator()
result = evaluator.evaluate(dataset, extractor)
-# 4. 查看结果
+# 4. View results
print(f"Overall Score: {result}")
diff --git a/examples/llm_webkit_usage.py b/examples/llm_webkit_usage.py
index 4300f55..229cde0 100644
--- a/examples/llm_webkit_usage.py
+++ b/examples/llm_webkit_usage.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python3
"""
-LLM-WebKit Extractor使用示例
+LLM-WebKit Extractor Usage Example
-本示例展示如何使用集成了VLLM推理能力的LLM-WebKit extractor。
+This example demonstrates how to use the LLM-WebKit extractor integrated with VLLM inference capabilities.
"""
import time
@@ -10,138 +10,138 @@
def main():
- print("🚀 LLM-WebKit Extractor 使用示例\n")
-
- # 1. 创建带有自定义配置的extractor
+ print("LLM-WebKit Extractor Usage Example\n")
+
+ # 1. Create extractor with custom configuration
config = {
- "model_path": "/Users/chupei/model/checkpoint-3296", # 替换为您的模型路径
- "use_logits_processor": True, # 启用JSON格式约束
- "temperature": 0.0, # 确定性输出
- "max_item_count": 500, # 处理的最大item数量
- "max_output_tokens": 4096, # 最大输出token数
- "dtype": "bfloat16", # 模型精度
- "tensor_parallel_size": 1 # 张量并行大小
+ "model_path": "/Users/chupei/model/checkpoint-3296", # Replace with your model path
+ "use_logits_processor": True, # Enable JSON format constraint
+ "temperature": 0.0, # Deterministic output
+ "max_item_count": 500, # Maximum number of items to process
+ "max_output_tokens": 4096, # Maximum output tokens
+ "dtype": "bfloat16", # Model precision
+ "tensor_parallel_size": 1 # Tensor parallel size
}
-
+
try:
extractor = ExtractorFactory.create("llm-webkit", config=config)
- print(f"✅ Extractor创建成功: {extractor.description}")
- print(f"📋 版本: {extractor.version}")
- print(f"⚙️ 配置: {extractor.inference_config.__dict__}\n")
-
+ print(f"Extractor created successfully: {extractor.description}")
+ print(f"Version: {extractor.version}")
+ print(f"Config: {extractor.inference_config.__dict__}\n")
+
except Exception as e:
- print(f"❌ Extractor创建失败: {e}")
- print("💡 请确保已安装所需依赖:")
+ print(f"Extractor creation failed: {e}")
+ print("Please ensure the required dependencies are installed:")
print(" pip install vllm transformers torch llm_web_kit")
return
-
- # 2. 准备测试HTML(包含_item_id属性的结构化HTML)
+
+ # 2. Prepare test HTML (structured HTML with _item_id attributes)
test_html = """
- 测试文章 - 人工智能的发展趋势
+ Test Article - AI Development Trends
-
+
- 人工智能的发展趋势
- 作者:张三 | 发布时间:2024-01-15 | 阅读量:1,234
+ AI Development Trends
+ Author: John Doe | Published: 2024-01-15 | Views: 1,234
-
+
- 人工智能(AI)技术正在快速发展,对各行各业产生深远影响。本文将探讨AI的主要发展趋势和未来展望。
-
- 1. 机器学习的进步
- 深度学习和大语言模型的突破使得AI系统能够理解和生成更自然的语言,在对话、翻译、创作等领域表现出色。
-
- 2. 自动化应用
- 从制造业的机器人到软件开发的代码生成,AI正在各个领域实现流程自动化,提高效率并降低成本。
-
- 3. 个性化服务
- 基于用户数据的个性化推荐和服务正变得越来越精准,为用户提供更好的体验。
+ Artificial Intelligence (AI) technology is rapidly advancing, with far-reaching impacts across all industries. This article explores the major development trends and future prospects of AI.
+
+ 1. Advances in Machine Learning
+ Breakthroughs in deep learning and large language models have enabled AI systems to understand and generate more natural language, excelling in dialogue, translation, and creative tasks.
+
+ 2. Automation Applications
+ From robots in manufacturing to code generation in software development, AI is automating processes across domains, improving efficiency and reducing costs.
+
+ 3. Personalized Services
+ Personalized recommendations and services based on user data are becoming increasingly precise, providing better user experiences.
-
+
- 相关文章
+ Related Articles
-
+
"""
-
- # 3. 执行内容提取
- print("🔍 开始内容提取...")
+
+ # 3. Execute content extraction
+ print("Starting content extraction...")
start_time = time.time()
try:
result = extractor.extract(test_html)
end_time = time.time()
- print(f"⏱️ 提取耗时: {end_time - start_time:.2f}秒\n")
-
- # 4. 显示提取结果
+ print(f"⏱️ Extraction time: {end_time - start_time:.2f}s\n")
+
+ # 4. Display extraction results
if result.success:
- print("✅ 内容提取成功!\n")
-
- print("📄 提取的主要内容:")
+ print("✅ Content extracted successfully!\n")
+
+ print("📄 Extracted main content:")
print("=" * 50)
print(result.content[:500] + "..." if len(result.content) > 500 else result.content)
print("=" * 50)
-
- print(f"\n📊 提取统计:")
- print(f" • 内容长度: {len(result.content)} 字符")
- print(f" • 置信度: {result.confidence_score:.3f}")
- print(f" • 标题: {result.title}")
- print(f" • 语言: {result.language}")
- print(f" • 提取时间: {result.extraction_time:.3f}秒")
-
+
+ print(f"\n📊 Extraction statistics:")
+ print(f" • Content length: {len(result.content)} characters")
+ print(f" • Confidence: {result.confidence_score:.3f}")
+ print(f" • Title: {result.title}")
+ print(f" • Language: {result.language}")
+ print(f" • Extraction time: {result.extraction_time:.3f}s")
+
if result.content_list:
- print(f" • 结构化内容块: {len(result.content_list)}个")
- for i, item in enumerate(result.content_list[:3]): # 显示前3个
+ print(f" • Structured content blocks: {len(result.content_list)}")
+ for i, item in enumerate(result.content_list[:3]): # Show first 3
print(f" [{i+1}] {item.get('type', 'unknown')}: {item.get('content', '')[:50]}...")
-
+
else:
- print("❌ 内容提取失败")
- print(f"错误信息: {result.error_message}")
+ print("❌ Content extraction failed")
+ print(f"Error message: {result.error_message}")
if result.error_traceback:
- print(f"错误详情:\n{result.error_traceback}")
-
+ print(f"Error details:\n{result.error_traceback}")
+
except Exception as e:
- print(f"❌ 提取过程中发生异常: {e}")
-
- print("\n🎯 高级功能说明:")
- print("• 智能分类: 使用LLM理解HTML元素语义,准确区分主要内容和辅助内容")
- print("• 格式约束: 通过logits processor确保LLM输出有效的JSON格式")
- print("• 性能优化: 自动跳过过于复杂的HTML,支持延迟加载模型")
- print("• 详细反馈: 提供分类结果、置信度和性能指标")
+ print(f"❌ Exception during extraction: {e}")
+
+ print("\n🎯 Advanced feature notes:")
+ print("• Smart classification: Uses LLM to understand HTML element semantics, accurately distinguishing main content from auxiliary content")
+ print("• Format constraint: Uses logits processor to ensure valid JSON output from the LLM")
+ print("• Performance optimization: Automatically skips overly complex HTML, supports lazy model loading")
+ print("• Detailed feedback: Provides classification results, confidence scores, and performance metrics")
if __name__ == "__main__":
main()
-
- print("\n💡 使用提示:")
- print("1. 确保已安装所需依赖: vllm, transformers, torch, llm_web_kit")
- print("2. 设置正确的模型路径")
- print("3. 根据硬件资源调整tensor_parallel_size和dtype")
- print("4. 对于大规模HTML,适当调整max_item_count限制")
- print("5. 使用use_logits_processor=True确保输出格式可靠性")
\ No newline at end of file
+
+ print("\n💡 Usage tips:")
+ print("1. Ensure required dependencies are installed: vllm, transformers, torch, llm_web_kit")
+ print("2. Set the correct model path")
+ print("3. Adjust tensor_parallel_size and dtype based on hardware resources")
+ print("4. For large-scale HTML, adjust max_item_count accordingly")
+ print("5. Use use_logits_processor=True to ensure reliable output format")
\ No newline at end of file
diff --git a/examples/magic_html_extract_demo.py b/examples/magic_html_extract_demo.py
index 726c054..ef90532 100644
--- a/examples/magic_html_extract_demo.py
+++ b/examples/magic_html_extract_demo.py
@@ -1,68 +1,68 @@
import time
from webmainbench.extractors import ExtractorFactory
-# 配置 MagicHTML 抽取器(这里可根据需要添加更多配置)
+# Configure MagicHTML extractor (add more configuration as needed)
config = {}
try:
- # 创建 MagicHTML 抽取器实例
+ # Create MagicHTML extractor instance
extractor = ExtractorFactory.create("magic-html", config=config)
- print(f"✅ Extractor创建成功: {extractor.description}")
- print(f"📋 版本: {extractor.version}")
- print(f"⚙️ 配置: {extractor.get_config()}\n")
+ print(f"✅ Extractor created successfully: {extractor.description}")
+ print(f"📋 Version: {extractor.version}")
+ print(f"⚙️ Config: {extractor.get_config()}\n")
except Exception as e:
- print(f"❌ Extractor创建失败: {e}")
+ print(f"❌ Failed to create extractor: {e}")
-# 测试 HTML
+# Test HTML
test_html = """
- Python编程教程
- 这是一个Python基础教程,展示如何定义函数。
+ Python Programming Tutorial
+ This is a basic Python tutorial demonstrating how to define functions.
def greet(name):
- ""问候函数""
+ ""Greeting function""
return f"Hello, {name}!"
-# 使用示例
+# Usage example
result = greet("World")
print(result)
- 这个函数可以用来问候任何人。
+ This function can be used to greet anyone.
"""
-print("🔍 开始内容提取...")
+print("🔍 Starting content extraction...")
start_time = time.time()
try:
result = extractor.extract(test_html)
end_time = time.time()
- print(f"⏱️ 提取耗时: {end_time - start_time:.2f}秒\n")
+ print(f"⏱️ Extraction time: {end_time - start_time:.2f}s\n")
- # 显示提取结果
+ # Display extraction results
if result.success:
- print("✅ 内容提取成功!\n")
+ print("✅ Content extracted successfully!\n")
- print("📄 提取的主要内容:")
+ print("📄 Extracted main content:")
print("=" * 50)
print(result.content[:500] + "..." if len(result.content) > 500 else result.content)
print("=" * 50)
- print(f"\n📊 提取统计:")
- print(f" • 内容长度: {len(result.content)} 字符")
- print(f" • 标题: {result.title}")
- print(f" • 语言: {result.language}")
- print(f" • 提取时间: {result.extraction_time:.3f}秒")
+ print(f"\n📊 Extraction statistics:")
+ print(f" • Content length: {len(result.content)} characters")
+ print(f" • Title: {result.title}")
+ print(f" • Language: {result.language}")
+ print(f" • Extraction time: {result.extraction_time:.3f}s")
if result.content_list:
- print(f" • 结构化内容块: {len(result.content_list)}个")
- for i, item in enumerate(result.content_list[:3]): # 显示前3个
+ print(f" • Structured content blocks: {len(result.content_list)}")
+ for i, item in enumerate(result.content_list[:3]): # Show first 3
print(f" [{i + 1}] {item.get('type', 'unknown')}: {item.get('content', '')[:50]}...")
else:
- print("❌ 内容提取失败")
- print(f"错误信息: {result.error_message}")
+ print("❌ Content extraction failed")
+ print(f"Error message: {result.error_message}")
if result.error_traceback:
- print(f"错误详情:\n{result.error_traceback}")
+ print(f"Error details:\n{result.error_traceback}")
except Exception as e:
- print(f"❌ 提取过程中发生异常: {e}")
\ No newline at end of file
+ print(f"❌ Exception during extraction: {e}")
diff --git a/examples/main_html_eval.py b/examples/main_html_eval.py
index cdeee0c..b29d395 100755
--- a/examples/main_html_eval.py
+++ b/examples/main_html_eval.py
@@ -1,12 +1,12 @@
#!/usr/bin/env python3
"""
-WebMainBench 基本使用示例
+WebMainBench Basic Usage Example
"""
import json
from pathlib import Path
-# 导入 WebMainBench 模块
+# Import WebMainBench modules
from webmainbench import (
DataLoader, DataSaver, BenchmarkDataset, DataSample,
ExtractorFactory, MainHTMLEvaluator,
@@ -16,17 +16,17 @@
def load_benchdata(dataset_path: str) -> BenchmarkDataset:
dataset_path = Path(dataset_path)
- print(f"📂 数据集文件: {dataset_path}")
-
+ print(f"📂 Dataset file: {dataset_path}")
+
if not dataset_path.exists():
- print(f"❌ 数据文件不存在: {dataset_path}")
- print("请确保已运行数据提取命令创建样本数据集")
+ print(f"❌ Data file does not exist: {dataset_path}")
+ print("Please ensure the data extraction command has been run to create the sample dataset")
return
-
- # 加载数据集
+
+ # Load dataset
dataset = DataLoader.load_jsonl(dataset_path, include_results=False)
dataset.name = "real_preprocessed_html_test"
- dataset.description = "基于真实数据的预处理HTML功能测试"
+ dataset.description = "Preprocessed HTML feature test based on real data"
return dataset
@@ -39,104 +39,103 @@ def save_results(result_file: Path, results: list[dict]):
with result_file.open("w", encoding="utf-8") as f:
for res in results:
f.write(json.dumps(res, ensure_ascii=False) + "\n")
-
-
+
+
def demo_llm_webkit_with_preprocessed_html_evaluation(model_path: str):
- """演示LLM-WebKit预处理HTML功能的评测"""
-
- print("\n=== LLM-WebKit 预处理HTML功能演示 ===\n")
-
- # 设置日志
+ """Demonstrate evaluation of the LLM-WebKit preprocessed HTML feature"""
+
+ print("\n=== LLM-WebKit Preprocessed HTML Feature Demo ===\n")
+
+ # Set up logging
setup_logging(level="INFO")
-
- # 1. 从真实数据集加载包含预处理HTML的数据
- print("1. 从真实数据集加载预处理HTML数据...")
-
- # 使用DataLoader加载真实的样本数据
-
+
+ # 1. Load preprocessed HTML data from the real dataset
+ print("1. Loading preprocessed HTML data from the real dataset...")
+
+ # Load real sample data using DataLoader
dataset = load_benchdata("data/WebMainBench_llm-webkit_v1_WebMainBench_1827_v1_WebMainBench_dataset_merge_with_llm_webkit.jsonl")
- print(f"✅ 真实数据集加载成功,包含 {len(dataset)} 个样本")
-
+ print(f"✅ Real dataset loaded successfully, contains {len(dataset)} samples")
+
+
+
+ # 2. Create LLM-WebKit extractor in preprocessed HTML mode
+ print("2. Creating LLM-WebKit extractor in preprocessed HTML mode...")
-
- # 2. 创建预处理HTML模式的LLM-WebKit抽取器
- print("2. 创建预处理HTML模式的LLM-WebKit抽取器...")
-
extractor = load_extractor(model_path)
- print(f"✅ 抽取器创建成功")
- print(f"📋 配置信息:")
- print(f" - 跳过LLM推理: 是(直接处理预处理HTML)")
+ print(f"✅ Extractor created successfully")
+ print(f"📋 Configuration:")
+ print(f" - Skip LLM inference: Yes (process preprocessed HTML directly)")
print()
-
- # 4. 运行评测
- print("4. 开始评测...")
+
+ # 4. Run evaluation
+ print("4. Starting evaluation...")
print("=" * 50)
-
+
evaluator = MainHTMLEvaluator()
result = evaluator.evaluate(
dataset=dataset,
extractor=extractor,
max_samples=None
)
-
- # 5. 显示评测结果
- print("\n5. 📊 预处理HTML模式评测结果:")
+
+ # 5. Display evaluation results
+ print("\n5. 📊 Preprocessed HTML mode evaluation results:")
print("=" * 50)
-
+
results_dict = result.to_dict()
metrics = results_dict.get('overall_metrics', {})
-
- # 显示关键指标
- print(f"\n🏆 综合指标:")
+
+ # Display key metrics
+ print(f"\n🏆 Overall metrics:")
for key in metrics.keys():
print(f" {key}: {metrics[key]:.4f}")
-
- print(f"\n⚡ 性能统计:")
+
+ print(f"\n⚡ Performance statistics:")
sample_results = results_dict.get('sample_results', [])
if sample_results:
extraction_times = [s.get('extraction_time', 0) for s in sample_results if s.get('extraction_success')]
if extraction_times:
avg_time = sum(extraction_times) / len(extraction_times)
- print(f" 平均提取时间: {avg_time:.3f}秒")
- print(f" 处理速度: {1/avg_time:.1f}样本/秒")
-
+ print(f" Average extraction time: {avg_time:.3f}s")
+ print(f" Processing speed: {1/avg_time:.1f} samples/s")
+
success_count = len([s for s in sample_results if s.get('extraction_success', False)])
- print(f" 成功样本数: {success_count}/{len(dataset)}")
-
- # 7. 保存结果
- print(f"\n6. 💾 保存评测结果...")
-
+ print(f" Successful samples: {success_count}/{len(dataset)}")
+
+    # 6. Save results
+ print(f"\n6. 💾 Saving evaluation results...")
+
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)
- # 新增:保存带抽取结果的增强数据集(JSONL格式)
+ # Save enhanced dataset with extraction results (JSONL format)
jsonl_dataset_path = results_dir / f"{extractor.name}_preprocessed_html_dataset_with_results.jsonl"
save_results(jsonl_dataset_path, result.sample_results)
- print(f"✅ 结果已保存到: {jsonl_dataset_path}")
-
-
- print(f"✅ 带抽取结果的JSONL数据集已保存到: {jsonl_dataset_path}")
+ print(f"✅ Results saved to: {jsonl_dataset_path}")
+
+
+ print(f"✅ JSONL dataset with extraction results saved to: {jsonl_dataset_path}")
results_path = results_dir / f"{extractor.name}_preprocessed_html_evaluation_results.json"
report_path = results_dir / f"{extractor.name}_preprocessed_html_evaluation_report.csv"
-
+
DataSaver.save_evaluation_results(result, results_path)
DataSaver.save_summary_report(result, report_path)
-
- print(f"✅ 详细结果已保存到: {results_path}")
- print(f"✅ CSV报告已保存到: {report_path}")
-
+
+ print(f"✅ Detailed results saved to: {results_path}")
+ print(f"✅ CSV report saved to: {report_path}")
+
if __name__ == "__main__":
import argparse
- parser = argparse.ArgumentParser(description="WebMainBench 基本使用示例")
- parser.add_argument("--model_path", required=True, help="LLM model路径")
+ parser = argparse.ArgumentParser(description="WebMainBench Basic Usage Example")
+ parser.add_argument("--model_path", required=True, help="LLM model path")
args = parser.parse_args()
try:
demo_llm_webkit_with_preprocessed_html_evaluation(args.model_path)
- print("\n✅ 示例运行完成!")
-
+ print("\n✅ Example completed!")
+
except Exception as e:
- print(f"\n❌ 运行出错: {e}")
+ print(f"\n❌ Runtime error: {e}")
import traceback
- traceback.print_exc()
\ No newline at end of file
+ traceback.print_exc()
diff --git a/examples/multi_extractor_compare.py b/examples/multi_extractor_compare.py
index 9b3a56f..6b3390d 100644
--- a/examples/multi_extractor_compare.py
+++ b/examples/multi_extractor_compare.py
@@ -1,56 +1,56 @@
from webmainbench import DataLoader, Evaluator, ExtractorFactory, DataSaver
from pathlib import Path
-# 如需调用LLM修正抽取结果,在 webmainbench/config.py 中配置 LLM api
+# To use LLM to correct extraction results, configure the LLM API in webmainbench/config.py
def all_extractor_comparison():
- """演示多抽取器对比"""
-
- print("\n=== 多抽取器对比演示 ===\n")
-
- # 创建数据集
+ """Demonstrate multi-extractor comparison"""
+
+ print("\n=== Multi-Extractor Comparison Demo ===\n")
+
+ # Create dataset
dataset_path = Path("../data/WebMainBench_llm-webkit_v1_WebMainBench_7887_within_formula.jsonl")
dataset = DataLoader.load_jsonl(dataset_path)
- # 创建webkit抽取器
+ # Create webkit extractor
config = {
- "use_preprocessed_html": True, # 🔑 关键配置:启用预处理HTML模式
- "preprocessed_html_field": "llm_webkit_html" # 指定预处理HTML字段名
+ "use_preprocessed_html": True, # Key config: enable preprocessed HTML mode
+ "preprocessed_html_field": "llm_webkit_html" # Specify the preprocessed HTML field name
}
webkit_extractor = ExtractorFactory.create("llm-webkit", config=config)
- # 创建magic-extractor抽取器
+    # Create magic-html extractor
magic_extractor = ExtractorFactory.create("magic-html")
- # 创建trafilatura抽取器,抽取成markdown
+    # Create trafilatura extractor that outputs markdown
trafilatura_extractor = ExtractorFactory.create("trafilatura")
- # 创建trafilatura抽取器,抽取成txt
+    # Create trafilatura extractor that outputs plain text
trafilatura_txt_extractor = ExtractorFactory.create("trafilatura_txt")
- # 创建resiliparse抽取器
+ # Create resiliparse extractor
resiliparse_extractor = ExtractorFactory.create("resiliparse")
-
- # 运行对比
+
+ # Run comparison
evaluator = Evaluator()
extractors = [webkit_extractor, magic_extractor, trafilatura_extractor,trafilatura_txt_extractor, resiliparse_extractor]
# extractors = [webkit_extractor]
-
+
results = evaluator.compare_extractors(
dataset=dataset,
extractors=extractors
)
-
- # 显示对比结果
- print("对比结果:")
+
+ # Display comparison results
+ print("Comparison results:")
print("-" * 40)
for extractor_name, result in results.items():
overall_score = result.overall_metrics.get('overall', 0)
print(f"{extractor_name}: {overall_score:.4f}")
-
- # 保存多抽取器对比榜单
+
+ # Save multi-extractor comparison leaderboard
all_results = []
for extractor_name, result in results.items():
all_results.append(result.to_dict())
-
+
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)
leaderboard_path = results_dir / "leaderboard.csv"
@@ -60,10 +60,10 @@ def all_extractor_comparison():
DataSaver.save_evaluation_results(all_results, evaluation_results_path)
DataSaver.save_dataset_with_extraction(
results=all_results,
- dataset=dataset, # 原始数据集对象
+ dataset=dataset, # Original dataset object
file_path=jsonl_dataset_path
)
- print(f"\n📊 榜单已保存到: {leaderboard_path}")
+ print(f"\nLeaderboard saved to: {leaderboard_path}")
if __name__ == "__main__":
diff --git a/examples/resiliparse_extract_demo.py b/examples/resiliparse_extract_demo.py
index ba33a14..17c941f 100644
--- a/examples/resiliparse_extract_demo.py
+++ b/examples/resiliparse_extract_demo.py
@@ -1,7 +1,7 @@
import time
from webmainbench.extractors import ExtractorFactory
-# 配置 Resiliparse 抽取器
+# Configure Resiliparse extractor
config = {
"main_content": True,
"alt_texts": True,
@@ -14,66 +14,66 @@
}
try:
- # 创建 Resiliparse 抽取器实例
+ # Create Resiliparse extractor instance
extractor = ExtractorFactory.create("resiliparse", config=config)
- print(f"✅ Extractor创建成功: {extractor.description}")
- print(f"📋 版本: {extractor.version}")
- print(f"⚙️ 配置: {extractor.get_config()}\n")
+ print(f"✅ Extractor created successfully: {extractor.description}")
+ print(f"📋 Version: {extractor.version}")
+ print(f"⚙️ Config: {extractor.get_config()}\n")
except Exception as e:
- print(f"❌ Extractor创建失败: {e}")
+ print(f"❌ Failed to create extractor: {e}")
-# 测试 HTML
+# Test HTML
test_html = """
- Python编程教程
- 这是一个Python基础教程,展示如何定义函数。
+ Python Programming Tutorial
+ This is a basic Python tutorial demonstrating how to define functions.
def greet(name):
- ""问候函数""
+ ""Greeting function""
return f"Hello, {name}!"
-# 使用示例
+# Usage example
result = greet("World")
print(result)
- 这个函数可以用来问候任何人。
+ This function can be used to greet anyone.
"""
-print("🔍 开始内容提取...")
+print("🔍 Starting content extraction...")
start_time = time.time()
try:
result = extractor.extract(test_html)
end_time = time.time()
- print(f"⏱️ 提取耗时: {end_time - start_time:.2f}秒\n")
+ print(f"⏱️ Extraction time: {end_time - start_time:.2f}s\n")
- # 显示提取结果
+ # Display extraction results
if result.success:
- print("✅ 内容提取成功!\n")
+ print("✅ Content extracted successfully!\n")
- print("📄 提取的主要内容:")
+ print("📄 Extracted main content:")
print("=" * 50)
print(result.content[:500] + "..." if len(result.content) > 500 else result.content)
print("=" * 50)
- print(f"\n📊 提取统计:")
- print(f" • 内容长度: {len(result.content)} 字符")
- print(f" • 标题: {result.title}")
- print(f" • 语言: {result.language}")
- print(f" • 提取时间: {result.extraction_time:.3f}秒")
+ print(f"\n📊 Extraction statistics:")
+ print(f" • Content length: {len(result.content)} characters")
+ print(f" • Title: {result.title}")
+ print(f" • Language: {result.language}")
+ print(f" • Extraction time: {result.extraction_time:.3f}s")
if result.content_list:
- print(f" • 结构化内容块: {len(result.content_list)}个")
- for i, item in enumerate(result.content_list[:3]): # 显示前3个
+ print(f" • Structured content blocks: {len(result.content_list)}")
+ for i, item in enumerate(result.content_list[:3]): # Show first 3
print(f" [{i + 1}] {item.get('type', 'unknown')}: {item.get('content', '')[:50]}...")
else:
- print("❌ 内容提取失败")
- print(f"错误信息: {result.error_message}")
+ print("❌ Content extraction failed")
+ print(f"Error message: {result.error_message}")
if result.error_traceback:
- print(f"错误详情:\n{result.error_traceback}")
+ print(f"Error details:\n{result.error_traceback}")
except Exception as e:
- print(f"❌ 提取过程中发生异常: {e}")
+ print(f"❌ Exception during extraction: {e}")
diff --git a/examples/teds_usage.py b/examples/teds_usage.py
index 000f288..70d4e51 100644
--- a/examples/teds_usage.py
+++ b/examples/teds_usage.py
@@ -1,8 +1,9 @@
#!/usr/bin/env python3
"""
-WebMainBench TEDS 算法使用示例
+WebMainBench TEDS Algorithm Usage Example
-展示如何在评估中使用 TEDS (Tree-Edit Distance based Similarity) 算法进行表格评估
+Demonstrates how to use the TEDS (Tree-Edit Distance based Similarity) algorithm
+for table evaluation.
"""
import sys
@@ -18,268 +19,268 @@
def demo_teds_configuration():
- """演示如何配置 TEDS 算法"""
- print("=== 🔧 TEDS 配置示例 ===\n")
-
- # 方法1: 使用 TableTEDSMetric 指标
- print("**方法1: 使用专用的 TableTEDSMetric 指标**")
+ """Demonstrate how to configure the TEDS algorithm"""
+ print("=== TEDS Configuration Example ===\n")
+
+ # Method 1: Use the TableTEDSMetric metric
+ print("**Method 1: Use the dedicated TableTEDSMetric metric**")
evaluation_config = {
"metrics": {
"table_extraction": {
- "use_teds": True, # 启用 TEDS 算法
- "structure_only": False # 同时考虑结构和内容
+ "use_teds": True, # Enable TEDS algorithm
+ "structure_only": False # Consider both structure and content
}
}
}
- print("配置:", evaluation_config)
+ print("Config:", evaluation_config)
print()
-
- # 方法2: 直接使用 TEDS 指标
- print("**方法2: 直接使用独立的 TEDS 指标**")
+
+ # Method 2: Use TEDS metric directly
+ print("**Method 2: Use the standalone TEDS metric directly**")
teds_config = {
"metrics": {
"teds": {
"structure_only": False,
"ignore_nodes": ["tbody", "thead", "tfoot"]
},
- "s_teds": { # 结构化 TEDS
+ "s_teds": { # Structural TEDS
"structure_only": True
}
}
}
- print("配置:", teds_config)
+ print("Config:", teds_config)
print()
def demo_teds_comparison():
- """演示 TEDS 与简单算法的对比"""
- print("=== ⚖️ TEDS vs 简单算法对比 ===\n")
-
- # 准备测试数据
+ """Demonstrate comparison of TEDS vs simple algorithm"""
+ print("=== TEDS vs Simple Algorithm Comparison ===\n")
+
+ # Prepare test data
test_cases = [
{
- "name": "完全匹配的表格",
+ "name": "Perfectly matching table",
"extracted": """
- 产品 价格
- 苹果 5元
- 橙子 3元
+ Product Price
+ Apple $5
+ Orange $3
""",
"groundtruth": """
- 产品 价格
- 苹果 5元
- 橙子 3元
+ Product Price
+ Apple $5
+ Orange $3
"""
},
{
- "name": "缺少行的表格",
+ "name": "Table with missing row",
"extracted": """
- 产品 价格
- 苹果 5元
+ Product Price
+ Apple $5
""",
"groundtruth": """
- 产品 价格
- 苹果 5元
- 橙子 3元
- 香蕉 4元
+ Product Price
+ Apple $5
+ Orange $3
+ Banana $4
"""
},
{
- "name": "结构不同的表格",
+ "name": "Table with different structure",
"extracted": """
- 产品 价格
- 苹果 5元
+ Product Price
+ Apple $5
""",
"groundtruth": """
- 产品 价格 库存
- 苹果 5元 100
+ Product Price Stock
+ Apple $5 100
"""
}
]
-
- print("| 测试用例 | 简单算法 | TEDS算法 | S-TEDS | 差异 |")
+
+ print("| Test case | Simple | TEDS | S-TEDS | Diff |")
print("|---------|---------|---------|--------|------|")
-
+
for case in test_cases:
- # 简单算法评估
+ # Simple algorithm evaluation
simple_evaluator = Evaluator(task_config={
"metrics": {
"table_extraction": {"use_teds": False}
}
})
-
- # TEDS 算法评估
+
+ # TEDS algorithm evaluation
teds_evaluator = Evaluator(task_config={
"metrics": {
"table_extraction": {"use_teds": True}
}
})
-
- # 创建模拟数据
+
+ # Create mock data
sample = DataSample(
id=f"test_{case['name']}",
- html="测试HTML
",
- content="测试内容",
+ html="Test HTML
",
+ content="Test content",
content_list=[{"table": case["groundtruth"]}]
)
-
+
extraction_result = ExtractionResult(
extractor_name="test",
- extracted_content="测试内容",
+ extracted_content="Test content",
extracted_content_list=[{"table": case["extracted"]}]
)
-
- # 计算得分
+
+ # Calculate scores
try:
simple_result = simple_evaluator.evaluate_single(sample, extraction_result)
teds_result = teds_evaluator.evaluate_single(sample, extraction_result)
-
+
simple_score = simple_result.overall_metrics.get("table_extraction", 0.0)
teds_score = teds_result.overall_metrics.get("table_extraction", 0.0)
-
- # S-TEDS (结构化) 评估
+
+ # S-TEDS (structure-only) evaluation
s_teds = StructureTEDSMetric("s_teds")
s_teds_result = s_teds.calculate(case["extracted"], case["groundtruth"])
s_teds_score = s_teds_result.score
-
+
diff = abs(simple_score - teds_score)
-
+
print(f"| {case['name'][:10]}... | {simple_score:.4f} | {teds_score:.4f} | {s_teds_score:.4f} | {diff:.4f} |")
-
+
except Exception as e:
- print(f"| {case['name'][:10]}... | 错误 | 错误 | 错误 | - |")
- print(f" 错误信息: {e}")
-
+ print(f"| {case['name'][:10]}... | Error | Error | Error | - |")
+ print(f" Error message: {e}")
+
print()
def demo_advanced_teds_features():
- """演示 TEDS 的高级功能"""
- print("=== 🚀 TEDS 高级功能演示 ===\n")
-
- # 1. 处理 Markdown 表格
- print("**1. Markdown 表格支持**")
+ """Demonstrate advanced TEDS features"""
+ print("=== TEDS Advanced Feature Demo ===\n")
+
+ # 1. Handle Markdown tables
+ print("**1. Markdown Table Support**")
teds = TEDSMetric("teds")
-
+
markdown_table = """
- | 姓名 | 年龄 | 职业 |
+ | Name | Age | Occupation |
|------|------|------|
- | 张三 | 25 | 工程师 |
- | 李四 | 30 | 设计师 |
+ | Alice | 25 | Engineer |
+ | Bob | 30 | Designer |
"""
-
+
html_table = """
- 姓名 年龄 职业
- 张三 25 工程师
- 李四 30 设计师
+ Name Age Occupation
+ Alice 25 Engineer
+ Bob 30 Designer
"""
-
+
result = teds.calculate(markdown_table, html_table)
- print(f"Markdown vs HTML 表格 TEDS 得分: {result.score:.4f}")
- print(f"详细信息: {result.details}")
+ print(f"Markdown vs HTML table TEDS score: {result.score:.4f}")
+ print(f"Details: {result.details}")
print()
-
- # 2. 复杂表格结构
- print("**2. 复杂表格结构支持 (colspan, rowspan)**")
+
+ # 2. Complex table structure
+ print("**2. Complex Table Structure Support (colspan, rowspan)**")
complex_table1 = """
- 学生信息
- 姓名 成绩
- 张三 95
- 李四 87
+ Student Info
+ Name Score
+ Alice 95
+ Bob 87
"""
-
+
complex_table2 = """
- 类别 详情
- 姓名 成绩
- 张三 95
- 李四 87
+ Category Details
+ Name Score
+ Alice 95
+ Bob 87
"""
-
+
result = teds.calculate(complex_table1, complex_table2)
- print(f"复杂表格结构 TEDS 得分: {result.score:.4f}")
- print(f"编辑距离: {result.details.get('edit_distance')}")
- print(f"节点数量: 预测={result.details.get('predicted_nodes')}, 真实={result.details.get('groundtruth_nodes')}")
+ print(f"Complex table structure TEDS score: {result.score:.4f}")
+ print(f"Edit distance: {result.details.get('edit_distance')}")
+ print(f"Node count: predicted={result.details.get('predicted_nodes')}, groundtruth={result.details.get('groundtruth_nodes')}")
print()
-
- # 3. 结构化 vs 内容敏感评估
- print("**3. 结构化 vs 内容敏感评估对比**")
+
+ # 3. Structure-only vs content-sensitive evaluation
+ print("**3. Structure-only vs Content-sensitive Evaluation Comparison**")
content_teds = TEDSMetric("content_teds", {"structure_only": False})
structure_teds = StructureTEDSMetric("structure_teds")
-
+
table_diff_content = """
"""
-
+
table_same_structure = """
"""
-
+
content_result = content_teds.calculate(table_diff_content, table_same_structure)
structure_result = structure_teds.calculate(table_diff_content, table_same_structure)
-
- print(f"内容敏感 TEDS 得分: {content_result.score:.4f}")
- print(f"仅结构 S-TEDS 得分: {structure_result.score:.4f}")
- print(f"说明: S-TEDS 忽略文本内容差异,只关注表格结构")
+
+ print(f"Content-sensitive TEDS score: {content_result.score:.4f}")
+ print(f"Structure-only S-TEDS score: {structure_result.score:.4f}")
+ print(f"Note: S-TEDS ignores text content differences and only focuses on table structure")
print()
def demo_evaluation_workflow():
- """演示完整的评估工作流程"""
- print("=== 📋 完整评估工作流程 ===\n")
-
- print("**步骤 1: 准备数据**")
- # 模拟评估数据
+ """Demonstrate the complete evaluation workflow"""
+ print("=== Complete Evaluation Workflow ===\n")
+
+ print("**Step 1: Prepare data**")
+ # Simulated evaluation data
sample_data = DataSample(
id="sample_001",
html="""
-
产品价格表
+
Product Price List
- 产品 价格 库存
- iPhone 5999元 50
- iPad 3999元 30
- MacBook 12999元 10
+ Product Price Stock
+ iPhone $599 50
+ iPad $399 30
+ MacBook $1299 10
""",
- content="产品价格表\n\n| 产品 | 价格 | 库存 |\n|------|------|------|\n| iPhone | 5999元 | 50 |\n| iPad | 3999元 | 30 |\n| MacBook | 12999元 | 10 |",
+ content="Product Price List\n\n| Product | Price | Stock |\n|------|------|------|\n| iPhone | $599 | 50 |\n| iPad | $399 | 30 |\n| MacBook | $1299 | 10 |",
content_list=[
{
"type": "title",
- "content": "产品价格表"
+ "content": "Product Price List"
},
{
"type": "table",
- "content": "| 产品 | 价格 | 库存 |\n|------|------|------|\n| iPhone | 5999元 | 50 |\n| iPad | 3999元 | 30 |\n| MacBook | 12999元 | 10 |"
+ "content": "| Product | Price | Stock |\n|------|------|------|\n| iPhone | $599 | 50 |\n| iPad | $399 | 30 |\n| MacBook | $1299 | 10 |"
}
]
)
- print("✅ 数据准备完成")
-
- print("\n**步骤 2: 配置 TEDS 评估器**")
+ print("Data preparation complete")
+
+ print("\n**Step 2: Configure TEDS evaluator**")
evaluation_config = {
"metrics": {
"overall": "edit_distance",
@@ -289,74 +290,74 @@ def demo_evaluation_workflow():
}
}
}
-
+
evaluator = Evaluator(task_config=evaluation_config)
- print("✅ 评估器配置完成")
-
- print("\n**步骤 3: 模拟抽取结果**")
- # 模拟一个有轻微错误的抽取结果
+ print("Evaluator configuration complete")
+
+ print("\n**Step 3: Simulate extraction results**")
+ # Simulate extraction result with minor errors
extraction_result = ExtractionResult(
extractor_name="TestExtractor",
- extracted_content="产品价格表\n\n| 产品 | 价格 |\n|------|------|\n| iPhone | 5999元 |\n| iPad | 3999元 |", # 缺少库存列和MacBook行
+ extracted_content="Product Price List\n\n| Product | Price |\n|------|------|\n| iPhone | $599 |\n| iPad | $399 |", # Missing stock column and MacBook row
extracted_content_list=[
{
- "type": "title",
- "content": "产品价格表"
+ "type": "title",
+ "content": "Product Price List"
},
{
"type": "table",
- "content": "| 产品 | 价格 |\n|------|------|\n| iPhone | 5999元 |\n| iPad | 3999元 |"
+ "content": "| Product | Price |\n|------|------|\n| iPhone | $599 |\n| iPad | $399 |"
}
]
)
- print("✅ 模拟抽取结果生成")
-
- print("\n**步骤 4: 执行评估**")
+ print("Simulated extraction result generated")
+
+ print("\n**Step 4: Run evaluation**")
evaluation_result = evaluator.evaluate_single(sample_data, extraction_result)
-
- print(f"📊 评估结果:")
- print(f" - 整体得分: {evaluation_result.overall_metrics.get('overall', 'N/A'):.4f}")
- print(f" - 表格抽取 (TEDS): {evaluation_result.overall_metrics.get('table_extraction', 'N/A'):.4f}")
- print(f" - 成功率: {evaluation_result.metadata.get('success_rate', 'N/A'):.2%}")
-
- # 显示详细的 TEDS 信息
+
+ print(f"Evaluation results:")
+ print(f" - Overall score: {evaluation_result.overall_metrics.get('overall', 'N/A'):.4f}")
+ print(f" - Table extraction (TEDS): {evaluation_result.overall_metrics.get('table_extraction', 'N/A'):.4f}")
+ print(f" - Success rate: {evaluation_result.metadata.get('success_rate', 'N/A'):.2%}")
+
+ # Display detailed TEDS information
if evaluation_result.detailed_metrics:
for metric_name, metric_result in evaluation_result.detailed_metrics.items():
if 'teds' in metric_name.lower():
- print(f"\n🔍 {metric_name} 详细信息:")
+ print(f"\n{metric_name} details:")
details = metric_result.details
- print(f" - 算法: {details.get('algorithm', 'N/A')}")
- print(f" - 编辑距离: {details.get('edit_distance', 'N/A')}")
- print(f" - 节点数量 (预测/真实): {details.get('predicted_nodes', 'N/A')}/{details.get('groundtruth_nodes', 'N/A')}")
-
- print("\n✅ 评估完成")
+ print(f" - Algorithm: {details.get('algorithm', 'N/A')}")
+ print(f" - Edit distance: {details.get('edit_distance', 'N/A')}")
+ print(f" - Node count (predicted/groundtruth): {details.get('predicted_nodes', 'N/A')}/{details.get('groundtruth_nodes', 'N/A')}")
+
+ print("\nEvaluation complete")
if __name__ == "__main__":
- print("🚀 WebMainBench TEDS 算法使用示例\n")
+ print("WebMainBench TEDS Algorithm Usage Example\n")
print("=" * 60)
-
+
try:
demo_teds_configuration()
print("=" * 60)
-
+
demo_teds_comparison()
print("=" * 60)
-
+
demo_advanced_teds_features()
print("=" * 60)
-
+
demo_evaluation_workflow()
-
- print("\n🎉 所有演示完成!")
- print("\n💡 要点总结:")
- print(" 1. TEDS 算法提供更学术严谨的表格评估")
- print(" 2. 支持 HTML、Markdown 等多种表格格式")
- print(" 3. 可配置结构化评估 (S-TEDS) 或内容敏感评估")
- print(" 4. 能够准确识别表格结构差异和内容差异")
- print(" 5. 与现有评估流程完全兼容")
-
+
+ print("\nAll demos complete!")
+ print("\nKey takeaways:")
+ print(" 1. TEDS algorithm provides more academically rigorous table evaluation")
+ print(" 2. Supports multiple table formats including HTML and Markdown")
+ print(" 3. Configurable structure-only evaluation (S-TEDS) or content-sensitive evaluation")
+ print(" 4. Accurately identifies table structure differences and content differences")
+ print(" 5. Fully compatible with existing evaluation workflows")
+
except Exception as e:
- print(f"\n❌ 演示过程中发生错误: {e}")
+ print(f"\nError during demo: {e}")
import traceback
- traceback.print_exc()
\ No newline at end of file
+ traceback.print_exc()
\ No newline at end of file
diff --git a/examples/test_model.py b/examples/test_model.py
index 59b88aa..c7bc994 100644
--- a/examples/test_model.py
+++ b/examples/test_model.py
@@ -1,16 +1,16 @@
from webmainbench import DataLoader, Evaluator, ExtractorFactory
-# 1. 加载评测数据集
+# 1. Load evaluation dataset
dataset = DataLoader.load_jsonl("WebMainBench/data/WebMainBench_llm-webkit_v1_WebMainBench_dataset_merge_2549_llm_webkit.jsonl")
-# 2. 创建抽取器
+# 2. Create extractor
extractor = ExtractorFactory.create("test-model")
-# 3. 运行评测
+# 3. Run evaluation
evaluator = Evaluator()
result = evaluator.evaluate(dataset, extractor)
-# 4. 查看结果
+# 4. View results
print(f"Overall Score: {result.overall_metrics}")
print(f"Category Metrics: {result.category_metrics}")
print(f"Error Analysis: {result.error_analysis}")
diff --git a/examples/test_table_extract.py b/examples/test_table_extract.py
index 00b978c..102409d 100644
--- a/examples/test_table_extract.py
+++ b/examples/test_table_extract.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
-脚本:仅提取 WebMainBench 数据集中的表格内容到 table.md
+Script: Extract only table content from the WebMainBench dataset into table.md
"""
import json
@@ -8,52 +8,52 @@
import os
from pathlib import Path
-# 添加父目录到 sys.path 以便导入 webmainbench
+# Add parent directory to sys.path for importing webmainbench
sys.path.append(str(Path(__file__).parent.parent))
from webmainbench.metrics.base import BaseMetric
def extract_only_tables_from_dataset():
- """只提取 WebMainBench 数据集中的表格内容并输出到 table.md(table为空的不记录)"""
+ """Extract only table content from the WebMainBench dataset and output to table.md (items with empty tables are not recorded)"""
- # 路径配置
+ # Path configuration
dataset_path = "/home/zhangshuo/Desktop/vscodeworkspace/WebMainBench/data/WebMainBench_llm-webkit_v1_WebMainBench_dataset_merge_with_llm_webkit.jsonl"
output_path = "table.md"
- # 检查数据集文件是否存在
+ # Check if the dataset file exists
if not os.path.exists(dataset_path):
- print(f"错误:未找到数据集文件 {dataset_path}")
+ print(f"Error: dataset file not found: {dataset_path}")
return
extracted_tables = []
line_ids = []
- # 按行读取 JSONL 文件
+ # Read JSONL file line by line
with open(dataset_path, 'r', encoding='utf-8') as f:
for line_num, line in enumerate(f, 1):
try:
data = json.loads(line.strip())
- # 提取ID和内容
+ # Extract ID and content
item_id = data.get('track_id', f'line_{line_num}')
content = data.get('llm_webkit_md', '')
- # 使用 _extract_from_markdown 提取
+ # Use _extract_from_markdown to extract
if content:
extracted = BaseMetric._extract_from_markdown(content)
table_content = extracted.get("table", "")
- # 只记录table不为空的项
+ # Only record items with non-empty table
if table_content and table_content.strip():
extracted_tables.append(table_content)
line_ids.append((item_id, line_num))
except json.JSONDecodeError as e:
- print(f"解析JSON出错,行{line_num}: {e}")
+ print(f"JSON parse error at line {line_num}: {e}")
continue
except Exception as e:
- print(f"处理第{line_num}行时出错: {e}")
+ print(f"Error processing line {line_num}: {e}")
continue
- # 写入 table.md 文件,只输出 table 字段
+ # Write to table.md, output only the table field
with open(output_path, 'w', encoding='utf-8') as f:
f.write("# Extracted Table Content from WebMainBench Dataset\n\n")
f.write(f"Total items processed: {len(extracted_tables)}\n\n")
@@ -68,8 +68,8 @@ def extract_only_tables_from_dataset():
f.write("\n```\n\n")
f.write("---\n\n")
- print(f"表格提取完成!共处理 {len(extracted_tables)} 条数据。")
- print(f"表格内容已保存到: {output_path}")
+ print(f"Table extraction complete! Processed {len(extracted_tables)} items.")
+ print(f"Table content saved to: {output_path}")
if __name__ == "__main__":
extract_only_tables_from_dataset()
diff --git a/examples/trafilatura_extract_demo.py b/examples/trafilatura_extract_demo.py
index 1ee9f3c..f031306 100644
--- a/examples/trafilatura_extract_demo.py
+++ b/examples/trafilatura_extract_demo.py
@@ -1,20 +1,20 @@
import time
from webmainbench.extractors import ExtractorFactory
-# 配置 Trafilatura 抽取器(这里可根据需要添加更多配置)
+# Configure Trafilatura extractor (add more configuration as needed)
config = {}
try:
- # 创建 Trafilatura 抽取器实例
+ # Create Trafilatura extractor instance
extractor = ExtractorFactory.create("trafilatura", config=config)
- print(f"✅ Extractor创建成功: {extractor.description}")
- print(f"📋 版本: {extractor.version}")
- print(f"⚙️ 配置: {extractor.get_config()}\n")
+ print(f"Extractor created successfully: {extractor.description}")
+ print(f"Version: {extractor.version}")
+ print(f"Config: {extractor.get_config()}\n")
except Exception as e:
- print(f"❌ Extractor创建失败: {e}")
+ print(f"Failed to create extractor: {e}")
-# 测试 HTML
+# Test HTML
test_html = """