diff --git a/examples/multi_extractor_compare.py b/examples/multi_extractor_compare.py
index c02bae2..daa7f5f 100644
--- a/examples/multi_extractor_compare.py
+++ b/examples/multi_extractor_compare.py
@@ -1,13 +1,7 @@
 from webmainbench import DataLoader, Evaluator, ExtractorFactory, DataSaver
 from pathlib import Path
 
-# 全局LLM配置
-LLM_CONFIG = {
-    'llm_base_url': '',
-    'llm_api_key': '',
-    'llm_model': '',
-    'use_llm': True
-}
+# 如需调用LLM修正抽取结果,在 webmainbench/config.py 中配置 LLM api
 
 def all_extractor_comparison():
     """演示多抽取器对比"""
@@ -15,7 +9,7 @@ def all_extractor_comparison():
     print("\n=== 多抽取器对比演示 ===\n")
 
     # 创建数据集
-    dataset_path = Path("../data/test_math.jsonl")
+    dataset_path = Path("../data/WebMainBench_llm-webkit_v1_WebMainBench_7887_within_formula.jsonl")
     dataset = DataLoader.load_jsonl(dataset_path)
 
     # 创建webkit抽取器
diff --git a/webmainbench/config.py b/webmainbench/config.py
new file mode 100644
index 0000000..d04ddd0
--- /dev/null
+++ b/webmainbench/config.py
@@ -0,0 +1,11 @@
+"""
+全局配置文件
+"""
+
+# LLM配置,用于修正抽取工具的抽取结果
+LLM_CONFIG = {
+    'llm_base_url': '',
+    'llm_api_key': '',
+    'llm_model': 'deepseek-chat',
+    'use_llm': True
+}
diff --git a/webmainbench/evaluator/evaluator.py b/webmainbench/evaluator/evaluator.py
index 3a6cc76..a66fbc3 100644
--- a/webmainbench/evaluator/evaluator.py
+++ b/webmainbench/evaluator/evaluator.py
@@ -4,7 +4,7 @@
 
 from dataclasses import dataclass
 from typing import Dict, Any, List, Optional, Union, Iterator
-import time
+import time, sys
 import itertools
 from datetime import datetime
 from pathlib import Path
@@ -85,10 +85,78 @@ def __init__(self, metric_config: Dict[str, Any] = None):
         Args:
             metric_config: Configuration for metrics
         """
+
+        self._validate_llm_config()
+
         self.metric_calculator = MetricCalculator(metric_config)
         self.metric_config = metric_config or {}
-    
-    def evaluate(self, 
+    
+    def _validate_llm_config(self):
+        """验证LLM配置的完整性和有效性"""
+        import time
+        from ..config import LLM_CONFIG
+
+        if LLM_CONFIG.get('use_llm', False):
+            # 检查配置完整性
+            if not LLM_CONFIG.get('llm_base_url') or not LLM_CONFIG.get('llm_api_key'):
+                print("\n" + "=" * 60)
+                print("❌ 错误:LLM配置不完整!")
+                print("-" * 60)
+                print("当前 use_llm = True,但缺少必要的API配置。")
+                print("\n请在 webmainbench/config.py 中完成以下配置:")
+                print("  1. llm_base_url (例如: 'https://api.deepseek.com')")
+                print("  2. llm_api_key (例如: 'sk-xxxxxxxxxxxx')")
+                print("\n或者设置 use_llm = False 来禁用LLM功能。")
+                print("=" * 60 + "\n")
+                sys.exit(1)
+
+            # 验证API有效性
+            try:
+                from openai import OpenAI
+
+                print("正在验证LLM API配置...")
+                client = OpenAI(
+                    base_url=LLM_CONFIG.get('llm_base_url'),
+                    api_key=LLM_CONFIG.get('llm_api_key')
+                )
+
+                # 发送测试请求
+                response = client.chat.completions.create(
+                    model=LLM_CONFIG.get('llm_model', 'deepseek-chat'),
+                    messages=[{"role": "user", "content": "test"}],
+                    max_tokens=5,
+                    temperature=0
+                )
+
+                print("✅ LLM API配置验证成功!\n使用 基础方案➕LLM增强提取效果 进行评测。")
+
+            except Exception as e:
+                print("\n" + "=" * 60)
+                print("❌ 错误:LLM API配置无效!")
+                print("-" * 60)
+                print(f"验证失败原因: {str(e)}")
+                print("\n请检查 webmainbench/config.py 中的配置:")
+                print("  1. llm_base_url 是否正确")
+                print("  2. llm_api_key 是否有效")
+                print("  3. llm_model 是否支持")
+                print("  4. 网络连接是否正常")
+                print("\n或者设置 use_llm = False 来禁用LLM功能。")
+                print("=" * 60 + "\n")
+                sys.exit(1)
+        else:
+            # 未启用LLM的提示
+            print("\n" + "=" * 60)
+            print("⚠️ 注意:当前未启用LLM增强提取效果功能")
+            print("   如需启用LLM增强提取效果,请在 webmainbench/config.py 中配置:")
+            print("   - 设置 use_llm = True")
+            print("   - 填写 llm_base_url")
+            print("   - 填写 llm_api_key")
+            print("=" * 60)
+            print("   (5秒后使用基础方案进行对比...)")
+            time.sleep(5)
+            print()
+
+    def evaluate(self, 
                  dataset: BenchmarkDataset,
                  extractor: Union[BaseExtractor, str],
                  extractor_config: Dict[str, Any] = None,
diff --git a/webmainbench/metrics/base.py b/webmainbench/metrics/base.py
index 46b23c6..41b3e2d 100644
--- a/webmainbench/metrics/base.py
+++ b/webmainbench/metrics/base.py
@@ -197,7 +197,8 @@ def _extract_from_markdown(text: str, field_name: str = None) -> Dict[str, str]:
             return {'code': '', 'formula': '', 'table': '', 'text': ''}
 
         # 加载 llm 配置
-        from examples.multi_extractor_compare import LLM_CONFIG
+        from ..config import LLM_CONFIG
+
         # 直接创建具体的提取器实例
         from .code_extractor import CodeSplitter
         from .formula_extractor import FormulaSplitter
diff --git a/webmainbench/metrics/base_content_splitter.py b/webmainbench/metrics/base_content_splitter.py
index 0b9ced6..c0685a9 100644
--- a/webmainbench/metrics/base_content_splitter.py
+++ b/webmainbench/metrics/base_content_splitter.py
@@ -52,7 +52,6 @@ def should_use_llm(self, field_name: str) -> bool:
 
         # 默认逻辑:对groundtruth内容不使用LLM,对其他内容使用
         if field_name == "groundtruth_content":
-            print(f"[DEBUG] 检测到groundtruth内容,不使用LLM")
             return False
 
         return True