Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Global configuration file."""

# LLM settings, used to correct the results produced by the extraction tools.
# The base URL and API key are intentionally left blank here and are expected
# to be filled in by the user (presumably before enabling `use_llm` — confirm
# against the code that consumes this dict).
LLM_CONFIG = dict(
    llm_base_url='',
    llm_api_key='',
    llm_model='deepseek-chat',
    use_llm=True,
)
10 changes: 2 additions & 8 deletions examples/multi_extractor_compare.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
from webmainbench import DataLoader, Evaluator, ExtractorFactory, DataSaver
from pathlib import Path

# 全局LLM配置
LLM_CONFIG = {
'llm_base_url': '',
'llm_api_key': '',
'llm_model': '',
'use_llm': True
}
# To have an LLM correct the extraction results, configure the LLM API in config.py

def all_extractor_comparison():
"""演示多抽取器对比"""

print("\n=== 多抽取器对比演示 ===\n")

# 创建数据集
dataset_path = Path("../data/test_math.jsonl")
dataset_path = Path("../data/WebMainBench_llm-webkit_v1_WebMainBench_7887_within_formula.jsonl")
dataset = DataLoader.load_jsonl(dataset_path)

# 创建webkit抽取器
Expand Down
2 changes: 1 addition & 1 deletion webmainbench/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def _extract_from_markdown(text: str, field_name: str = None) -> Dict[str, str]:
return {'code': '', 'formula': '', 'table': '', 'text': ''}

# 加载 llm 配置
from examples.multi_extractor_compare import LLM_CONFIG
from config import LLM_CONFIG
# 直接创建具体的提取器实例
from .code_extractor import CodeSplitter
from .formula_extractor import FormulaSplitter
Expand Down