Commit 3cd77d9d 3cd77d9dce4d84fe01a5d01f440a9156482a059e by 沈秋雨

简化测试流程

1 parent 5a879284
......@@ -57,18 +57,21 @@ cp .env.example .env
```bash
python scripts/00_create_kb.py
python scripts/00_check_models.py
python scripts/01_upload_docs.py
python scripts/02_wait_ingestion.py
python scripts/03_export_chunks.py
python scripts/04_parse_docs.py
python scripts/05_generate_testset.py
python scripts/06_review_testset.py
python scripts/07_run_weknora_qa.py
python scripts/08_build_ragas_input.py
python scripts/09_run_ragas_eval.py
python scripts/10_report.py
python workflows/01_ingest_export.py
python workflows/02_generate_testset.py
python workflows/03_review_testset.py
python workflows/04_evaluate_report.py
```
合并后的 workflow 只调用原有脚本,不复制业务逻辑。新旧步骤对应关系:
- `workflows/01_ingest_export.py` = `scripts/01_upload_docs.py` + `scripts/02_wait_ingestion.py` + `scripts/03_export_chunks.py`
- `workflows/02_generate_testset.py` = `scripts/04_parse_docs.py` + `scripts/05_generate_testset.py`
- `workflows/03_review_testset.py` = `scripts/06_review_testset.py`
- `workflows/04_evaluate_report.py` = `scripts/07_run_weknora_qa.py` + `scripts/08_build_ragas_input.py` + `scripts/09_run_ragas_eval.py` + `scripts/10_report.py`
每个 workflow 结束时都会打印本阶段生成的文件路径。
首轮建议只使用 2 个 PDF、1 个 XLSX 和 10 条审核通过 QA,确认 `retrieved_contexts`、`response`、Ragas 输入字段都正常后再扩展样本量。
默认 `04_parse_docs.py` 从 WeKnora 导出的 `data/exported/chunks.jsonl` 构造测试集来源,不再重复调用外部 PDF 解析器。`05_generate_testset.py` 默认使用 Ragas 结合评估侧 LLM 自动生成 QA;生成阶段使用 `TESTSET_RAGAS_MODE=direct`,直接把 WeKnora chunks 组装成 Ragas KnowledgeGraph 并生成单跳 QA,避免 Ragas 默认文档预处理链路重新抽标题、摘要和实体。生成阶段还会用 `TESTSET_MAX_DOCUMENT_CHARS` 限制单条来源上下文长度,用 `TESTSET_GENERATOR_MAX_TOKENS` 控制生成输出预算,并按来源文件轮询抽样,避免测试集集中在单个文件。`local`、`mineru`、`rule_based` 只作为可选实验/兜底配置保留。
......
......@@ -150,30 +150,19 @@ cp /path/to/*.xlsx data/raw_docs/xlsx/
按顺序执行:
```bash
python scripts/01_upload_docs.py
python scripts/02_wait_ingestion.py
python scripts/03_export_chunks.py
python scripts/04_parse_docs.py
python scripts/05_generate_testset.py
python scripts/06_review_testset.py
python scripts/07_run_weknora_qa.py
python scripts/08_build_ragas_input.py
python scripts/09_run_ragas_eval.py
python scripts/10_report.py
python workflows/01_ingest_export.py
python workflows/02_generate_testset.py
python workflows/03_review_testset.py
python workflows/04_evaluate_report.py
```
说明:
- `01_upload_docs.py` 上传 `data/raw_docs/` 下的 PDF/XLSX,也兼容 `pdf/`、`xlsx/` 子目录。
- `02_wait_ingestion.py` 等待 WeKnora 解析完成。
- `03_export_chunks.py` 导出 WeKnora chunks。
- `04_parse_docs.py` 默认从 WeKnora 导出的 chunks 构造 Ragas 测试集来源,不再重复解析原始 PDF。
- `05_generate_testset.py` 默认使用 Ragas 结合评估侧 LLM 生成候选 QA。
- `06_review_testset.py` 当前会把候选 QA 标为 approved,后续可替换为人工审核。
- `07_run_weknora_qa.py` 逐条调用 WeKnora 问答并解析 SSE。
- `08_build_ragas_input.py` 合并 QA 和 WeKnora 输出。
- `09_run_ragas_eval.py` 调用 Ragas 打分。
- `10_report.py` 生成 Markdown 报告。
- `workflows/01_ingest_export.py` 对齐原 `01_upload_docs.py`、`02_wait_ingestion.py`、`03_export_chunks.py`:上传原始文件、等待 WeKnora 解析、导出 chunks。
- `workflows/02_generate_testset.py` 对齐原 `04_parse_docs.py`、`05_generate_testset.py`:从 chunks 构造测试集来源并生成候选 QA。
- `workflows/03_review_testset.py` 对齐原 `06_review_testset.py`:把候选 QA 标为 approved,后续可替换为人工审核。
- `workflows/04_evaluate_report.py` 对齐原 `07_run_weknora_qa.py`、`08_build_ragas_input.py`、`09_run_ragas_eval.py`、`10_report.py`:逐条调用 WeKnora 问答、构造 Ragas 输入、调用 Ragas 打分、生成 Markdown 报告。
- 每个 workflow 结束时会打印本阶段生成的文件路径。
## 6. 产物验收
......
from __future__ import annotations
import sys
from _runner import print_artifacts, run_scripts
# Scripts this workflow hands to run_scripts(), in execution order
# (upload -> wait for ingestion -> export chunks).
SCRIPTS = [
    "scripts/01_upload_docs.py",
    "scripts/02_wait_ingestion.py",
    "scripts/03_export_chunks.py",
]

# Repo-relative output paths handed to print_artifacts() after the run.
ARTIFACTS = [
    "data/exported/knowledge_uploads.jsonl",
    "data/exported/failed_uploads.jsonl",
    "data/exported/knowledge.jsonl",
    "data/exported/chunks.jsonl",
]
def main() -> int:
    """Run the ingest/export stage and list its output files.

    Returns:
        The exit code produced by ``run_scripts(SCRIPTS)``. The artifact
        listing is printed after the scripts return, whatever that code is.
    """
    exit_code = run_scripts(SCRIPTS)
    print_artifacts(ARTIFACTS)
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
from __future__ import annotations
import sys
from _runner import print_artifacts, run_scripts
# Scripts this workflow hands to run_scripts(), in execution order
# (build test-set sources -> generate candidate QA).
SCRIPTS = [
    "scripts/04_parse_docs.py",
    "scripts/05_generate_testset.py",
]

# Repo-relative output paths handed to print_artifacts() after the run.
ARTIFACTS = [
    "data/parsed_docs/documents.jsonl",
    "data/parsed_docs/failed_parse.jsonl",
    "data/parsed_docs/parse_summary.json",
    "data/testsets/testset.raw.jsonl",
]
def main() -> int:
    """Run the test-set generation stage and list its output files.

    Returns:
        The exit code produced by ``run_scripts(SCRIPTS)``. The artifact
        listing is printed after the scripts return, whatever that code is.
    """
    exit_code = run_scripts(SCRIPTS)
    print_artifacts(ARTIFACTS)
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
from __future__ import annotations
import sys
from _runner import print_artifacts, run_scripts
# Single script this workflow hands to run_scripts().
SCRIPTS = [
    "scripts/06_review_testset.py",
]

# Repo-relative output path handed to print_artifacts() after the run.
ARTIFACTS = [
    "data/testsets/testset.reviewed.jsonl",
]
def main() -> int:
    """Run the test-set review stage and list its output file.

    Returns:
        The exit code produced by ``run_scripts(SCRIPTS)``. The artifact
        listing is printed after the script returns, whatever that code is.
    """
    exit_code = run_scripts(SCRIPTS)
    print_artifacts(ARTIFACTS)
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
from __future__ import annotations
import sys
from _runner import print_artifacts, run_scripts
# Scripts this workflow hands to run_scripts(), in execution order
# (WeKnora QA -> build Ragas input -> Ragas eval -> report).
SCRIPTS = [
    "scripts/07_run_weknora_qa.py",
    "scripts/08_build_ragas_input.py",
    "scripts/09_run_ragas_eval.py",
    "scripts/10_report.py",
]

# Repo-relative output paths handed to print_artifacts() after the run.
ARTIFACTS = [
    "data/runs/weknora_answers.jsonl",
    "data/runs/failed_requests.jsonl",
    "data/runs/ragas_input.jsonl",
    "data/reports/ragas_scores.csv",
    "data/reports/summary.md",
]
def main() -> int:
    """Run the evaluation/report stage and list its output files.

    Returns:
        The exit code produced by ``run_scripts(SCRIPTS)``. The artifact
        listing is printed after the scripts return, whatever that code is.
    """
    exit_code = run_scripts(SCRIPTS)
    print_artifacts(ARTIFACTS)
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
from __future__ import annotations
import importlib.util
import sys
from pathlib import Path
# Directory one level above the one containing this file.
ROOT = Path(__file__).resolve().parents[1]
SCRIPTS_DIR = ROOT / "scripts"

# Prepend scripts/ to sys.path once. Presumably this lets a loaded script
# import its sibling helper modules as it would when run directly -- confirm
# against the scripts themselves.
if str(SCRIPTS_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPTS_DIR))


def run_script(path: str) -> int:
    """Load a script from a file path, call its ``main()``, return its exit code.

    The script is executed as a module named ``_workflow_<stem>``, so any
    ``if __name__ == "__main__":`` guard inside it does not fire; only its
    module body and then ``main()`` run.

    Args:
        path: Script location, joined onto ``ROOT`` (an absolute path is
            used as-is by ``pathlib`` join semantics).

    Returns:
        ``int(main() or 0)`` -- a falsy return such as ``None`` maps to 0.

    Raises:
        RuntimeError: If no import spec/loader can be built for the file, or
            the loaded module has no ``main`` attribute.
        Exception: Anything raised while executing the script body or its
            ``main()`` propagates unchanged.
    """
    script_path = ROOT / path
    module_name = f"_workflow_{script_path.stem}"
    spec = importlib.util.spec_from_file_location(module_name, script_path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Cannot load script: {script_path}")

    module = importlib.util.module_from_spec(spec)
    # Register before executing, as the importlib "import a source file
    # directly" recipe does. Code that resolves sys.modules[cls.__module__]
    # while the module body runs (e.g. @dataclass with string annotations,
    # pickle) otherwise finds nothing and fails.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module registered.
        sys.modules.pop(module_name, None)
        raise

    if not hasattr(module, "main"):
        raise RuntimeError(f"Script has no main(): {script_path}")
    result = module.main()
    return int(result or 0)
def run_scripts(paths: list[str]) -> int:
    """Run scripts one after another, halting at the first failure.

    Args:
        paths: Script paths passed to ``run_script`` in the given order.

    Returns:
        The first non-zero exit code encountered, or 0 when every script
        returned 0 (including when ``paths`` is empty).
    """
    for script in paths:
        print(f"\n==> Running {script}")
        status = run_script(script)
        if status == 0:
            continue
        # Later scripts are skipped once one reports a non-zero code.
        print(f"Stopped at {script} with exit code {status}")
        return status
    return 0
def print_artifacts(paths: list[str]) -> None:
    """Print the subset of ``paths`` that currently exist under ``ROOT``.

    Args:
        paths: Paths relative to ``ROOT``. Only those that exist on disk at
            call time are listed; existence is the only check performed.
    """
    found = []
    for rel in paths:
        candidate = ROOT / rel
        if candidate.exists():
            found.append(candidate)

    if found:
        print("\nGenerated files:")
        for item in found:
            print(f"- {item.relative_to(ROOT)}")
    else:
        print("\nGenerated files: none")