Commit d808af81 d808af81eba4ee3cf7b1ef5ee64e29b0b0d9df06 by 沈秋雨

评估使用agent模式检索

1 parent 7ba212d1
......@@ -2,6 +2,10 @@ WEKNORA_BASE_URL=http://localhost:8080/api/v1
WEKNORA_API_KEY=
WEKNORA_KB_ID=
WEKNORA_KB_NAME=ragas-eval-pilot
WEKNORA_AGENT_ID=builtin-quick-answer
WEKNORA_AGENT_ENABLED=false
WEKNORA_WEB_SEARCH_ENABLED=false
WEKNORA_SUMMARY_MODEL_ID=
# MinerU HTTP parser. Use cpu, cuda, cuda:0, etc. according to the deployed
# MinerU backend.
......
......@@ -613,7 +613,7 @@ Content-Type: application/json
请求:
```http
POST /api/v1/knowledge-chat/{session_id}
POST /api/v1/agent-chat/{session_id}
X-API-Key: <api-key>
Content-Type: application/json
```
......@@ -623,6 +623,8 @@ Content-Type: application/json
```json
{
"query": "合同中的付款期限是什么?",
"agent_id": "builtin-quick-answer",
"agent_enabled": false,
"knowledge_base_ids": ["kb-0001"],
"disable_title": true,
"enable_memory": false,
......@@ -635,6 +637,8 @@ Content-Type: application/json
```json
{
"query": "合同中的付款期限是什么?",
"agent_id": "builtin-quick-answer",
"agent_enabled": false,
"knowledge_ids": ["knowledge-0001"],
"disable_title": true,
"enable_memory": false,
......@@ -682,7 +686,7 @@ data: {
event: message
data: {
"id": "request-0001",
"response_type": "answer",
"response_type": "final_answer",
"content": "合同约定,付款期限为收到合法有效发票后30日内。",
"done": false,
"knowledge_references": null
......@@ -695,7 +699,7 @@ data: {
event: message
data: {
"id": "request-0001",
"response_type": "answer",
"response_type": "final_answer",
"content": "",
"done": true,
"knowledge_references": null
......@@ -887,9 +891,9 @@ Content-Type: application/json
对每条审核通过的 QA:
1. 创建一个干净 session。
2. 调用 `POST /knowledge-chat/{session_id}`
2. 调用 `POST /agent-chat/{session_id}`,默认使用 `agent_id=builtin-quick-answer`
3. 解析 SSE 中的 references 事件。
4. 解析 SSE 中的 answer 事件。
4. 解析 SSE 中的 final_answer 事件。
5. 构造一条 Ragas 输入记录。
`data/runs/ragas_input.jsonl`
......@@ -1040,9 +1044,9 @@ Content-Type: application/json
### 阶段 6:运行 WeKnora QA
- [ ] 每条 QA 创建一个干净 session。
- [ ] 调用 `knowledge-chat`
- [ ] 调用 `agent-chat`,默认使用 `builtin-quick-answer`
- [ ] 解析 SSE references 事件。
- [ ] 解析 SSE answer 事件。
- [ ] 解析 SSE final_answer 事件。
- [ ] 按 chunk ID 去重引用。
- [ ] 保存原始答案和引用。
- [ ] 记录空答案失败。
......
......@@ -84,6 +84,8 @@ python workflows/04_evaluate_report.py
首轮建议只使用 2 个 PDF、1 个 XLSX 和 10 条审核通过 QA,确认 `retrieved_contexts`、`response`、Ragas 输入字段都正常后再扩展样本量。
WeKnora 回答收集阶段使用 `POST /api/v1/agent-chat/{session_id}`,默认 `WEKNORA_AGENT_ID=builtin-quick-answer`、`WEKNORA_AGENT_ENABLED=false`,即使用内置快速问答的 RAG 模式并携带 `WEKNORA_KB_ID`。如需评测智能推理链路,可将 `WEKNORA_AGENT_ID` 改为 `builtin-smart-reasoning` 并开启 `WEKNORA_AGENT_ENABLED=true`。
默认 `04_parse_docs.py` 从 WeKnora 导出的 `data/exported/chunks.jsonl` 构造测试集来源,不再重复调用外部 PDF 解析器。`05_generate_testset.py` 默认使用 Ragas 结合评估侧 LLM 自动生成 QA;生成阶段使用 `TESTSET_RAGAS_MODE=direct`,直接把 WeKnora chunks 组装成 Ragas KnowledgeGraph 并生成单跳 QA,避免 Ragas 默认文档预处理链路重新抽标题、摘要和实体。生成阶段还会用 `TESTSET_MAX_DOCUMENT_CHARS` 限制单条来源上下文长度,用 `TESTSET_GENERATOR_MAX_TOKENS` 控制生成输出预算,并按来源文件轮询抽样,避免测试集集中在单个文件。`local`、`mineru`、`rule_based` 只作为可选实验/兜底配置保留。
## 主要产物
......
......@@ -317,7 +317,8 @@ python scripts/07_run_weknora_qa.py
- 知识库是否解析完成。
- chunks 是否导出非空。
- WeKnora 问答 SSE 是否返回 `references` 事件。
- WeKnora `agent-chat` 问答 SSE 是否返回 `references` 事件。
- `.env` 中 `WEKNORA_AGENT_ID` 是否为 `builtin-quick-answer`,且 `WEKNORA_AGENT_ENABLED=false`。
- `data/runs/failed_requests.jsonl` 中是否记录 `empty_retrieval`
## 8. 扩大样本规模
......
......@@ -50,6 +50,10 @@ parsing:
qa:
one_session_per_question: true
agent_id: "${WEKNORA_AGENT_ID:-builtin-quick-answer}"
agent_enabled: "${WEKNORA_AGENT_ENABLED:-false}"
web_search_enabled: "${WEKNORA_WEB_SEARCH_ENABLED:-false}"
summary_model_id: "${WEKNORA_SUMMARY_MODEL_ID:-}"
disable_title: true
enable_memory: false
channel: "api"
......
......@@ -24,9 +24,13 @@ def main() -> int:
session_id = session.get("id")
if not session_id:
raise RuntimeError(f"create_session returned no id for {sample_id}")
result = client.knowledge_chat_sse(
result = client.agent_chat_sse(
session_id=session_id,
query=row["user_input"],
agent_id=str(qa_config.get("agent_id", "builtin-quick-answer")),
agent_enabled=bool(qa_config.get("agent_enabled", False)),
web_search_enabled=bool(qa_config.get("web_search_enabled", False)),
summary_model_id=qa_config.get("summary_model_id") or None,
disable_title=bool(qa_config.get("disable_title", True)),
enable_memory=bool(qa_config.get("enable_memory", False)),
channel=str(qa_config.get("channel", "api")),
......
......@@ -201,6 +201,44 @@ class WeKnoraClient:
"raw_events": raw_events,
}
def agent_chat_sse(
self,
*,
session_id: str,
query: str,
agent_id: str,
agent_enabled: bool = False,
knowledge_ids: list[str] | None = None,
knowledge_base_ids: list[str] | None = None,
mentioned_items: list[dict[str, Any]] | None = None,
web_search_enabled: bool | None = False,
summary_model_id: str | None = None,
disable_title: bool = True,
enable_memory: bool = False,
channel: str = "api",
) -> dict[str, Any]:
payload: dict[str, Any] = {
"query": query,
"agent_id": agent_id,
"agent_enabled": agent_enabled,
"disable_title": disable_title,
"enable_memory": enable_memory,
"channel": channel,
}
if web_search_enabled is not None:
payload["web_search_enabled"] = web_search_enabled
if summary_model_id:
payload["summary_model_id"] = summary_model_id
if mentioned_items:
payload["mentioned_items"] = mentioned_items
if knowledge_ids:
payload["knowledge_ids"] = knowledge_ids
else:
self._ensure_knowledge_base_id()
payload["knowledge_base_ids"] = knowledge_base_ids or [self.knowledge_base_id]
return self._chat_sse(f"agent-chat/{session_id}", payload)
def load_messages(self, session_id: str, *, limit: int = 10) -> list[dict[str, Any]]:
payload = self._json_request("GET", f"messages/{session_id}/load", params={"limit": limit})
if isinstance(payload, list):
......@@ -223,6 +261,55 @@ class WeKnoraClient:
data = self._json_request("POST", "knowledge-search", json=payload)
return data if isinstance(data, list) else []
def _chat_sse(self, path: str, payload: dict[str, Any]) -> dict[str, Any]:
    """POST ``payload`` to a streaming chat endpoint and collect the SSE result.

    Args:
        path: Endpoint path relative to the API base URL; resolved with
            ``self._url``.
        payload: JSON request body.

    Returns:
        A dict with:

        * ``request_id`` -- the first truthy ``id`` seen in any event.
        * ``response`` -- concatenation of the ``content`` of every
          ``answer`` / ``final_answer`` event, stripped.
        * ``retrieved_contexts`` -- the non-empty ``content`` of each
          collected reference.
        * ``weknora_references`` -- normalized references, de-duplicated by
          ``id`` (references without an id are always kept).
        * ``raw_events`` -- every parsed SSE event, in arrival order.

    Raises:
        WeKnoraApiError: on an HTTP status >= 400, or when the stream
            contains an event whose ``response_type`` is ``"error"``.
    """
    url = self._url(path)
    response = self.session.post(
        url,
        json=payload,
        timeout=self.timeout_seconds,
        stream=True,
        headers={"Accept": "text/event-stream"},
    )
    answer_parts: list[str] = []
    references: list[dict[str, Any]] = []
    raw_events: list[dict[str, Any]] = []
    request_id: str | None = None
    seen_reference_ids: set[str] = set()
    try:
        if response.status_code >= 400:
            self._log_error("POST", url, response)
            raise WeKnoraApiError(f"POST {url} failed with HTTP {response.status_code}")

        # Event streams are UTF-8 by specification.  Without an explicit
        # charset, requests assumes ISO-8859-1 for text/* content types,
        # which would garble non-ASCII answers when decoding the lines.
        response.encoding = "utf-8"

        for event in parse_sse_events(response.iter_lines(decode_unicode=True)):
            raw_events.append(event)
            data = event.get("data")
            if not isinstance(data, dict):
                # Only dict payloads carry a response_type worth inspecting.
                continue
            request_id = request_id or data.get("id")
            response_type = data.get("response_type")
            if response_type == "references":
                for reference in data.get("knowledge_references") or []:
                    normalized = normalize_reference(reference)
                    reference_id = str(normalized.get("id") or "")
                    if reference_id and reference_id in seen_reference_ids:
                        continue
                    if reference_id:
                        seen_reference_ids.add(reference_id)
                    references.append(normalized)
            elif response_type in {"answer", "final_answer"} and data.get("content"):
                answer_parts.append(data.get("content") or "")
            elif response_type == "error":
                raise WeKnoraApiError(str(data.get("content") or data))
    finally:
        # With stream=True the connection is held until the body is fully
        # consumed or the response is closed; close it explicitly so the
        # error paths above do not leak it.
        response.close()

    retrieved_contexts = [ref["content"] for ref in references if ref.get("content")]
    return {
        "request_id": request_id,
        "response": "".join(answer_parts).strip(),
        "retrieved_contexts": retrieved_contexts,
        "weknora_references": references,
        "raw_events": raw_events,
    }
def _paginate(self, path: str, *, page_size: int = 100) -> list[dict[str, Any]]:
page = 1
rows: list[dict[str, Any]] = []
......