08_build_ragas_input.py
1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from __future__ import annotations
import sys
import _bootstrap # noqa: F401
from weknora_eval.loaders import append_jsonl, read_jsonl, setup_logging, write_jsonl
def main() -> int:
    """Join approved test-set rows with WeKnora answers into Ragas input rows.

    Rows read from ``data/testsets/testset.reviewed.jsonl`` whose
    ``review_status`` equals ``"approved"`` are matched by ``sample_id``
    against the rows read from ``data/runs/weknora_answers.jsonl``.  Every
    complete pair becomes one row handed to ``write_jsonl`` for
    ``data/runs/ragas_input.jsonl``.  Samples that have no answer, or whose
    combined row has an empty or missing required field, are skipped and
    reported through ``append_jsonl`` to ``data/runs/failed_requests.jsonl``.

    If several rows share a ``sample_id``, the last one read wins (plain
    dict-comprehension semantics).

    Returns:
        ``0`` if at least one Ragas row was built, ``1`` otherwise, so the
        value can be used directly as a process exit status.

    Raises:
        KeyError: If an approved test-set row or an answer row has no
            ``sample_id`` key.
    """
    setup_logging()

    failures_path = "data/runs/failed_requests.jsonl"
    # Fields that must be non-empty for a row to be emitted.
    required_fields = (
        "user_input",
        "response",
        "retrieved_contexts",
        "reference",
        "reference_contexts",
    )

    testset = {
        row["sample_id"]: row
        for row in read_jsonl("data/testsets/testset.reviewed.jsonl")
        if row.get("review_status") == "approved"
    }
    answers = {
        row["sample_id"]: row
        for row in read_jsonl("data/runs/weknora_answers.jsonl")
    }

    ragas_rows: list[dict] = []
    for sample_id, qa in testset.items():
        answer = answers.get(sample_id)
        if not answer:
            append_jsonl(
                failures_path,
                {"sample_id": sample_id, "error": "missing_answer"},
            )
            continue

        row = {
            "sample_id": sample_id,
            # .get() instead of indexing: a test-set row that lacks the key is
            # then caught by the required-field check below and logged as a
            # failure, rather than aborting the whole run with KeyError.
            "user_input": qa.get("user_input"),
            "response": answer.get("response") or "",
            "retrieved_contexts": answer.get("retrieved_contexts") or [],
            "reference": qa.get("reference"),
            "reference_contexts": qa.get("reference_contexts") or [],
            "session_id": answer.get("session_id"),
            "request_id": answer.get("request_id"),
            "weknora_references": answer.get("weknora_references") or [],
            "source_file": qa.get("source_file"),
            "gold_chunk_ids": qa.get("gold_chunk_ids") or [],
        }

        # Truthiness check on purpose: None, "" and [] all count as missing.
        missing = [key for key in required_fields if not row.get(key)]
        if missing:
            append_jsonl(
                failures_path,
                {
                    "sample_id": sample_id,
                    "error": f"missing_ragas_fields:{','.join(missing)}",
                },
            )
            continue

        ragas_rows.append(row)

    write_jsonl("data/runs/ragas_input.jsonl", ragas_rows)
    print(f"Built {len(ragas_rows)} Ragas input rows")
    # A run that produced nothing is treated as a failure.
    return 0 if ragas_rows else 1
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())