eval.yaml
2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# WeKnora service connection and target knowledge base.
# The child keys are nested under `weknora`; at column 0 they would leave
# `weknora` null and clash with the other `api_key` / `timeout_seconds` keys
# that appear further down in this file.
# NOTE(review): `${VAR}` / `${VAR:-default}` placeholders are presumably
# expanded from the environment by the config loader -- confirm.
weknora:
  base_url: "${WEKNORA_BASE_URL}"
  api_key: "${WEKNORA_API_KEY}"
  knowledge_base_id: "${WEKNORA_KB_ID}"
  knowledge_base_name: "${WEKNORA_KB_NAME:-ragas-eval-pilot}"
  knowledge_base_description: "Knowledge base for independent Ragas evaluation."
  timeout_seconds: 300
  # NOTE(review): quoted placeholder; the value is a string unless the loader
  # converts it to a number -- confirm.
  request_interval_seconds: "${REQUEST_INTERVAL_SECONDS:-0.2}"

# Test-set generation settings.
# The child keys are nested under `testset`; at column 0 `testset` itself
# would be null and its options would become unrelated top-level keys.
testset:
  # NOTE(review): quoted placeholder; after expansion this is the string "50"
  # unless the loader casts it to an integer -- confirm.
  size: "${TESTSET_SIZE:-50}"
  include_pdf: true
  include_xlsx: true
  min_context_chars: 80
  require_manual_review: true

# Document parsing: selected provider, output artefact paths, and one option
# group per backend (`local`, `mineru`).
# NOTE(review): the nesting below was reconstructed from key order; the spots
# where more than one parent is plausible are marked individually.
parsing:
  provider: "mineru"
  output_path: "data/parsed_docs/documents.jsonl"
  failed_path: "data/parsed_docs/failed_parse.jsonl"
  summary_path: "data/parsed_docs/parse_summary.json"
  local:
    pdf_backend: "pymupdf"
    xlsx_mode: "row_text"
    # NOTE(review): assumed to belong to `local` (it precedes `mineru`) -- confirm.
    min_chars: 80
  mineru:
    mode: "http"
    cli_bin: "mineru"
    output_dir: "data/parsed_docs/mineru_raw"
    # Overridable per environment; the default keeps the previously hard-coded
    # endpoint, so behaviour is unchanged when MINERU_HTTP_BASE_URL is unset.
    # NOTE(review): the default contains ':' and '/'; confirm the loader's
    # placeholder syntax accepts such defaults.
    http_base_url: "${MINERU_HTTP_BASE_URL:-http://172.23.184.9:8002}"
    http_parse_path: "/file_parse"
    http_form_fields:
      device: "${MINERU_DEVICE:-cpu}"
    # Key is read from the environment instead of being fixed in the file; the
    # default keeps the previous literal value.
    # NOTE(review): assumed to be a `mineru` option, not a form field -- confirm.
    api_key: "${MINERU_API_KEY:-mineru}"
    timeout_seconds: 600
    # NOTE(review): assumed to be a `mineru` option; it could also belong
    # directly under `parsing` -- confirm against the consumer.
    fallback_to_local: false

# Question-answering run options.
# The child keys are nested under `qa`; at column 0 `qa` itself would be null.
# NOTE(review): the exact effect of each flag is defined by the consumer of
# this file and is not visible here -- confirm before changing values.
qa:
  one_session_per_question: true
  disable_title: true
  enable_memory: false
  channel: "api"
  verify_with_messages: false

# Ragas evaluation settings: model endpoints, model names, generation limits
# and the list of metrics to compute.
# The child keys are nested under `ragas`, and `metrics` is a proper sequence;
# at column 0 `timeout_seconds` here would duplicate the same key used by
# other sections of this file.
ragas:
  provider: "openai-compatible"
  # vLLM OpenAI-compatible endpoint, for example http://localhost:8000/v1.
  llm_api_key: "${RAGAS_LLM_API_KEY}"
  llm_base_url: "${RAGAS_LLM_BASE_URL}"
  # Infinity OpenAI-compatible embedding endpoint, for example
  # http://localhost:7997/v1.
  embedding_api_key: "${RAGAS_EMBEDDING_API_KEY}"
  embedding_base_url: "${RAGAS_EMBEDDING_BASE_URL}"
  # Reserved for future retrieval/rerank metrics. The current Ragas pipeline
  # does not call reranker APIs.
  reranker_api_key: "${RAGAS_RERANKER_API_KEY}"
  reranker_base_url: "${RAGAS_RERANKER_BASE_URL}"
  reranker_model: "${RAGAS_RERANKER_MODEL}"
  generator_model: "${RAGAS_GENERATOR_MODEL}"
  judge_model: "${RAGAS_JUDGE_MODEL}"
  embedding_model: "${RAGAS_EMBEDDING_MODEL}"
  temperature: 0
  max_tokens: 4096
  timeout_seconds: 600
  max_workers: 1
  metrics:
    - faithfulness
    - response_relevancy
    - context_precision
    - context_recall
    - factual_correctness