# eval.yaml

# Connection and knowledge-base settings for the WeKnora service.
# Values written as ${VAR} or ${VAR:-default} are placeholders; presumably the
# config loader expands them from the environment, using the text after ":-"
# as the default when the variable is unset — confirm against the loader.
weknora:
  base_url: "${WEKNORA_BASE_URL}"
  api_key: "${WEKNORA_API_KEY}"
  # NOTE(review): both an id and a name are configured for the knowledge base;
  # which one takes precedence when both are set is not visible here — confirm.
  knowledge_base_id: "${WEKNORA_KB_ID}"
  knowledge_base_name: "${WEKNORA_KB_NAME:-ragas-eval-pilot}"
  knowledge_base_description: "Knowledge base for independent Ragas evaluation."
  timeout_seconds: 300
  # Quoted so the placeholder is a valid YAML string. The expanded value is
  # therefore a string ("0.2" by default); the consumer presumably converts it
  # to a number — confirm.
  request_interval_seconds: "${REQUEST_INTERVAL_SECONDS:-0.2}"

# Test-set settings: target size, which source formats to include, and
# filtering/review flags.
testset:
  # Placeholder with a default of 50. The YAML value is a quoted string, so
  # the consumer presumably casts it to an integer — confirm.
  size: "${TESTSET_SIZE:-50}"
  include_pdf: true
  include_xlsx: true
  # Same value (80) as parsing.local.min_chars.
  # NOTE(review): confirm whether the two thresholds are meant to stay in sync.
  min_context_chars: 80
  require_manual_review: true

# Document-parsing settings: the selected provider, the paths for parsed
# output / failures / summary, and per-provider options.
parsing:
  provider: "mineru"
  output_path: "data/parsed_docs/documents.jsonl"
  failed_path: "data/parsed_docs/failed_parse.jsonl"
  summary_path: "data/parsed_docs/parse_summary.json"
  # Options for the local parser. With provider "mineru" and
  # mineru.fallback_to_local set to false, these are presumably unused in the
  # default configuration — confirm against the parser code.
  local:
    pdf_backend: "pymupdf"
    xlsx_mode: "row_text"
    min_chars: 80
  # Options for the MinerU provider.
  mineru:
    mode: "http"
    # NOTE(review): cli_bin and output_dir look like settings for a
    # non-HTTP mode; confirm whether they are read when mode is "http".
    cli_bin: "mineru"
    output_dir: "data/parsed_docs/mineru_raw"
    # Overridable per environment via MINERU_HTTP_BASE_URL. The default keeps
    # the previously hard-coded endpoint, so behaviour is unchanged when the
    # variable is unset. Uses the same ${VAR:-default} form as "device" below.
    http_base_url: "${MINERU_HTTP_BASE_URL:-http://172.23.184.9:8002}"
    http_parse_path: "/file_parse"
    http_form_fields:
      device: "${MINERU_DEVICE:-cpu}"
    # Overridable via MINERU_API_KEY so a real credential never has to be
    # committed; the default keeps the previous literal value.
    api_key: "${MINERU_API_KEY:-mineru}"
    timeout_seconds: 600
    fallback_to_local: false

# Question-answering request options (keys listed alphabetically).
# NOTE(review): the exact effect of each flag is defined by the consumer of
# this file and is not visible here — confirm before changing a value.
qa:
  channel: "api"
  disable_title: true
  enable_memory: false
  one_session_per_question: true
  verify_with_messages: false

# Ragas evaluation settings: model endpoints, model names, generation
# parameters, and the list of metrics to compute.
# Values written as ${VAR} are placeholders with no default; presumably the
# loader expands them from the environment — confirm how an unset variable is
# handled.
ragas:
  provider: "openai-compatible"
  # LLM endpoint: a vLLM OpenAI-compatible server, for example
  # http://localhost:8000/v1.
  llm_api_key: "${RAGAS_LLM_API_KEY}"
  llm_base_url: "${RAGAS_LLM_BASE_URL}"
  # Embedding endpoint: an Infinity OpenAI-compatible server, for example
  # http://localhost:7997/v1.
  embedding_api_key: "${RAGAS_EMBEDDING_API_KEY}"
  embedding_base_url: "${RAGAS_EMBEDDING_BASE_URL}"
  # Reserved for future retrieval/rerank metrics. The current Ragas pipeline
  # does not call reranker APIs, so the three reranker_* keys are not used yet.
  reranker_api_key: "${RAGAS_RERANKER_API_KEY}"
  reranker_base_url: "${RAGAS_RERANKER_BASE_URL}"
  reranker_model: "${RAGAS_RERANKER_MODEL}"
  # Model names. NOTE(review): presumably generator_model and judge_model are
  # both served from llm_base_url and embedding_model from embedding_base_url
  # — confirm.
  generator_model: "${RAGAS_GENERATOR_MODEL}"
  judge_model: "${RAGAS_JUDGE_MODEL}"
  embedding_model: "${RAGAS_EMBEDDING_MODEL}"
  temperature: 0
  max_tokens: 4096
  timeout_seconds: 600
  max_workers: 1
  # Metrics to evaluate. NOTE(review): these names presumably have to match
  # the identifiers the pipeline maps to Ragas metric implementations — confirm
  # before adding or renaming an entry.
  metrics:
    - faithfulness
    - response_relevancy
    - context_precision
    - context_recall
    - factual_correctness