# 04_parse_docs.py
from __future__ import annotations

import sys

import _bootstrap  # noqa: F401

from weknora_eval.config import load_config
from weknora_eval.loaders import setup_logging
from weknora_eval.parsers.local import parse_raw_docs
from weknora_eval.parsers.mineru import parse_with_mineru


def main() -> int:
    """Parse documents with the configured provider and report the outcome.

    The provider name is read from ``config["parsing"]["provider"]`` and
    falls back to ``"local"`` when the section or the key is missing.

    Returns:
        ``0`` when the parser returned a non-empty collection of rows,
        ``1`` otherwise, so the value can be used as a process exit code.

    Raises:
        ValueError: If the provider is neither ``"local"`` nor ``"mineru"``.
    """
    setup_logging()
    settings = load_config()
    parsing_section = settings.get("parsing", {})
    backend = parsing_section.get("provider", "local")

    # Pick the parser first; both share the same call signature and both
    # return a ``(rows, summary)`` pair.
    if backend == "local":
        run_parser = parse_raw_docs
    elif backend == "mineru":
        run_parser = parse_with_mineru
    else:
        raise ValueError(f"Unsupported parsing provider: {backend}")

    rows, summary = run_parser(settings)
    print(f"Parsed {len(rows)} documents: {summary}")

    # An empty result is reported as a failing exit status.
    if not rows:
        return 1
    return 0


if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes
    # the process exit code.
    exit_status = main()
    sys.exit(exit_status)