# 01_upload_docs.py 1.69 KB
from __future__ import annotations

import sys

import _bootstrap  # noqa: F401

from weknora_eval.api import client_from_config
from weknora_eval.config import load_config
from weknora_eval.loaders import append_jsonl, setup_logging, write_jsonl
from weknora_eval.raw_docs import iter_raw_doc_files


def main() -> int:
    """Upload every raw document file and record the outcome of each attempt.

    Each path produced by ``iter_raw_doc_files()`` is passed to the client's
    ``upload_file``. A failing upload is recorded immediately by handing a
    failure record to ``append_jsonl`` for
    ``data/exported/failed_uploads.jsonl``; the loop then moves on to the next
    file. Records for successful uploads are accumulated in memory and handed
    to ``write_jsonl`` for ``data/exported/knowledge_uploads.jsonl`` only after
    the loop has finished.

    Returns:
        ``1`` if at least one upload raised, otherwise ``0``.
    """
    setup_logging()
    uploader = client_from_config(load_config())

    rows = []
    failures = []
    for path in iter_raw_doc_files():
        print(f"Uploading {path}...")
        try:
            data = uploader.upload_file(path)
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: one bad file must not abort the whole batch.
            failed_record = {
                "file_path": str(path),
                "file_name": path.name,
                "file_type": path.suffix.lstrip("."),
                "error": str(exc),
            }
            failures.append(failed_record)
            append_jsonl("data/exported/failed_uploads.jsonl", failed_record)
            print(f"Upload failed for {path}: {exc}")
        else:
            # Prefer the values reported in the response; fall back to what
            # can be derived from the local path when they are missing/empty.
            uploaded_record = {
                "knowledge_id": data.get("id"),
                "file_name": data.get("file_name") or data.get("title") or path.name,
                "file_type": data.get("file_type") or path.suffix.lstrip("."),
                "parse_status": data.get("parse_status"),
                "enable_status": data.get("enable_status"),
                "raw": data,
            }
            rows.append(uploaded_record)
            print(f"Uploaded {path}: knowledge_id={data.get('id')}")

    write_jsonl("data/exported/knowledge_uploads.jsonl", rows)
    print(f"Uploaded {len(rows)} files, failed {len(failures)} files")
    # Non-zero exit status signals that at least one upload failed.
    return int(bool(failures))


if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)