01_upload_docs.py
1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from __future__ import annotations
import sys
import _bootstrap # noqa: F401
from weknora_eval.api import client_from_config
from weknora_eval.config import load_config
from weknora_eval.loaders import append_jsonl, setup_logging, write_jsonl
from weknora_eval.raw_docs import iter_raw_doc_files
def main() -> int:
    """Upload every raw document file and record the outcome of each attempt.

    Each path yielded by ``iter_raw_doc_files()`` is passed to
    ``client.upload_file``. A failed upload is recorded immediately via
    ``append_jsonl`` to ``data/exported/failed_uploads.jsonl`` and the loop
    moves on to the next file. Successful uploads are collected and handed
    to ``write_jsonl`` for ``data/exported/knowledge_uploads.jsonl`` once the
    loop has finished.

    Returns:
        ``1`` if at least one upload failed, otherwise ``0``; intended to be
        used as the process exit status.
    """
    setup_logging()
    client = client_from_config(load_config())

    uploaded = []
    failed = []
    for path in iter_raw_doc_files():
        print(f"Uploading {path}...")
        try:
            data = client.upload_file(path)
        except Exception as exc:  # noqa: BLE001
            # Best-effort batch: one bad file must not abort the rest, so the
            # failure is logged to disk right away and the loop carries on.
            record = {
                "file_path": str(path),
                "file_name": path.name,
                "file_type": path.suffix.lstrip("."),
                "error": str(exc),
            }
            failed.append(record)
            append_jsonl("data/exported/failed_uploads.jsonl", record)
            print(f"Upload failed for {path}: {exc}")
        else:
            # Fall back to values derived from the local path when the
            # response omits (or returns a falsy) name/type.
            entry = {
                "knowledge_id": data.get("id"),
                "file_name": data.get("file_name") or data.get("title") or path.name,
                "file_type": data.get("file_type") or path.suffix.lstrip("."),
                "parse_status": data.get("parse_status"),
                "enable_status": data.get("enable_status"),
                "raw": data,
            }
            uploaded.append(entry)
            print(f"Uploaded {path}: knowledge_id={data.get('id')}")

    # The success manifest is handed over in one go after all files were tried.
    write_jsonl("data/exported/knowledge_uploads.jsonl", uploaded)
    print(f"Uploaded {len(uploaded)} files, failed {len(failed)} files")
    return 1 if failed else 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)