Commit 56b1b00a 56b1b00afbc2f4e98da064db9ef815d808dc0cfc by 沈秋雨

Improve WeKnora upload error diagnostics

1 parent 854ed21c
......@@ -6,7 +6,7 @@ import _bootstrap # noqa: F401
from weknora_eval.api import client_from_config
from weknora_eval.config import load_config
from weknora_eval.loaders import setup_logging, write_jsonl
from weknora_eval.loaders import append_jsonl, setup_logging, write_jsonl
from weknora_eval.raw_docs import iter_raw_doc_files
......@@ -16,8 +16,22 @@ def main() -> int:
client = client_from_config(config)
files = iter_raw_doc_files()
rows = []
failures = []
for path in files:
data = client.upload_file(path)
print(f"Uploading {path}...")
try:
data = client.upload_file(path)
except Exception as exc: # noqa: BLE001
failure = {
"file_path": str(path),
"file_name": path.name,
"file_type": path.suffix.lstrip("."),
"error": str(exc),
}
failures.append(failure)
append_jsonl("data/exported/failed_uploads.jsonl", failure)
print(f"Upload failed for {path}: {exc}")
continue
rows.append(
{
"knowledge_id": data.get("id"),
......@@ -28,9 +42,10 @@ def main() -> int:
"raw": data,
}
)
print(f"Uploaded {path}: knowledge_id={data.get('id')}")
write_jsonl("data/exported/knowledge_uploads.jsonl", rows)
print(f"Uploaded {len(rows)} files")
return 0
print(f"Uploaded {len(rows)} files, failed {len(failures)} files")
return 1 if failures else 0
if __name__ == "__main__":
......
......@@ -244,10 +244,13 @@ class WeKnoraClient:
continue
if response.status_code >= 400:
self._log_error(method, url, response)
raise WeKnoraApiError(f"{method} {url} failed with HTTP {response.status_code}")
body = response.text[:1000]
raise WeKnoraApiError(
f"{method} {url} failed with HTTP {response.status_code}: {body}"
)
time.sleep(self.request_interval_seconds)
return response.json()
except (requests.RequestException, ValueError, WeKnoraApiError) as exc:
except (requests.RequestException, ValueError) as exc:
last_error = exc
if attempt >= self.max_retries:
break
......