Commit 56b1b00a 56b1b00afbc2f4e98da064db9ef815d808dc0cfc by 沈秋雨

Improve WeKnora upload error diagnostics

1 parent 854ed21c
...@@ -6,7 +6,7 @@ import _bootstrap # noqa: F401 ...@@ -6,7 +6,7 @@ import _bootstrap # noqa: F401
6 6
7 from weknora_eval.api import client_from_config 7 from weknora_eval.api import client_from_config
8 from weknora_eval.config import load_config 8 from weknora_eval.config import load_config
9 from weknora_eval.loaders import setup_logging, write_jsonl 9 from weknora_eval.loaders import append_jsonl, setup_logging, write_jsonl
10 from weknora_eval.raw_docs import iter_raw_doc_files 10 from weknora_eval.raw_docs import iter_raw_doc_files
11 11
12 12
...@@ -16,8 +16,22 @@ def main() -> int: ...@@ -16,8 +16,22 @@ def main() -> int:
16 client = client_from_config(config) 16 client = client_from_config(config)
17 files = iter_raw_doc_files() 17 files = iter_raw_doc_files()
18 rows = [] 18 rows = []
19 failures = []
19 for path in files: 20 for path in files:
21 print(f"Uploading {path}...")
22 try:
20 data = client.upload_file(path) 23 data = client.upload_file(path)
24 except Exception as exc: # noqa: BLE001
25 failure = {
26 "file_path": str(path),
27 "file_name": path.name,
28 "file_type": path.suffix.lstrip("."),
29 "error": str(exc),
30 }
31 failures.append(failure)
32 append_jsonl("data/exported/failed_uploads.jsonl", failure)
33 print(f"Upload failed for {path}: {exc}")
34 continue
21 rows.append( 35 rows.append(
22 { 36 {
23 "knowledge_id": data.get("id"), 37 "knowledge_id": data.get("id"),
...@@ -28,9 +42,10 @@ def main() -> int: ...@@ -28,9 +42,10 @@ def main() -> int:
28 "raw": data, 42 "raw": data,
29 } 43 }
30 ) 44 )
45 print(f"Uploaded {path}: knowledge_id={data.get('id')}")
31 write_jsonl("data/exported/knowledge_uploads.jsonl", rows) 46 write_jsonl("data/exported/knowledge_uploads.jsonl", rows)
32 print(f"Uploaded {len(rows)} files") 47 print(f"Uploaded {len(rows)} files, failed {len(failures)} files")
33 return 0 48 return 1 if failures else 0
34 49
35 50
36 if __name__ == "__main__": 51 if __name__ == "__main__":
......
...@@ -244,10 +244,13 @@ class WeKnoraClient: ...@@ -244,10 +244,13 @@ class WeKnoraClient:
244 continue 244 continue
245 if response.status_code >= 400: 245 if response.status_code >= 400:
246 self._log_error(method, url, response) 246 self._log_error(method, url, response)
247 raise WeKnoraApiError(f"{method} {url} failed with HTTP {response.status_code}") 247 body = response.text[:1000]
248 raise WeKnoraApiError(
249 f"{method} {url} failed with HTTP {response.status_code}: {body}"
250 )
248 time.sleep(self.request_interval_seconds) 251 time.sleep(self.request_interval_seconds)
249 return response.json() 252 return response.json()
250 except (requests.RequestException, ValueError, WeKnoraApiError) as exc: 253 except (requests.RequestException, ValueError) as exc:
251 last_error = exc 254 last_error = exc
252 if attempt >= self.max_retries: 255 if attempt >= self.max_retries:
253 break 256 break
......