Improve WeKnora upload error diagnostics
Showing 2 changed files with 24 additions and 6 deletions
| ... | @@ -6,7 +6,7 @@ import _bootstrap # noqa: F401 | ... | @@ -6,7 +6,7 @@ import _bootstrap # noqa: F401 |
| 6 | 6 | ||
| 7 | from weknora_eval.api import client_from_config | 7 | from weknora_eval.api import client_from_config |
| 8 | from weknora_eval.config import load_config | 8 | from weknora_eval.config import load_config |
| 9 | from weknora_eval.loaders import setup_logging, write_jsonl | 9 | from weknora_eval.loaders import append_jsonl, setup_logging, write_jsonl |
| 10 | from weknora_eval.raw_docs import iter_raw_doc_files | 10 | from weknora_eval.raw_docs import iter_raw_doc_files |
| 11 | 11 | ||
| 12 | 12 | ||
| ... | @@ -16,8 +16,22 @@ def main() -> int: | ... | @@ -16,8 +16,22 @@ def main() -> int: |
| 16 | client = client_from_config(config) | 16 | client = client_from_config(config) |
| 17 | files = iter_raw_doc_files() | 17 | files = iter_raw_doc_files() |
| 18 | rows = [] | 18 | rows = [] |
| 19 | failures = [] | ||
| 19 | for path in files: | 20 | for path in files: |
| 20 | data = client.upload_file(path) | 21 | print(f"Uploading {path}...") |
| 22 | try: | ||
| 23 | data = client.upload_file(path) | ||
| 24 | except Exception as exc: # noqa: BLE001 | ||
| 25 | failure = { | ||
| 26 | "file_path": str(path), | ||
| 27 | "file_name": path.name, | ||
| 28 | "file_type": path.suffix.lstrip("."), | ||
| 29 | "error": str(exc), | ||
| 30 | } | ||
| 31 | failures.append(failure) | ||
| 32 | append_jsonl("data/exported/failed_uploads.jsonl", failure) | ||
| 33 | print(f"Upload failed for {path}: {exc}") | ||
| 34 | continue | ||
| 21 | rows.append( | 35 | rows.append( |
| 22 | { | 36 | { |
| 23 | "knowledge_id": data.get("id"), | 37 | "knowledge_id": data.get("id"), |
| ... | @@ -28,9 +42,10 @@ def main() -> int: | ... | @@ -28,9 +42,10 @@ def main() -> int: |
| 28 | "raw": data, | 42 | "raw": data, |
| 29 | } | 43 | } |
| 30 | ) | 44 | ) |
| 45 | print(f"Uploaded {path}: knowledge_id={data.get('id')}") | ||
| 31 | write_jsonl("data/exported/knowledge_uploads.jsonl", rows) | 46 | write_jsonl("data/exported/knowledge_uploads.jsonl", rows) |
| 32 | print(f"Uploaded {len(rows)} files") | 47 | print(f"Uploaded {len(rows)} files, failed {len(failures)} files") |
| 33 | return 0 | 48 | return 1 if failures else 0 |
| 34 | 49 | ||
| 35 | 50 | ||
| 36 | if __name__ == "__main__": | 51 | if __name__ == "__main__": | ... | ... |
| ... | @@ -244,10 +244,13 @@ class WeKnoraClient: | ... | @@ -244,10 +244,13 @@ class WeKnoraClient: |
| 244 | continue | 244 | continue |
| 245 | if response.status_code >= 400: | 245 | if response.status_code >= 400: |
| 246 | self._log_error(method, url, response) | 246 | self._log_error(method, url, response) |
| 247 | raise WeKnoraApiError(f"{method} {url} failed with HTTP {response.status_code}") | 247 | body = response.text[:1000] |
| 248 | raise WeKnoraApiError( | ||
| 249 | f"{method} {url} failed with HTTP {response.status_code}: {body}" | ||
| 250 | ) | ||
| 248 | time.sleep(self.request_interval_seconds) | 251 | time.sleep(self.request_interval_seconds) |
| 249 | return response.json() | 252 | return response.json() |
| 250 | except (requests.RequestException, ValueError, WeKnoraApiError) as exc: | 253 | except (requests.RequestException, ValueError) as exc: |
| 251 | last_error = exc | 254 | last_error = exc |
| 252 | if attempt >= self.max_retries: | 255 | if attempt >= self.max_retries: |
| 253 | break | 256 | break | ... | ... |
-
Please register or sign in to post a comment