Send MinerU device as query parameter
Showing
2 changed files
with
13 additions
and
1 deletion
| ... | @@ -31,6 +31,8 @@ parsing: | ... | @@ -31,6 +31,8 @@ parsing: |
| 31 | http_parse_path: "/file_parse" | 31 | http_parse_path: "/file_parse" |
| 32 | http_form_fields: | 32 | http_form_fields: |
| 33 | device: "${MINERU_DEVICE:-cpu}" | 33 | device: "${MINERU_DEVICE:-cpu}" |
| 34 | http_query_params: | ||
| 35 | device: "${MINERU_DEVICE:-cpu}" | ||
| 34 | api_key: "mineru" | 36 | api_key: "mineru" |
| 35 | timeout_seconds: 600 | 37 | timeout_seconds: 600 |
| 36 | fallback_to_local: false | 38 | fallback_to_local: false | ... | ... |
| ... | @@ -136,10 +136,16 @@ def parse_pdf_with_http( | ... | @@ -136,10 +136,16 @@ def parse_pdf_with_http( |
| 136 | for key, value in (mineru_config.get("http_form_fields") or {}).items() | 136 | for key, value in (mineru_config.get("http_form_fields") or {}).items() |
| 137 | if value not in {None, ""} | 137 | if value not in {None, ""} |
| 138 | } | 138 | } |
| 139 | query_params = { | ||
| 140 | str(key): str(value) | ||
| 141 | for key, value in (mineru_config.get("http_query_params") or form_fields).items() | ||
| 142 | if value not in {None, ""} | ||
| 143 | } | ||
| 139 | 144 | ||
| 140 | with target.open("rb") as file: | 145 | with target.open("rb") as file: |
| 141 | response = requests.post( | 146 | response = requests.post( |
| 142 | f"{base_url}{endpoint}", | 147 | f"{base_url}{endpoint}", |
| 148 | params=query_params, | ||
| 143 | files=[("files", (target.name, file, "application/pdf"))], | 149 | files=[("files", (target.name, file, "application/pdf"))], |
| 144 | data=form_fields, | 150 | data=form_fields, |
| 145 | headers=headers, | 151 | headers=headers, |
| ... | @@ -147,7 +153,10 @@ def parse_pdf_with_http( | ... | @@ -147,7 +153,10 @@ def parse_pdf_with_http( |
| 147 | ) | 153 | ) |
| 148 | if response.status_code >= 400: | 154 | if response.status_code >= 400: |
| 149 | error_detail = _mineru_error_detail(response) | 155 | error_detail = _mineru_error_detail(response) |
| 150 | raise MinerUParseError(f"MinerU HTTP failed with {response.status_code}: {error_detail}") | 156 | raise MinerUParseError( |
| 157 | f"MinerU HTTP failed with {response.status_code}: {error_detail}; " | ||
| 158 | f"sent_form={form_fields}; sent_query={query_params}" | ||
| 159 | ) | ||
| 151 | 160 | ||
| 152 | payload = response.json() | 161 | payload = response.json() |
| 153 | contents = extract_mineru_contents(payload) | 162 | contents = extract_mineru_contents(payload) |
| ... | @@ -172,6 +181,7 @@ def parse_pdf_with_http( | ... | @@ -172,6 +181,7 @@ def parse_pdf_with_http( |
| 172 | "parser": "mineru:http", | 181 | "parser": "mineru:http", |
| 173 | "mineru_endpoint": endpoint, | 182 | "mineru_endpoint": endpoint, |
| 174 | "mineru_form_fields": form_fields, | 183 | "mineru_form_fields": form_fields, |
| 184 | "mineru_query_params": query_params, | ||
| 175 | }, | 185 | }, |
| 176 | ) | 186 | ) |
| 177 | ) | 187 | ) | ... | ... |
-
Please register or sign in to post a comment