Commit 8096ca31 8096ca31d6a62c93cf5c93066f13039217de0a8b by 沈秋雨

Send MinerU device as query parameter

1 parent b9e14ffa
...@@ -31,6 +31,8 @@ parsing: ...@@ -31,6 +31,8 @@ parsing:
31 http_parse_path: "/file_parse" 31 http_parse_path: "/file_parse"
32 http_form_fields: 32 http_form_fields:
33 device: "${MINERU_DEVICE:-cpu}" 33 device: "${MINERU_DEVICE:-cpu}"
34 http_query_params:
35 device: "${MINERU_DEVICE:-cpu}"
34 api_key: "mineru" 36 api_key: "mineru"
35 timeout_seconds: 600 37 timeout_seconds: 600
36 fallback_to_local: false 38 fallback_to_local: false
......
...@@ -136,10 +136,16 @@ def parse_pdf_with_http( ...@@ -136,10 +136,16 @@ def parse_pdf_with_http(
136 for key, value in (mineru_config.get("http_form_fields") or {}).items() 136 for key, value in (mineru_config.get("http_form_fields") or {}).items()
137 if value not in {None, ""} 137 if value not in {None, ""}
138 } 138 }
139 query_params = {
140 str(key): str(value)
141 for key, value in (mineru_config.get("http_query_params") or form_fields).items()
142 if value not in {None, ""}
143 }
139 144
140 with target.open("rb") as file: 145 with target.open("rb") as file:
141 response = requests.post( 146 response = requests.post(
142 f"{base_url}{endpoint}", 147 f"{base_url}{endpoint}",
148 params=query_params,
143 files=[("files", (target.name, file, "application/pdf"))], 149 files=[("files", (target.name, file, "application/pdf"))],
144 data=form_fields, 150 data=form_fields,
145 headers=headers, 151 headers=headers,
...@@ -147,7 +153,10 @@ def parse_pdf_with_http( ...@@ -147,7 +153,10 @@ def parse_pdf_with_http(
147 ) 153 )
148 if response.status_code >= 400: 154 if response.status_code >= 400:
149 error_detail = _mineru_error_detail(response) 155 error_detail = _mineru_error_detail(response)
150 raise MinerUParseError(f"MinerU HTTP failed with {response.status_code}: {error_detail}") 156 raise MinerUParseError(
157 f"MinerU HTTP failed with {response.status_code}: {error_detail}; "
158 f"sent_form={form_fields}; sent_query={query_params}"
159 )
151 160
152 payload = response.json() 161 payload = response.json()
153 contents = extract_mineru_contents(payload) 162 contents = extract_mineru_contents(payload)
...@@ -172,6 +181,7 @@ def parse_pdf_with_http( ...@@ -172,6 +181,7 @@ def parse_pdf_with_http(
172 "parser": "mineru:http", 181 "parser": "mineru:http",
173 "mineru_endpoint": endpoint, 182 "mineru_endpoint": endpoint,
174 "mineru_form_fields": form_fields, 183 "mineru_form_fields": form_fields,
184 "mineru_query_params": query_params,
175 }, 185 },
176 ) 186 )
177 ) 187 )
......