Commit 8096ca31 (8096ca31d6a62c93cf5c93066f13039217de0a8b) by 沈秋雨

Send MinerU device as a query parameter (in addition to the form field)

1 parent b9e14ffa
......@@ -31,6 +31,8 @@ parsing:
http_parse_path: "/file_parse"
http_form_fields:
device: "${MINERU_DEVICE:-cpu}"
http_query_params:
device: "${MINERU_DEVICE:-cpu}"
api_key: "mineru"
timeout_seconds: 600
fallback_to_local: false
......
......@@ -136,10 +136,16 @@ def parse_pdf_with_http(
for key, value in (mineru_config.get("http_form_fields") or {}).items()
if value not in {None, ""}
}
query_params = {
str(key): str(value)
for key, value in (mineru_config.get("http_query_params") or form_fields).items()
if value not in {None, ""}
}
with target.open("rb") as file:
response = requests.post(
f"{base_url}{endpoint}",
params=query_params,
files=[("files", (target.name, file, "application/pdf"))],
data=form_fields,
headers=headers,
......@@ -147,7 +153,10 @@ def parse_pdf_with_http(
)
if response.status_code >= 400:
error_detail = _mineru_error_detail(response)
raise MinerUParseError(f"MinerU HTTP failed with {response.status_code}: {error_detail}")
raise MinerUParseError(
f"MinerU HTTP failed with {response.status_code}: {error_detail}; "
f"sent_form={form_fields}; sent_query={query_params}"
)
payload = response.json()
contents = extract_mineru_contents(payload)
......@@ -172,6 +181,7 @@ def parse_pdf_with_http(
"parser": "mineru:http",
"mineru_endpoint": endpoint,
"mineru_form_fields": form_fields,
"mineru_query_params": query_params,
},
)
)
......