Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
沈秋雨
/
weknora_ragas
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
Commit
8096ca31
...
8096ca31d6a62c93cf5c93066f13039217de0a8b
authored
2026-04-21 16:58:02 +0800
by
沈秋雨
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
Send MinerU device as query parameter
1 parent
b9e14ffa
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
1 deletion
configs/eval.yaml
src/weknora_eval/parsers/mineru.py
configs/eval.yaml
View file @
8096ca3
...
...
@@ -31,6 +31,8 @@ parsing:
http_parse_path
:
"
/file_parse"
http_form_fields
:
device
:
"
${MINERU_DEVICE:-cpu}"
http_query_params
:
device
:
"
${MINERU_DEVICE:-cpu}"
api_key
:
"
mineru"
timeout_seconds
:
600
fallback_to_local
:
false
...
...
src/weknora_eval/parsers/mineru.py
View file @
8096ca3
...
...
@@ -136,10 +136,16 @@ def parse_pdf_with_http(
for
key
,
value
in
(
mineru_config
.
get
(
"http_form_fields"
)
or
{})
.
items
()
if
value
not
in
{
None
,
""
}
}
query_params
=
{
str
(
key
):
str
(
value
)
for
key
,
value
in
(
mineru_config
.
get
(
"http_query_params"
)
or
form_fields
)
.
items
()
if
value
not
in
{
None
,
""
}
}
with
target
.
open
(
"rb"
)
as
file
:
response
=
requests
.
post
(
f
"{base_url}{endpoint}"
,
params
=
query_params
,
files
=
[(
"files"
,
(
target
.
name
,
file
,
"application/pdf"
))],
data
=
form_fields
,
headers
=
headers
,
...
...
@@ -147,7 +153,10 @@ def parse_pdf_with_http(
)
if
response
.
status_code
>=
400
:
error_detail
=
_mineru_error_detail
(
response
)
raise
MinerUParseError
(
f
"MinerU HTTP failed with {response.status_code}: {error_detail}"
)
raise
MinerUParseError
(
f
"MinerU HTTP failed with {response.status_code}: {error_detail}; "
f
"sent_form={form_fields}; sent_query={query_params}"
)
payload
=
response
.
json
()
contents
=
extract_mineru_contents
(
payload
)
...
...
@@ -172,6 +181,7 @@ def parse_pdf_with_http(
"parser"
:
"mineru:http"
,
"mineru_endpoint"
:
endpoint
,
"mineru_form_fields"
:
form_fields
,
"mineru_query_params"
:
query_params
,
},
)
)
...
...
Please
register
or
sign in
to post a comment