Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
沈秋雨
/
weknora_ragas
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
Commit
66bac6d5
...
66bac6d5362a372f5bcc3627eb1a33c147795bbc
authored
2026-04-21 16:09:25 +0800
by
沈秋雨
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
Improve ingestion status diagnostics
1 parent
56b1b00a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
43 additions
and
3 deletions
scripts/02_wait_ingestion.py
src/weknora_eval/api.py
scripts/02_wait_ingestion.py
View file @
66bac6d
from
__future__
import
annotations
from
collections
import
Counter
import
sys
import
_bootstrap
# noqa: F401
...
...
@@ -18,12 +19,37 @@ def main() -> int:
result
=
client
.
wait_ingestion_completed
(
knowledge_ids
=
knowledge_ids
)
knowledge
=
client
.
list_knowledge
()
write_jsonl
(
"data/exported/knowledge.jsonl"
,
knowledge
)
target_knowledge
=
[
row
for
row
in
knowledge
if
not
knowledge_ids
or
row
.
get
(
"id"
)
in
knowledge_ids
]
print
(
"Ingestion status: "
f
"completed={len(result['completed'])} failed={len(result['failed'])} "
f
"pending={len(result['pending'])}"
)
print
(
"Status distribution: "
f
"parse_status={dict(Counter(str(row.get('parse_status')) for row in target_knowledge))} "
f
"enable_status={dict(Counter(str(row.get('enable_status')) for row in target_knowledge))}"
)
if
result
[
"pending"
]:
print
(
"Pending samples:"
)
for
row
in
result
[
"pending"
][:
5
]:
print
(
"- "
f
"id={row.get('id')} title={row.get('title') or row.get('file_name')} "
f
"parse_status={row.get('parse_status')} enable_status={row.get('enable_status')} "
f
"error={row.get('error_message') or ''}"
)
if
result
[
"failed"
]:
print
(
"Failed samples:"
)
for
row
in
result
[
"failed"
][:
10
]:
print
(
"- "
f
"id={row.get('id')} title={row.get('title') or row.get('file_name')} "
f
"error={row.get('error_message') or ''}"
)
return
1
if
result
[
"failed"
]
or
result
[
"pending"
]
else
0
...
...
src/weknora_eval/api.py
View file @
66bac6d
...
...
@@ -2,6 +2,7 @@ from __future__ import annotations
import
logging
import
time
from
collections
import
Counter
from
pathlib
import
Path
from
typing
import
Any
from
urllib.parse
import
urljoin
...
...
@@ -90,7 +91,7 @@ class WeKnoraClient:
completed
=
[
row
for
row
in
rows
if
row
.
get
(
"parse_status"
)
==
"completed"
and
row
.
get
(
"enable_status"
)
==
"enabled"
if
self
.
_is_ingestion_completed
(
row
)
]
failed
=
[
row
for
row
in
rows
if
row
.
get
(
"parse_status"
)
==
"failed"
]
...
...
@@ -100,7 +101,13 @@ class WeKnoraClient:
return
{
"completed"
:
completed
,
"failed"
:
[],
"pending"
:
[]}
pending
=
[
row
for
row
in
rows
if
row
not
in
completed
]
logger
.
info
(
"Waiting for ingestion: completed=
%
s pending=
%
s"
,
len
(
completed
),
len
(
pending
))
logger
.
info
(
"Waiting for ingestion: completed=
%
s pending=
%
s parse_status=
%
s enable_status=
%
s"
,
len
(
completed
),
len
(
pending
),
dict
(
Counter
(
str
(
row
.
get
(
"parse_status"
))
for
row
in
rows
)),
dict
(
Counter
(
str
(
row
.
get
(
"enable_status"
))
for
row
in
rows
)),
)
time
.
sleep
(
poll_interval_seconds
)
rows
=
self
.
list_knowledge
()
...
...
@@ -109,7 +116,7 @@ class WeKnoraClient:
completed
=
[
row
for
row
in
rows
if
row
.
get
(
"parse_status"
)
==
"completed"
and
row
.
get
(
"enable_status"
)
==
"enabled"
if
self
.
_is_ingestion_completed
(
row
)
]
failed
=
[
row
for
row
in
rows
if
row
.
get
(
"parse_status"
)
==
"failed"
]
pending
=
[
row
for
row
in
rows
if
row
not
in
completed
and
row
not
in
failed
]
...
...
@@ -118,6 +125,13 @@ class WeKnoraClient:
def list_chunks(self, knowledge_id: str, *, page_size: int = 100) -> list[dict[str, Any]]:
    """Return the chunks of one knowledge entry.

    Delegates to ``self._paginate`` with the path ``chunks/<knowledge_id>``
    and forwards ``page_size`` unchanged.

    Args:
        knowledge_id: Identifier interpolated into the request path.
        page_size: Page size handed to ``_paginate`` (keyword-only).

    Returns:
        Whatever ``_paginate`` returns for that path.
    """
    chunks_path = f"chunks/{knowledge_id}"
    chunk_rows = self._paginate(chunks_path, page_size=page_size)
    return chunk_rows
def _is_ingestion_completed(self, row: dict[str, Any]) -> bool:
    """Return True when a knowledge row counts as fully ingested.

    A row is complete only if BOTH of its status fields hold an accepted
    value:

    * ``parse_status``: ``"completed"``, ``"success"``, ``"done"``,
      ``2`` or ``"2"``
    * ``enable_status``: ``"enabled"``, ``"success"``, ``"done"``,
      ``1``, ``2``, ``"1"`` or ``"2"``

    The numeric codes are accepted next to the string labels; their exact
    meaning is defined by the remote API and is not visible here.

    Args:
        row: One knowledge record as a dict. Missing keys are read as
            ``None`` and therefore never match.

    Returns:
        ``True`` if both statuses are accepted, ``False`` otherwise. An
        unexpected status shape (e.g. a list or dict) is treated as "not
        completed" instead of raising.
    """
    # Tuples instead of sets: tuple membership compares with ``==`` and
    # never hashes the candidate, so an unhashable status value simply
    # fails to match rather than raising TypeError.
    parsed_values = ("completed", "success", "done", 2, "2")
    enabled_values = ("enabled", "success", "done", 1, 2, "1", "2")

    if row.get("parse_status") not in parsed_values:
        return False
    return row.get("enable_status") in enabled_values
def
knowledge_chat_sse
(
self
,
*
,
...
...
Please
register
or
sign in
to post a comment