Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
沈秋雨
/
weknora_ragas
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
Commit
f0c4e2ec
...
f0c4e2ec8072c4eeba5b2bb327e19a32b9867980
authored
2026-04-22 13:16:58 +0800
by
沈秋雨
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
设置ragas思考模式开关
1 parent
6c7e5043
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
49 additions
and
0 deletions
.env.example
README.md
TESTING_GUIDE.md
configs/eval.yaml
scripts/00_check_models.py
scripts/00_diagnose_ragas_llm.py
src/weknora_eval/llm_options.py
src/weknora_eval/ragas_runner.py
src/weknora_eval/testset.py
.env.example
View file @
f0c4e2e
...
...
@@ -20,6 +20,7 @@ RAGAS_RERANKER_MODEL=replace-me
RAGAS_GENERATOR_MODEL=gpt-4o-mini
RAGAS_JUDGE_MODEL=gpt-4o-mini
RAGAS_EMBEDDING_MODEL=text-embedding-3-small
RAGAS_ENABLE_THINKING=false
TESTSET_SIZE=50
TESTSET_RAGAS_MODE=direct
...
...
README.md
View file @
f0c4e2e
...
...
@@ -39,6 +39,7 @@ cp .env.example .env
-
`RAGAS_LLM_BASE_URL`
指向 vLLM 的 OpenAI-compatible
`/v1`
-
`RAGAS_EMBEDDING_BASE_URL`
指向 Infinity embedding 的 OpenAI-compatible
`/v1`
-
`RAGAS_*_MODEL`
是评估侧模型名称
-
`RAGAS_ENABLE_THINKING=false`
只会在本评估项目的 RAGAS LLM 请求中发送
`chat_template_kwargs.enable_thinking=false`
,不会改变 WeKnora 检索/问答服务的模型配置
## 首轮 Pilot
...
...
TESTING_GUIDE.md
View file @
f0c4e2e
...
...
@@ -296,10 +296,13 @@ max_tokens: 4096
TESTSET_RAGAS_MODE
=
direct
TESTSET_GENERATOR_MAX_TOKENS
=
4096
TESTSET_MAX_DOCUMENT_CHARS
=
2000
RAGAS_ENABLE_THINKING
=
false
```
`direct`
模式会跳过 Ragas 默认的
`HeadlinesExtractor`
、
`SummaryExtractor`
、
`NERExtractor`
文档预处理链路,直接把 WeKnora chunks 组装成 Ragas KnowledgeGraph 并生成单跳 QA。
`prechunked`
和
`langchain_docs`
仅用于对比实验,遇到本地 vLLM 结构化输出不稳定时不建议使用。
如果使用 Qwen thinking 模型,
`RAGAS_ENABLE_THINKING=false`
会只在 RAGAS 请求里附加
`chat_template_kwargs.enable_thinking=false`
,避免 RAGAS 的 JSON/Pydantic 结构化输出被
`Thinking Process`
前缀破坏;WeKnora 本身的检索问答链路不经过这些脚本,不会受影响。
如果 vLLM 仍然报生成未完成,先把
`TESTSET_SIZE`
降到 3,再把
`TESTSET_MAX_DOCUMENT_CHARS`
调到 1000-1500 验证链路;
`ragas.max_tokens`
主要用于后续评测阶段,不应该拿来无限放大测试集生成阶段的输出长度。
### WeKnora 问答没有 retrieved_contexts
...
...
configs/eval.yaml
View file @
f0c4e2e
...
...
@@ -72,6 +72,7 @@ ragas:
generator_model
:
"
${RAGAS_GENERATOR_MODEL}"
judge_model
:
"
${RAGAS_JUDGE_MODEL}"
embedding_model
:
"
${RAGAS_EMBEDDING_MODEL}"
enable_thinking
:
"
${RAGAS_ENABLE_THINKING:-false}"
temperature
:
0
max_tokens
:
4096
timeout_seconds
:
600
...
...
scripts/00_check_models.py
View file @
f0c4e2e
...
...
@@ -10,6 +10,7 @@ import requests
from
langchain_openai
import
ChatOpenAI
,
OpenAIEmbeddings
from
weknora_eval.config
import
load_config
from
weknora_eval.llm_options
import
chat_openai_kwargs
def
main
()
->
int
:
...
...
@@ -27,6 +28,7 @@ def main() -> int:
model
=
require_value
(
ragas
,
"generator_model"
),
temperature
=
float
(
ragas
.
get
(
"temperature"
,
0
)),
max_tokens
=
min
(
int
(
ragas
.
get
(
"max_tokens"
,
1024
)),
1024
),
extra_kwargs
=
chat_openai_kwargs
(
ragas
),
)
)
failures
.
extend
(
...
...
@@ -37,6 +39,7 @@ def main() -> int:
model
=
require_value
(
ragas
,
"judge_model"
),
temperature
=
float
(
ragas
.
get
(
"temperature"
,
0
)),
max_tokens
=
min
(
int
(
ragas
.
get
(
"max_tokens"
,
1024
)),
1024
),
extra_kwargs
=
chat_openai_kwargs
(
ragas
),
)
)
failures
.
extend
(
...
...
@@ -78,6 +81,7 @@ def check_chat_model(
model
:
str
,
temperature
:
float
,
max_tokens
:
int
,
extra_kwargs
:
dict
[
str
,
Any
],
)
->
list
[
str
]:
print
(
f
"[CHECK] {title}: model={model} base_url={base_url}"
)
started
=
time
.
monotonic
()
...
...
@@ -89,6 +93,7 @@ def check_chat_model(
temperature
=
temperature
,
max_tokens
=
max_tokens
,
timeout
=
120
,
**
extra_kwargs
,
)
response
=
llm
.
invoke
(
"Reply with exactly: OK"
)
content
=
str
(
response
.
content
or
""
)
.
strip
()
...
...
scripts/00_diagnose_ragas_llm.py
View file @
f0c4e2e
...
...
@@ -21,6 +21,7 @@ from ragas.testset.synthesizers.single_hop.prompts import (
)
from
weknora_eval.config
import
load_config
from
weknora_eval.llm_options
import
chat_extra_body
,
chat_openai_kwargs
class
SimpleQA
(
BaseModel
):
...
...
@@ -38,6 +39,7 @@ def main() -> int:
max_tokens
=
int
(
testset
.
get
(
"generator_max_tokens"
,
ragas
.
get
(
"max_tokens"
,
4096
)))
temperature
=
float
(
ragas
.
get
(
"temperature"
,
0
))
timeout
=
int
(
ragas
.
get
(
"timeout_seconds"
,
600
))
extra_body
=
chat_extra_body
(
ragas
)
print
(
"Diagnosing Ragas generator LLM compatibility
\n
"
)
print
(
f
"model={model}"
)
...
...
@@ -54,6 +56,7 @@ def main() -> int:
max_tokens
=
min
(
max_tokens
,
256
),
temperature
=
temperature
,
timeout
=
timeout
,
extra_body
=
extra_body
,
)
json_prompt
=
(
...
...
@@ -70,6 +73,7 @@ def main() -> int:
max_tokens
=
max_tokens
,
temperature
=
temperature
,
timeout
=
timeout
,
extra_body
=
extra_body
,
)
validate_json_payload
(
structured
.
get
(
"content"
)
or
""
)
...
...
@@ -81,6 +85,7 @@ def main() -> int:
max_tokens
=
max_tokens
,
temperature
=
temperature
,
timeout
=
timeout
,
extra_kwargs
=
chat_openai_kwargs
(
ragas
),
)
run_ragas_prompt_probe
(
...
...
@@ -90,6 +95,7 @@ def main() -> int:
max_tokens
=
max_tokens
,
temperature
=
temperature
,
timeout
=
timeout
,
extra_kwargs
=
chat_openai_kwargs
(
ragas
),
)
explain_result
(
plain
,
structured
,
langchain_result
)
...
...
@@ -106,6 +112,7 @@ def run_raw_chat(
max_tokens
:
int
,
temperature
:
float
,
timeout
:
int
,
extra_body
:
dict
[
str
,
Any
],
)
->
dict
[
str
,
Any
]:
print
(
f
"[RAW] {title}"
)
started
=
time
.
monotonic
()
...
...
@@ -120,6 +127,7 @@ def run_raw_chat(
"messages"
:
messages
,
"temperature"
:
temperature
,
"max_tokens"
:
max_tokens
,
**
extra_body
,
},
timeout
=
timeout
,
)
...
...
@@ -169,6 +177,7 @@ def run_langchain_probe(
max_tokens
:
int
,
temperature
:
float
,
timeout
:
int
,
extra_kwargs
:
dict
[
str
,
Any
],
)
->
dict
[
str
,
Any
]:
print
(
"[LANGCHAIN] generation metadata"
)
llm
=
ChatOpenAI
(
...
...
@@ -178,6 +187,7 @@ def run_langchain_probe(
temperature
=
temperature
,
max_tokens
=
max_tokens
,
timeout
=
timeout
,
**
extra_kwargs
,
)
prompt_value
=
StringPromptValue
(
text
=
prompt
)
result
=
llm
.
generate_prompt
([
prompt_value
])
...
...
@@ -206,6 +216,7 @@ def run_ragas_prompt_probe(
max_tokens
:
int
,
temperature
:
float
,
timeout
:
int
,
extra_kwargs
:
dict
[
str
,
Any
],
)
->
None
:
print
(
"[RAGAS] QueryAnswerGenerationPrompt"
)
llm
=
ChatOpenAI
(
...
...
@@ -215,6 +226,7 @@ def run_ragas_prompt_probe(
temperature
=
temperature
,
max_tokens
=
max_tokens
,
timeout
=
timeout
,
**
extra_kwargs
,
)
ragas_llm
=
LangchainLLMWrapper
(
llm
)
ragas_llm
.
set_run_config
(
RunConfig
(
timeout
=
timeout
,
max_workers
=
1
))
...
...
src/weknora_eval/llm_options.py
0 → 100644
View file @
f0c4e2e
from
__future__
import
annotations
from
typing
import
Any
def chat_extra_body(config: dict[str, Any]) -> dict[str, Any]:
    """Build the extra request body for RAGAS-side chat completions.

    When ``enable_thinking`` is absent or falsy in *config* (the default),
    emit ``chat_template_kwargs.enable_thinking = False`` so the serving
    layer's chat template suppresses the thinking prefix; when it is truthy,
    send nothing and leave the model's default behaviour untouched.
    """
    raw = config.get("enable_thinking", False)
    # Inline boolean coercion: bools pass through, strings match the usual
    # truthy spellings, anything else falls back to bool().
    if isinstance(raw, bool):
        thinking_enabled = raw
    elif isinstance(raw, str):
        thinking_enabled = raw.strip().lower() in {"1", "true", "yes", "on"}
    else:
        thinking_enabled = bool(raw)
    if thinking_enabled:
        return {}
    return {"chat_template_kwargs": {"enable_thinking": False}}
def chat_openai_kwargs(config: dict[str, Any]) -> dict[str, Any]:
    """Return keyword arguments for ``ChatOpenAI`` derived from *config*.

    Wraps :func:`chat_extra_body` so callers can splat the result directly
    into the ``ChatOpenAI`` constructor; yields ``{}`` when no extra body
    is needed, keeping the constructor call untouched in that case.
    """
    body = chat_extra_body(config)
    if not body:
        return {}
    return {"extra_body": body}
def
_as_bool
(
value
:
Any
)
->
bool
:
if
isinstance
(
value
,
bool
):
return
value
if
isinstance
(
value
,
str
):
return
value
.
strip
()
.
lower
()
in
{
"1"
,
"true"
,
"yes"
,
"on"
}
return
bool
(
value
)
src/weknora_eval/ragas_runner.py
View file @
f0c4e2e
...
...
@@ -8,6 +8,7 @@ import pandas as pd
from
weknora_eval.config
import
require_config
from
weknora_eval.loaders
import
read_jsonl
from
weknora_eval.llm_options
import
chat_openai_kwargs
def
run_ragas_eval
(
...
...
@@ -64,6 +65,7 @@ def run_ragas_eval(
base_url
=
llm_base_url
or
None
,
temperature
=
temperature
,
max_tokens
=
max_tokens
,
**
chat_openai_kwargs
(
ragas_config
),
)
embeddings
=
OpenAIEmbeddings
(
model
=
embedding_model
,
...
...
src/weknora_eval/testset.py
View file @
f0c4e2e
...
...
@@ -20,6 +20,7 @@ from ragas.testset.synthesizers.single_hop.base import (
from
weknora_eval.config
import
require_config
from
weknora_eval.loaders
import
read_jsonl
,
write_jsonl
from
weknora_eval.llm_options
import
chat_openai_kwargs
from
weknora_eval.ragas_runner
import
_wrap_langchain_models
from
weknora_eval.schemas
import
TestsetRecord
...
...
@@ -95,6 +96,7 @@ def generate_ragas_testset(
temperature
=
float
(
ragas_config
.
get
(
"temperature"
,
0
)),
max_tokens
=
generator_max_tokens
,
timeout
=
int
(
ragas_config
.
get
(
"timeout_seconds"
,
600
)),
**
chat_openai_kwargs
(
ragas_config
),
)
run_config
=
RunConfig
(
timeout
=
int
(
ragas_config
.
get
(
"timeout_seconds"
,
600
)),
...
...
Please
register
or
sign in
to post a comment