Commit 96c9ce7d 96c9ce7d40ae89231e6f9d1a37d73e2c5bb380ff by cnb.bofCdSsphPA

Validate the PostgreSQL ACR storage path with live evidence

Constraint: The new data model had to be proven against the user-provided PostgreSQL instance and stay aligned with Phase-1 encoder-only decisions
Rejected: Document-only schema guidance without a live database run | It would leave retrieval correctness and table intent unproven
Confidence: high
Scope-risk: narrow
Directive: Keep future retrieval experiments writing through model/feature/reference registries instead of adding fixed per-model columns
Tested: /usr/local/miniconda3/bin/python scripts/live_pgvector_music20_eval.py --dsn 'postgres://d2:***@127.0.0.1:5432/d2' --schema acr_test --reset-schema --output data/pgvector_eval/music20/live_pgvector_report.json; /usr/local/miniconda3/bin/python scripts/evaluate_songid_pgvector_path.py --reference-embeddings-jsonl data/pgvector_eval/music20/reference_embeddings.jsonl --query-embeddings-jsonl data/pgvector_eval/music20/query_embeddings.jsonl --output data/pgvector_eval/music20/songid_eval_report_fresh.json; /usr/local/miniconda3/bin/python -m py_compile scripts/live_pgvector_music20_eval.py scripts/evaluate_songid_pgvector_path.py; git diff --check -- docs/README.md docs/CHANGELOG.md docs/postgres_db_schema_samples.md acr-engine/scripts/live_pgvector_music20_eval.py acr-engine/data/pgvector_eval/music20/live_pgvector_report.json acr-engine/data/pgvector_eval/music20/songid_eval_report_fresh.json
Not-tested: MERT/MuQ live embeddings, type_8/type_16 live JSONL coverage, multi-recording/cover-lane decision flow
1 parent b220751b
{
"schema": "acr_test",
"dsn_redacted": "postgres://d2:***@127.0.0.1:5432/d2",
"input": {
"reference_embeddings_jsonl": "/workspace/acr-engine/data/pgvector_eval/music20/reference_embeddings.jsonl",
"query_embeddings_jsonl": "/workspace/acr-engine/data/pgvector_eval/music20/query_embeddings.jsonl",
"reference_count": 20,
"query_count": 22
},
"registry": {
"model_id": 1,
"feature_set_id": 1,
"reference_set_id": 1,
"retrieval_index_id": 1
},
"table_counts": {
"canonical_song": 20,
"work": 20,
"recording": 20,
"recording_asset": 20,
"audio_window": 20,
"audio_embedding": 20,
"retrieval_candidate": 220,
"match_decision": 22
},
"lineage_negative_test": {
"passed": true,
"error_type": "RaiseException",
"message": "Invalid asset_id=1 or recording_id=1000000 for audio_window"
},
"evaluation": {
"backend": "postgresql+pgvector-live",
"note": "Reference embeddings are stored in schema v2; 24-d logical embeddings are zero-padded to vector(192) for physical storage.",
"overall": {
"count": 22,
"top1": 0.909091,
"top3": 0.954545,
"top10": 0.954545,
"mrr": 0.934343,
"mean_rank": 1.8182,
"median_rank": 1.0
},
"by_query_type": {
"1": {
"count": 20,
"top1": 1.0,
"top3": 1.0,
"top10": 1.0,
"mrr": 1.0,
"mean_rank": 1.0,
"median_rank": 1.0
},
"7": {
"count": 2,
"top1": 0.0,
"top3": 0.5,
"top10": 0.5,
"mrr": 0.277778,
"mean_rank": 10.0,
"median_rank": 10.0
}
},
"confusion_focus": {
"7": {
"query_type": 7,
"metrics": {
"count": 2,
"top1": 0.0,
"top3": 0.5,
"top10": 0.5,
"mrr": 0.277778,
"mean_rank": 10.0,
"median_rank": 10.0
},
"interpretation": "light confusion / transformed query"
},
"8": {
"query_type": 8,
"metrics": {
"count": 0
},
"interpretation": "harder confusion bucket"
},
"16": {
"query_type": 16,
"metrics": {
"count": 0
},
"interpretation": "strong confusion or far-domain bucket"
}
},
"examples": {
"1": [
{
"query_id": "music20-q0000-t1-song100",
"song_id": "100",
"rank": 1,
"top3": [
{
"song_id": "100",
"canonical_song_id": 1,
"evidence_window_id": 1,
"combined_score": 0.9099869376417087,
"max_sim": 0.9999854862685651,
"top3_avg": 0.9999854862685651,
"vote": 1
},
{
"song_id": "116",
"canonical_song_id": 17,
"evidence_window_id": 17,
"combined_score": 0.8674688834706314,
"max_sim": 0.9527432038562573,
"top3_avg": 0.9527432038562573,
"vote": 1
},
{
"song_id": "103",
"canonical_song_id": 4,
"evidence_window_id": 4,
"combined_score": 0.8665370278518509,
"max_sim": 0.9517078087242788,
"top3_avg": 0.9517078087242788,
"vote": 1
}
]
},
{
"query_id": "music20-q0001-t1-song101",
"song_id": "101",
"rank": 1,
"top3": [
{
"song_id": "101",
"canonical_song_id": 2,
"evidence_window_id": 2,
"combined_score": 0.9099997586011674,
"max_sim": 0.999999731779075,
"top3_avg": 0.999999731779075,
"vote": 1
},
{
"song_id": "118",
"canonical_song_id": 19,
"evidence_window_id": 19,
"combined_score": 0.8930541242989376,
"max_sim": 0.9811712492210417,
"top3_avg": 0.9811712492210417,
"vote": 1
},
{
"song_id": "116",
"canonical_song_id": 17,
"evidence_window_id": 17,
"combined_score": 0.892017854392,
"max_sim": 0.9800198382133333,
"top3_avg": 0.9800198382133333,
"vote": 1
}
]
},
{
"query_id": "music20-q0002-t1-song102",
"song_id": "102",
"rank": 1,
"top3": [
{
"song_id": "102",
"canonical_song_id": 3,
"evidence_window_id": 3,
"combined_score": 0.9099973714353238,
"max_sim": 0.9999970793725819,
"top3_avg": 0.9999970793725819,
"vote": 1
},
{
"song_id": "113",
"canonical_song_id": 14,
"evidence_window_id": 14,
"combined_score": 0.878619819365752,
"max_sim": 0.9651331326286134,
"top3_avg": 0.9651331326286134,
"vote": 1
},
{
"song_id": "118",
"canonical_song_id": 19,
"evidence_window_id": 19,
"combined_score": 0.8727551417721799,
"max_sim": 0.9586168241913111,
"top3_avg": 0.9586168241913111,
"vote": 1
}
]
},
{
"query_id": "music20-q0003-t1-song103",
"song_id": "103",
"rank": 1,
"top3": [
{
"song_id": "103",
"canonical_song_id": 4,
"evidence_window_id": 4,
"combined_score": 0.9078967457382905,
"max_sim": 0.9976630508203228,
"top3_avg": 0.9976630508203228,
"vote": 1
},
{
"song_id": "116",
"canonical_song_id": 17,
"evidence_window_id": 17,
"combined_score": 0.8892688048103843,
"max_sim": 0.9769653386782048,
"top3_avg": 0.9769653386782048,
"vote": 1
},
{
"song_id": "109",
"canonical_song_id": 10,
"evidence_window_id": 10,
"combined_score": 0.8786497490793317,
"max_sim": 0.9651663878659241,
"top3_avg": 0.9651663878659241,
"vote": 1
}
]
},
{
"query_id": "music20-q0004-t1-song104",
"song_id": "104",
"rank": 1,
"top3": [
{
"song_id": "104",
"canonical_song_id": 5,
"evidence_window_id": 5,
"combined_score": 0.9099890834089845,
"max_sim": 0.9999878704544272,
"top3_avg": 0.9999878704544272,
"vote": 1
},
{
"song_id": "109",
"canonical_song_id": 10,
"evidence_window_id": 10,
"combined_score": 0.8646899513807881,
"max_sim": 0.9496555015342091,
"top3_avg": 0.9496555015342091,
"vote": 1
},
{
"song_id": "116",
"canonical_song_id": 17,
"evidence_window_id": 17,
"combined_score": 0.8414633946738618,
"max_sim": 0.9238482163042909,
"top3_avg": 0.9238482163042909,
"vote": 1
}
]
}
],
"7": [
{
"query_id": "music20-q0020-t7-song111",
"song_id": "111",
"rank": 18,
"top3": [
{
"song_id": "109",
"canonical_song_id": 10,
"evidence_window_id": 10,
"combined_score": 0.8765411333280498,
"max_sim": 0.9628234814756109,
"top3_avg": 0.9628234814756109,
"vote": 1
},
{
"song_id": "116",
"canonical_song_id": 17,
"evidence_window_id": 17,
"combined_score": 0.8749381679370203,
"max_sim": 0.9610424088189115,
"top3_avg": 0.9610424088189115,
"vote": 1
},
{
"song_id": "118",
"canonical_song_id": 19,
"evidence_window_id": 19,
"combined_score": 0.8641276021561776,
"max_sim": 0.9490306690624195,
"top3_avg": 0.9490306690624195,
"vote": 1
}
]
},
{
"query_id": "music20-q0021-t7-song116",
"song_id": "116",
"rank": 2,
"top3": [
{
"song_id": "109",
"canonical_song_id": 10,
"evidence_window_id": 10,
"combined_score": 0.8701787704282636,
"max_sim": 0.9557541893647373,
"top3_avg": 0.9557541893647373,
"vote": 1
},
{
"song_id": "116",
"canonical_song_id": 17,
"evidence_window_id": 17,
"combined_score": 0.8674951972070233,
"max_sim": 0.9527724413411371,
"top3_avg": 0.9527724413411371,
"vote": 1
},
{
"song_id": "103",
"canonical_song_id": 4,
"evidence_window_id": 4,
"combined_score": 0.8659579133987426,
"max_sim": 0.9510643482208252,
"top3_avg": 0.9510643482208252,
"vote": 1
}
]
}
]
}
}
}
\ No newline at end of file
{
"backend": "faiss-as-pgvector-standin",
"note": "Uses song-level aggregation compatible with a future pgvector online path.",
"overall": {
"count": 22,
"top1": 0.909091,
"top3": 0.954545,
"top10": 0.954545,
"mrr": 0.934343,
"mean_rank": 1.8182,
"median_rank": 1.0
},
"by_query_type": {
"1": {
"count": 20,
"top1": 1.0,
"top3": 1.0,
"top10": 1.0,
"mrr": 1.0,
"mean_rank": 1.0,
"median_rank": 1.0
},
"7": {
"count": 2,
"top1": 0.0,
"top3": 0.5,
"top10": 0.5,
"mrr": 0.277778,
"mean_rank": 10.0,
"median_rank": 10.0
}
},
"examples": {
"1": [
{
"song_id": "100",
"rank": 1,
"top3": [
[
"100",
0.9099869644641876,
0.9999855160713196,
0.9999855160713196,
1
],
[
"116",
0.8674689626693726,
0.9527432918548584,
0.9527432918548584,
1
],
[
"103",
0.8665370559692382,
0.9517078399658203,
0.9517078399658203,
1
]
]
},
{
"song_id": "101",
"rank": 1,
"top3": [
[
"101",
0.9099996781349182,
0.9999996423721313,
0.9999996423721313,
1
],
[
"118",
0.8930539643764497,
0.9811710715293884,
0.9811710715293884,
1
],
[
"116",
0.8920178270339967,
0.9800198078155518,
0.9800198078155518,
1
]
]
},
{
"song_id": "102",
"rank": 1,
"top3": [
[
"102",
0.9099974250793457,
0.9999971389770508,
0.9999971389770508,
1
],
[
"113",
0.878619978427887,
0.9651333093643188,
0.9651333093643188,
1
],
[
"118",
0.8727551674842834,
0.9586168527603149,
0.9586168527603149,
1
]
]
},
{
"song_id": "103",
"rank": 1,
"top3": [
[
"103",
0.9078967189788818,
0.9976630210876465,
0.9976630210876465,
1
],
[
"116",
0.8892688846588135,
0.9769654273986816,
0.9769654273986816,
1
],
[
"109",
0.8786498045921325,
0.965166449546814,
0.965166449546814,
1
]
]
},
{
"song_id": "104",
"rank": 1,
"top3": [
[
"104",
0.9099890029430389,
0.999987781047821,
0.999987781047821,
1
],
[
"109",
0.8646899795532226,
0.9496555328369141,
0.9496555328369141,
1
],
[
"116",
0.8414634442329406,
0.9238482713699341,
0.9238482713699341,
1
]
]
}
],
"7": [
{
"song_id": "111",
"rank": 18,
"top3": [
[
"109",
0.8765411591529846,
0.9628235101699829,
0.9628235101699829,
1
],
[
"116",
0.8749382710456848,
0.9610425233840942,
0.9610425233840942,
1
],
[
"118",
0.8641276276111602,
0.9490306973457336,
0.9490306973457336,
1
]
]
},
{
"song_id": "116",
"rank": 2,
"top3": [
[
"109",
0.8701787447929383,
0.9557541608810425,
0.9557541608810425,
1
],
[
"116",
0.8674952483177185,
0.9527724981307983,
0.9527724981307983,
1
],
[
"103",
0.8659579670429229,
0.95106440782547,
0.95106440782547,
1
]
]
}
]
}
}
\ No newline at end of file
## 2026-06-04
- 新增 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md),补齐 `acr_pg_schema_v2.sql` 的真实落库样例、`pgvector` live 检索验证、lineage trigger 负例测试,以及当前召回/混淆结果解读。
- 新增 `acr-engine/scripts/live_pgvector_music20_eval.py`,支持对用户提供的 PostgreSQL 执行隔离 schema 建表、样例数据导入、`pgvector` live 检索、`retrieval_candidate` / `match_decision` 落表与评测报告生成。
- 新增 `acr-engine/data/pgvector_eval/music20/live_pgvector_report.json` 与 `songid_eval_report_fresh.json`,记录本轮 live PostgreSQL + pgvector 与 FAISS stand-in 的对齐结果:overall `top1=0.9091` / `top3=0.9545`,但 `type_7` 仍明显偏弱。
- 重写 [session-handoff 交接文档](./session-handoff.md),将其从历史流水账收敛为“下次启动即用”的启动手册,明确当前稳定结论、推荐阅读顺序、已验证/未验证边界,以及下一步应从 PostgreSQL v2 schema 与 Phase-1 encoder-only 执行链开始推进。
- 新增 [Phase-1 实施清单](./phase1-implementation-checklist.md),把 encoder-only 路线拆成主数据、reference set、feature set、索引、评测的可执行阶段。
- 新增 [模型与 Feature Set 初始化手册](./model-feature-registry-bootstrap.md),补齐 model_registry / feature_set_registry / reference_set_registry 的初始化约定与示例 SQL。
......
......@@ -54,6 +54,7 @@
| [acr-architecture.md](./acr-architecture.md) | 当前系统蓝图、角色分工、在线/离线链路 | 架构、开发、运维 |
| [sota-evolution-guide.md](./sota-evolution-guide.md) | SOTA 演进路径、Phase-1 encoder-only 方案、后续升级路线 | 架构、模型、检索 |
| [postgresql-data-model.md](./postgresql-data-model.md) | PostgreSQL 数据字典、DDL 设计意图、流程图、查询路径 | 数据、后端、检索、平台 |
| [postgres_db_schema_samples.md](./postgres_db_schema_samples.md) | PostgreSQL 实际落库样例、live pgvector 测试链路、召回/混淆结果 | 数据、后端、检索、平台 |
| [phase1-implementation-checklist.md](./phase1-implementation-checklist.md) | Phase-1 落地 checklist,按阶段拆执行项 | 架构、开发、平台 |
| [model-feature-registry-bootstrap.md](./model-feature-registry-bootstrap.md) | 模型、feature set、reference set 初始化手册 | 模型、检索、数据 |
| [training-data-and-pgvector-guide.md](./training-data-and-pgvector-guide.md) | 当前训练/manifest/pgvector 原型链说明 | 开发、数据 |
......