Commit 96c9ce7d 96c9ce7d40ae89231e6f9d1a37d73e2c5bb380ff by cnb.bofCdSsphPA

Validate the PostgreSQL ACR storage path with live evidence

Constraint: The new data model had to be proven against the user-provided PostgreSQL instance and stay aligned with Phase-1 encoder-only decisions
Rejected: Document-only schema guidance without a live database run | It would leave retrieval correctness and table intent unproven
Confidence: high
Scope-risk: narrow
Directive: Keep future retrieval experiments writing through model/feature/reference registries instead of adding fixed per-model columns
Tested: /usr/local/miniconda3/bin/python scripts/live_pgvector_music20_eval.py --dsn 'postgres://d2:d2pass@127.0.0.1:5432/d2' --schema acr_test --reset-schema --output data/pgvector_eval/music20/live_pgvector_report.json; /usr/local/miniconda3/bin/python scripts/evaluate_songid_pgvector_path.py --reference-embeddings-jsonl data/pgvector_eval/music20/reference_embeddings.jsonl --query-embeddings-jsonl data/pgvector_eval/music20/query_embeddings.jsonl --output data/pgvector_eval/music20/songid_eval_report_fresh.json; /usr/local/miniconda3/bin/python -m py_compile scripts/live_pgvector_music20_eval.py scripts/evaluate_songid_pgvector_path.py; git diff --check -- docs/README.md docs/CHANGELOG.md docs/postgres_db_schema_samples.md acr-engine/scripts/live_pgvector_music20_eval.py acr-engine/data/pgvector_eval/music20/live_pgvector_report.json acr-engine/data/pgvector_eval/music20/songid_eval_report_fresh.json
Not-tested: MERT/MuQ live embeddings, type_8/type_16 live JSONL coverage, multi-recording/cover-lane decision flow
1 parent b220751b
1 {
2 "schema": "acr_test",
3 "dsn_redacted": "postgres://d2:***@127.0.0.1:5432/d2",
4 "input": {
5 "reference_embeddings_jsonl": "/workspace/acr-engine/data/pgvector_eval/music20/reference_embeddings.jsonl",
6 "query_embeddings_jsonl": "/workspace/acr-engine/data/pgvector_eval/music20/query_embeddings.jsonl",
7 "reference_count": 20,
8 "query_count": 22
9 },
10 "registry": {
11 "model_id": 1,
12 "feature_set_id": 1,
13 "reference_set_id": 1,
14 "retrieval_index_id": 1
15 },
16 "table_counts": {
17 "canonical_song": 20,
18 "work": 20,
19 "recording": 20,
20 "recording_asset": 20,
21 "audio_window": 20,
22 "audio_embedding": 20,
23 "retrieval_candidate": 220,
24 "match_decision": 22
25 },
26 "lineage_negative_test": {
27 "passed": true,
28 "error_type": "RaiseException",
29 "message": "Invalid asset_id=1 or recording_id=1000000 for audio_window"
30 },
31 "evaluation": {
32 "backend": "postgresql+pgvector-live",
33 "note": "Reference embeddings are stored in schema v2; 24-d logical embeddings are zero-padded to vector(192) for physical storage.",
34 "overall": {
35 "count": 22,
36 "top1": 0.909091,
37 "top3": 0.954545,
38 "top10": 0.954545,
39 "mrr": 0.934343,
40 "mean_rank": 1.8182,
41 "median_rank": 1.0
42 },
43 "by_query_type": {
44 "1": {
45 "count": 20,
46 "top1": 1.0,
47 "top3": 1.0,
48 "top10": 1.0,
49 "mrr": 1.0,
50 "mean_rank": 1.0,
51 "median_rank": 1.0
52 },
53 "7": {
54 "count": 2,
55 "top1": 0.0,
56 "top3": 0.5,
57 "top10": 0.5,
58 "mrr": 0.277778,
59 "mean_rank": 10.0,
60 "median_rank": 10.0
61 }
62 },
63 "confusion_focus": {
64 "7": {
65 "query_type": 7,
66 "metrics": {
67 "count": 2,
68 "top1": 0.0,
69 "top3": 0.5,
70 "top10": 0.5,
71 "mrr": 0.277778,
72 "mean_rank": 10.0,
73 "median_rank": 10.0
74 },
75 "interpretation": "light confusion / transformed query"
76 },
77 "8": {
78 "query_type": 8,
79 "metrics": {
80 "count": 0
81 },
82 "interpretation": "harder confusion bucket"
83 },
84 "16": {
85 "query_type": 16,
86 "metrics": {
87 "count": 0
88 },
89 "interpretation": "strong confusion or far-domain bucket"
90 }
91 },
92 "examples": {
93 "1": [
94 {
95 "query_id": "music20-q0000-t1-song100",
96 "song_id": "100",
97 "rank": 1,
98 "top3": [
99 {
100 "song_id": "100",
101 "canonical_song_id": 1,
102 "evidence_window_id": 1,
103 "combined_score": 0.9099869376417087,
104 "max_sim": 0.9999854862685651,
105 "top3_avg": 0.9999854862685651,
106 "vote": 1
107 },
108 {
109 "song_id": "116",
110 "canonical_song_id": 17,
111 "evidence_window_id": 17,
112 "combined_score": 0.8674688834706314,
113 "max_sim": 0.9527432038562573,
114 "top3_avg": 0.9527432038562573,
115 "vote": 1
116 },
117 {
118 "song_id": "103",
119 "canonical_song_id": 4,
120 "evidence_window_id": 4,
121 "combined_score": 0.8665370278518509,
122 "max_sim": 0.9517078087242788,
123 "top3_avg": 0.9517078087242788,
124 "vote": 1
125 }
126 ]
127 },
128 {
129 "query_id": "music20-q0001-t1-song101",
130 "song_id": "101",
131 "rank": 1,
132 "top3": [
133 {
134 "song_id": "101",
135 "canonical_song_id": 2,
136 "evidence_window_id": 2,
137 "combined_score": 0.9099997586011674,
138 "max_sim": 0.999999731779075,
139 "top3_avg": 0.999999731779075,
140 "vote": 1
141 },
142 {
143 "song_id": "118",
144 "canonical_song_id": 19,
145 "evidence_window_id": 19,
146 "combined_score": 0.8930541242989376,
147 "max_sim": 0.9811712492210417,
148 "top3_avg": 0.9811712492210417,
149 "vote": 1
150 },
151 {
152 "song_id": "116",
153 "canonical_song_id": 17,
154 "evidence_window_id": 17,
155 "combined_score": 0.892017854392,
156 "max_sim": 0.9800198382133333,
157 "top3_avg": 0.9800198382133333,
158 "vote": 1
159 }
160 ]
161 },
162 {
163 "query_id": "music20-q0002-t1-song102",
164 "song_id": "102",
165 "rank": 1,
166 "top3": [
167 {
168 "song_id": "102",
169 "canonical_song_id": 3,
170 "evidence_window_id": 3,
171 "combined_score": 0.9099973714353238,
172 "max_sim": 0.9999970793725819,
173 "top3_avg": 0.9999970793725819,
174 "vote": 1
175 },
176 {
177 "song_id": "113",
178 "canonical_song_id": 14,
179 "evidence_window_id": 14,
180 "combined_score": 0.878619819365752,
181 "max_sim": 0.9651331326286134,
182 "top3_avg": 0.9651331326286134,
183 "vote": 1
184 },
185 {
186 "song_id": "118",
187 "canonical_song_id": 19,
188 "evidence_window_id": 19,
189 "combined_score": 0.8727551417721799,
190 "max_sim": 0.9586168241913111,
191 "top3_avg": 0.9586168241913111,
192 "vote": 1
193 }
194 ]
195 },
196 {
197 "query_id": "music20-q0003-t1-song103",
198 "song_id": "103",
199 "rank": 1,
200 "top3": [
201 {
202 "song_id": "103",
203 "canonical_song_id": 4,
204 "evidence_window_id": 4,
205 "combined_score": 0.9078967457382905,
206 "max_sim": 0.9976630508203228,
207 "top3_avg": 0.9976630508203228,
208 "vote": 1
209 },
210 {
211 "song_id": "116",
212 "canonical_song_id": 17,
213 "evidence_window_id": 17,
214 "combined_score": 0.8892688048103843,
215 "max_sim": 0.9769653386782048,
216 "top3_avg": 0.9769653386782048,
217 "vote": 1
218 },
219 {
220 "song_id": "109",
221 "canonical_song_id": 10,
222 "evidence_window_id": 10,
223 "combined_score": 0.8786497490793317,
224 "max_sim": 0.9651663878659241,
225 "top3_avg": 0.9651663878659241,
226 "vote": 1
227 }
228 ]
229 },
230 {
231 "query_id": "music20-q0004-t1-song104",
232 "song_id": "104",
233 "rank": 1,
234 "top3": [
235 {
236 "song_id": "104",
237 "canonical_song_id": 5,
238 "evidence_window_id": 5,
239 "combined_score": 0.9099890834089845,
240 "max_sim": 0.9999878704544272,
241 "top3_avg": 0.9999878704544272,
242 "vote": 1
243 },
244 {
245 "song_id": "109",
246 "canonical_song_id": 10,
247 "evidence_window_id": 10,
248 "combined_score": 0.8646899513807881,
249 "max_sim": 0.9496555015342091,
250 "top3_avg": 0.9496555015342091,
251 "vote": 1
252 },
253 {
254 "song_id": "116",
255 "canonical_song_id": 17,
256 "evidence_window_id": 17,
257 "combined_score": 0.8414633946738618,
258 "max_sim": 0.9238482163042909,
259 "top3_avg": 0.9238482163042909,
260 "vote": 1
261 }
262 ]
263 }
264 ],
265 "7": [
266 {
267 "query_id": "music20-q0020-t7-song111",
268 "song_id": "111",
269 "rank": 18,
270 "top3": [
271 {
272 "song_id": "109",
273 "canonical_song_id": 10,
274 "evidence_window_id": 10,
275 "combined_score": 0.8765411333280498,
276 "max_sim": 0.9628234814756109,
277 "top3_avg": 0.9628234814756109,
278 "vote": 1
279 },
280 {
281 "song_id": "116",
282 "canonical_song_id": 17,
283 "evidence_window_id": 17,
284 "combined_score": 0.8749381679370203,
285 "max_sim": 0.9610424088189115,
286 "top3_avg": 0.9610424088189115,
287 "vote": 1
288 },
289 {
290 "song_id": "118",
291 "canonical_song_id": 19,
292 "evidence_window_id": 19,
293 "combined_score": 0.8641276021561776,
294 "max_sim": 0.9490306690624195,
295 "top3_avg": 0.9490306690624195,
296 "vote": 1
297 }
298 ]
299 },
300 {
301 "query_id": "music20-q0021-t7-song116",
302 "song_id": "116",
303 "rank": 2,
304 "top3": [
305 {
306 "song_id": "109",
307 "canonical_song_id": 10,
308 "evidence_window_id": 10,
309 "combined_score": 0.8701787704282636,
310 "max_sim": 0.9557541893647373,
311 "top3_avg": 0.9557541893647373,
312 "vote": 1
313 },
314 {
315 "song_id": "116",
316 "canonical_song_id": 17,
317 "evidence_window_id": 17,
318 "combined_score": 0.8674951972070233,
319 "max_sim": 0.9527724413411371,
320 "top3_avg": 0.9527724413411371,
321 "vote": 1
322 },
323 {
324 "song_id": "103",
325 "canonical_song_id": 4,
326 "evidence_window_id": 4,
327 "combined_score": 0.8659579133987426,
328 "max_sim": 0.9510643482208252,
329 "top3_avg": 0.9510643482208252,
330 "vote": 1
331 }
332 ]
333 }
334 ]
335 }
336 }
337 }
...\ No newline at end of file ...\ No newline at end of file
1 {
2 "backend": "faiss-as-pgvector-standin",
3 "note": "Uses song-level aggregation compatible with a future pgvector online path.",
4 "overall": {
5 "count": 22,
6 "top1": 0.909091,
7 "top3": 0.954545,
8 "top10": 0.954545,
9 "mrr": 0.934343,
10 "mean_rank": 1.8182,
11 "median_rank": 1.0
12 },
13 "by_query_type": {
14 "1": {
15 "count": 20,
16 "top1": 1.0,
17 "top3": 1.0,
18 "top10": 1.0,
19 "mrr": 1.0,
20 "mean_rank": 1.0,
21 "median_rank": 1.0
22 },
23 "7": {
24 "count": 2,
25 "top1": 0.0,
26 "top3": 0.5,
27 "top10": 0.5,
28 "mrr": 0.277778,
29 "mean_rank": 10.0,
30 "median_rank": 10.0
31 }
32 },
33 "examples": {
34 "1": [
35 {
36 "song_id": "100",
37 "rank": 1,
38 "top3": [
39 [
40 "100",
41 0.9099869644641876,
42 0.9999855160713196,
43 0.9999855160713196,
44 1
45 ],
46 [
47 "116",
48 0.8674689626693726,
49 0.9527432918548584,
50 0.9527432918548584,
51 1
52 ],
53 [
54 "103",
55 0.8665370559692382,
56 0.9517078399658203,
57 0.9517078399658203,
58 1
59 ]
60 ]
61 },
62 {
63 "song_id": "101",
64 "rank": 1,
65 "top3": [
66 [
67 "101",
68 0.9099996781349182,
69 0.9999996423721313,
70 0.9999996423721313,
71 1
72 ],
73 [
74 "118",
75 0.8930539643764497,
76 0.9811710715293884,
77 0.9811710715293884,
78 1
79 ],
80 [
81 "116",
82 0.8920178270339967,
83 0.9800198078155518,
84 0.9800198078155518,
85 1
86 ]
87 ]
88 },
89 {
90 "song_id": "102",
91 "rank": 1,
92 "top3": [
93 [
94 "102",
95 0.9099974250793457,
96 0.9999971389770508,
97 0.9999971389770508,
98 1
99 ],
100 [
101 "113",
102 0.878619978427887,
103 0.9651333093643188,
104 0.9651333093643188,
105 1
106 ],
107 [
108 "118",
109 0.8727551674842834,
110 0.9586168527603149,
111 0.9586168527603149,
112 1
113 ]
114 ]
115 },
116 {
117 "song_id": "103",
118 "rank": 1,
119 "top3": [
120 [
121 "103",
122 0.9078967189788818,
123 0.9976630210876465,
124 0.9976630210876465,
125 1
126 ],
127 [
128 "116",
129 0.8892688846588135,
130 0.9769654273986816,
131 0.9769654273986816,
132 1
133 ],
134 [
135 "109",
136 0.8786498045921325,
137 0.965166449546814,
138 0.965166449546814,
139 1
140 ]
141 ]
142 },
143 {
144 "song_id": "104",
145 "rank": 1,
146 "top3": [
147 [
148 "104",
149 0.9099890029430389,
150 0.999987781047821,
151 0.999987781047821,
152 1
153 ],
154 [
155 "109",
156 0.8646899795532226,
157 0.9496555328369141,
158 0.9496555328369141,
159 1
160 ],
161 [
162 "116",
163 0.8414634442329406,
164 0.9238482713699341,
165 0.9238482713699341,
166 1
167 ]
168 ]
169 }
170 ],
171 "7": [
172 {
173 "song_id": "111",
174 "rank": 18,
175 "top3": [
176 [
177 "109",
178 0.8765411591529846,
179 0.9628235101699829,
180 0.9628235101699829,
181 1
182 ],
183 [
184 "116",
185 0.8749382710456848,
186 0.9610425233840942,
187 0.9610425233840942,
188 1
189 ],
190 [
191 "118",
192 0.8641276276111602,
193 0.9490306973457336,
194 0.9490306973457336,
195 1
196 ]
197 ]
198 },
199 {
200 "song_id": "116",
201 "rank": 2,
202 "top3": [
203 [
204 "109",
205 0.8701787447929383,
206 0.9557541608810425,
207 0.9557541608810425,
208 1
209 ],
210 [
211 "116",
212 0.8674952483177185,
213 0.9527724981307983,
214 0.9527724981307983,
215 1
216 ],
217 [
218 "103",
219 0.8659579670429229,
220 0.95106440782547,
221 0.95106440782547,
222 1
223 ]
224 ]
225 }
226 ]
227 }
228 }
...\ No newline at end of file ...\ No newline at end of file
1 ## 2026-06-04 1 ## 2026-06-04
2 2
3 - 新增 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md),补齐 `acr_pg_schema_v2.sql` 的真实落库样例、`pgvector` live 检索验证、lineage trigger 负例测试,以及当前召回/混淆结果解读。
4 - 新增 `acr-engine/scripts/live_pgvector_music20_eval.py`,支持对用户提供的 PostgreSQL 执行隔离 schema 建表、样例数据导入、`pgvector` live 检索、`retrieval_candidate` / `match_decision` 落表与评测报告生成。
5 - 新增 `acr-engine/data/pgvector_eval/music20/live_pgvector_report.json` 和 `songid_eval_report_fresh.json`,记录本轮 live PostgreSQL + pgvector 与 FAISS stand-in 的对齐结果:overall `top1=0.9091` / `top3=0.9545`,但 `type_7` 仍明显偏弱。
3 - 重写 [session-handoff 交接文档](./session-handoff.md),将其从历史流水账收敛为“下次启动即用”的启动手册,明确当前稳定结论、推荐阅读顺序、已验证/未验证边界,以及下一步应从 PostgreSQL v2 schema 与 Phase-1 encoder-only 执行链开始推进。 6 - 重写 [session-handoff 交接文档](./session-handoff.md),将其从历史流水账收敛为“下次启动即用”的启动手册,明确当前稳定结论、推荐阅读顺序、已验证/未验证边界,以及下一步应从 PostgreSQL v2 schema 与 Phase-1 encoder-only 执行链开始推进。
4 - 新增 [Phase-1 实施清单](./phase1-implementation-checklist.md),把 encoder-only 路线拆成主数据、reference set、feature set、索引、评测的可执行阶段。 7 - 新增 [Phase-1 实施清单](./phase1-implementation-checklist.md),把 encoder-only 路线拆成主数据、reference set、feature set、索引、评测的可执行阶段。
5 - 新增 [模型与 Feature Set 初始化手册](./model-feature-registry-bootstrap.md),补齐 model_registry / feature_set_registry / reference_set_registry 的初始化约定与示例 SQL。 8 - 新增 [模型与 Feature Set 初始化手册](./model-feature-registry-bootstrap.md),补齐 model_registry / feature_set_registry / reference_set_registry 的初始化约定与示例 SQL。
......
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
54 | [acr-architecture.md](./acr-architecture.md) | 当前系统蓝图、角色分工、在线/离线链路 | 架构、开发、运维 | 54 | [acr-architecture.md](./acr-architecture.md) | 当前系统蓝图、角色分工、在线/离线链路 | 架构、开发、运维 |
55 | [sota-evolution-guide.md](./sota-evolution-guide.md) | SOTA 演进路径、Phase-1 encoder-only 方案、后续升级路线 | 架构、模型、检索 | 55 | [sota-evolution-guide.md](./sota-evolution-guide.md) | SOTA 演进路径、Phase-1 encoder-only 方案、后续升级路线 | 架构、模型、检索 |
56 | [postgresql-data-model.md](./postgresql-data-model.md) | PostgreSQL 数据字典、DDL 设计意图、流程图、查询路径 | 数据、后端、检索、平台 | 56 | [postgresql-data-model.md](./postgresql-data-model.md) | PostgreSQL 数据字典、DDL 设计意图、流程图、查询路径 | 数据、后端、检索、平台 |
57 | [postgres_db_schema_samples.md](./postgres_db_schema_samples.md) | PostgreSQL 实际落库样例、live pgvector 测试链路、召回/混淆结果 | 数据、后端、检索、平台 |
57 | [phase1-implementation-checklist.md](./phase1-implementation-checklist.md) | Phase-1 落地 checklist,按阶段拆执行项 | 架构、开发、平台 | 58 | [phase1-implementation-checklist.md](./phase1-implementation-checklist.md) | Phase-1 落地 checklist,按阶段拆执行项 | 架构、开发、平台 |
58 | [model-feature-registry-bootstrap.md](./model-feature-registry-bootstrap.md) | 模型、feature set、reference set 初始化手册 | 模型、检索、数据 | 59 | [model-feature-registry-bootstrap.md](./model-feature-registry-bootstrap.md) | 模型、feature set、reference set 初始化手册 | 模型、检索、数据 |
59 | [training-data-and-pgvector-guide.md](./training-data-and-pgvector-guide.md) | 当前训练/manifest/pgvector 原型链说明 | 开发、数据 | 60 | [training-data-and-pgvector-guide.md](./training-data-and-pgvector-guide.md) | 当前训练/manifest/pgvector 原型链说明 | 开发、数据 |
......