Commit e54e2ff2 e54e2ff299a0b4d476f1a0d5e899cc4be6efb1b2 by cnb.bofCdSsphPA

Harden lineage validation evidence for the PostgreSQL ACR path

Constraint: Each follow-up Ralph edit must update docs and preserve a push-ready, auditable validation trail
Rejected: Stop at a single audio_window negative test | It left recording/audio_embedding trigger coverage and report readability weaker than needed
Confidence: high
Scope-risk: narrow
Directive: Keep live retrieval reports self-explanatory enough for reviewers who only inspect JSON artifacts
Tested: /usr/local/miniconda3/bin/python scripts/live_pgvector_music20_eval.py --dsn 'postgres://d2:d2pass@127.0.0.1:5432/d2' --schema acr_test --reset-schema --output data/pgvector_eval/music20/live_pgvector_report.json; /usr/local/miniconda3/bin/python -m py_compile scripts/live_pgvector_music20_eval.py; git diff --check -- acr-engine/scripts/live_pgvector_music20_eval.py acr-engine/data/pgvector_eval/music20/live_pgvector_report.json docs/postgres_db_schema_samples.md docs/CHANGELOG.md docs/session-handoff.md
Not-tested: type_8/type_16 live JSONL coverage, MERT/MuQ live embeddings, multi-recording/cover-lane decision flow
1 parent 96c9ce7d
...@@ -25,8 +25,29 @@ ...@@ -25,8 +25,29 @@
25 }, 25 },
26 "lineage_negative_test": { 26 "lineage_negative_test": {
27 "passed": true, 27 "passed": true,
28 "error_type": "RaiseException", 28 "cases": [
29 "message": "Invalid asset_id=1 or recording_id=1000000 for audio_window" 29 {
30 "passed": true,
31 "case": "recording_lineage_mismatch",
32 "expected_guard": "recording.canonical_song_id must equal work.canonical_song_id",
33 "error_type": "RaiseException",
34 "message": "recording.canonical_song_id 1000000 mismatches work.canonical_song_id 1"
35 },
36 {
37 "passed": true,
38 "case": "audio_window_lineage_mismatch",
39 "expected_guard": "audio_window recording/work/song lineage must match recording_asset + recording parents",
40 "error_type": "RaiseException",
41 "message": "Invalid asset_id=1 or recording_id=1000000 for audio_window"
42 },
43 {
44 "passed": true,
45 "case": "audio_embedding_lineage_mismatch",
46 "expected_guard": "audio_embedding recording/work/song lineage must match the parent audio_window",
47 "error_type": "RaiseException",
48 "message": "audio_embedding lineage mismatch"
49 }
50 ]
30 }, 51 },
31 "evaluation": { 52 "evaluation": {
32 "backend": "postgresql+pgvector-live", 53 "backend": "postgresql+pgvector-live",
......
...@@ -239,21 +239,99 @@ def ingest_references(conn: psycopg.Connection, refs: list[dict[str, Any]], feat ...@@ -239,21 +239,99 @@ def ingest_references(conn: psycopg.Connection, refs: list[dict[str, Any]], feat
239 return entities 239 return entities
240 240
241 241
242 def run_lineage_negative_test(conn: psycopg.Connection, entity: EntityIds) -> dict[str, Any]: 242 def _expect_insert_failure(
243 conn: psycopg.Connection,
244 sql: str,
245 params: tuple[Any, ...],
246 case_name: str,
247 expected_guard: str,
248 ) -> dict[str, Any]:
243 try: 249 try:
244 with conn.transaction(): 250 with conn.transaction():
245 conn.execute( 251 conn.execute(sql, params)
246 """ 252 return {
247 INSERT INTO audio_window ( 253 'passed': False,
248 asset_id, recording_id, work_id, canonical_song_id, window_index, 254 'case': case_name,
249 start_sec, end_sec, duration_sec, segment_role, segment_type, quality_score, active_for_index 255 'expected_guard': expected_guard,
250 ) VALUES (%s, %s, %s, %s, 999, 0.0, 8.0, 8.0, 'reference', 'bad_lineage', 0.0, TRUE); 256 'note': 'bad lineage insert unexpectedly succeeded',
251 """, 257 }
252 (entity.asset_id, entity.recording_id + 999999, entity.work_id, entity.canonical_song_id),
253 )
254 return {'passed': False, 'note': 'bad lineage insert unexpectedly succeeded'}
255 except Exception as exc: 258 except Exception as exc:
256 return {'passed': True, 'error_type': type(exc).__name__, 'message': str(exc).splitlines()[0]} 259 return {
260 'passed': True,
261 'case': case_name,
262 'expected_guard': expected_guard,
263 'error_type': type(exc).__name__,
264 'message': str(exc).splitlines()[0],
265 }
266
267
268 def run_lineage_negative_tests(conn: psycopg.Connection, entity: EntityIds, feature_set_id: int) -> dict[str, Any]:
269 recording_case = _expect_insert_failure(
270 conn,
271 """
272 INSERT INTO recording (
273 work_id, canonical_song_id, recording_code, recording_title, artist_name,
274 album_name, version_type, is_reference, reference_priority, duration_sec, metadata_json
275 ) VALUES (%s, %s, %s, %s, %s, %s, %s, FALSE, %s, %s, %s::jsonb);
276 """,
277 (
278 entity.work_id,
279 entity.canonical_song_id + 999999,
280 f'bad-rec-{entity.recording_id}',
281 'Bad Recording Lineage',
282 'Bad Artist',
283 'bad-album',
284 'bad_lineage_probe',
285 9999,
286 8.0,
287 json.dumps({'probe': 'recording_lineage_negative'}),
288 ),
289 'recording_lineage_mismatch',
290 'recording.canonical_song_id must equal work.canonical_song_id',
291 )
292
293 audio_window_case = _expect_insert_failure(
294 conn,
295 """
296 INSERT INTO audio_window (
297 asset_id, recording_id, work_id, canonical_song_id, window_index,
298 start_sec, end_sec, duration_sec, segment_role, segment_type, quality_score, active_for_index
299 ) VALUES (%s, %s, %s, %s, 999, 0.0, 8.0, 8.0, 'reference', 'bad_lineage', 0.0, TRUE);
300 """,
301 (entity.asset_id, entity.recording_id + 999999, entity.work_id, entity.canonical_song_id),
302 'audio_window_lineage_mismatch',
303 'audio_window recording/work/song lineage must match recording_asset + recording parents',
304 )
305
306 audio_embedding_case = _expect_insert_failure(
307 conn,
308 """
309 INSERT INTO audio_embedding (
310 feature_set_id, extraction_job_id, asset_id, window_id, recording_id, work_id,
311 canonical_song_id, embedding_storage_mode, embedding_uri, vector_norm, checksum,
312 is_indexed, metadata_json
313 ) VALUES (%s, NULL, %s, %s, %s, %s, %s, %s, NULL, %s, NULL, FALSE, %s::jsonb);
314 """,
315 (
316 feature_set_id,
317 entity.asset_id,
318 entity.window_id,
319 entity.recording_id,
320 entity.work_id,
321 entity.canonical_song_id + 999999,
322 'pgvector_inline_192_padded',
323 1.0,
324 json.dumps({'probe': 'audio_embedding_lineage_negative'}),
325 ),
326 'audio_embedding_lineage_mismatch',
327 'audio_embedding recording/work/song lineage must match the parent audio_window',
328 )
329
330 cases = [recording_case, audio_window_case, audio_embedding_case]
331 return {
332 'passed': all(item['passed'] for item in cases),
333 'cases': cases,
334 }
257 335
258 336
259 def fetch_raw_candidates(conn: psycopg.Connection, feature_set_id: int, query_vec: list[float], topn: int) -> list[dict[str, Any]]: 337 def fetch_raw_candidates(conn: psycopg.Connection, feature_set_id: int, query_vec: list[float], topn: int) -> list[dict[str, Any]]:
...@@ -390,7 +468,7 @@ def main() -> None: ...@@ -390,7 +468,7 @@ def main() -> None:
390 apply_schema(conn, Path(args.schema_sql)) 468 apply_schema(conn, Path(args.schema_sql))
391 model_id, feature_set_id, reference_set_id, retrieval_index_id = seed_registry(conn) 469 model_id, feature_set_id, reference_set_id, retrieval_index_id = seed_registry(conn)
392 entities = ingest_references(conn, refs, feature_set_id, reference_set_id) 470 entities = ingest_references(conn, refs, feature_set_id, reference_set_id)
393 lineage_check = run_lineage_negative_test(conn, next(iter(entities.values()))) 471 lineage_check = run_lineage_negative_tests(conn, next(iter(entities.values())), feature_set_id)
394 report = evaluate_live(conn, feature_set_id, retrieval_index_id, queries, args.topn, args.topk) 472 report = evaluate_live(conn, feature_set_id, retrieval_index_id, queries, args.topn, args.topk)
395 conn.execute('UPDATE retrieval_index_registry SET row_count = %s WHERE retrieval_index_id = %s;', (len(refs), retrieval_index_id)) 473 conn.execute('UPDATE retrieval_index_registry SET row_count = %s WHERE retrieval_index_id = %s;', (len(refs), retrieval_index_id))
396 counts = { 474 counts = {
......
1 ## 2026-06-04 1 ## 2026-06-04
2 2
3 - 更新 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md) 与 `acr-engine/scripts/live_pgvector_music20_eval.py`,把 lineage 负例验证从单条 `audio_window` 扩展到 `recording` / `audio_window` / `audio_embedding` 三类核心 trigger,并已重跑 live pgvector 报告确认检索指标不变;同时补充 `py_compile` 与 `diff --check` 通过的机械验证事实。
3 - 新增 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md),补齐 `acr_pg_schema_v2.sql` 的真实落库样例、`pgvector` live 检索验证、lineage trigger 负例测试,以及当前召回/混淆结果解读。 4 - 新增 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md),补齐 `acr_pg_schema_v2.sql` 的真实落库样例、`pgvector` live 检索验证、lineage trigger 负例测试,以及当前召回/混淆结果解读。
4 - 新增 `acr-engine/scripts/live_pgvector_music20_eval.py`,支持对用户提供的 PostgreSQL 执行隔离 schema 建表、样例数据导入、`pgvector` live 检索、`retrieval_candidate` / `match_decision` 落表与评测报告生成。 5 - 新增 `acr-engine/scripts/live_pgvector_music20_eval.py`,支持对用户提供的 PostgreSQL 执行隔离 schema 建表、样例数据导入、`pgvector` live 检索、`retrieval_candidate` / `match_decision` 落表与评测报告生成。
5 - 新增 `acr-engine/data/pgvector_eval/music20/live_pgvector_report.json` 与 `songid_eval_report_fresh.json`,记录本轮 live PostgreSQL + pgvector 与 FAISS stand-in 的对齐结果:overall `top1=0.9091` / `top3=0.9545`,但 `type_7` 仍明显偏弱。 6 - 新增 `acr-engine/data/pgvector_eval/music20/live_pgvector_report.json` 与 `songid_eval_report_fresh.json`,记录本轮 live PostgreSQL + pgvector 与 FAISS stand-in 的对齐结果:overall `top1=0.9091` / `top3=0.9545`,但 `type_7` 仍明显偏弱。
......
...@@ -34,8 +34,11 @@ ...@@ -34,8 +34,11 @@
34 - `query_type=7`: `top1=0.0`, `top3=0.5` 34 - `query_type=7`: `top1=0.0`, `top3=0.5`
35 35
36 5. **lineage trigger 已被验证有效** 36 5. **lineage trigger 已被验证有效**
37 - 脚本主动构造了一次错误 lineage 的 `audio_window` 37 - 脚本主动构造了三类错误 lineage:
38 - PostgreSQL 正确拒绝插入 38 - `recording`
39 - `audio_window`
40 - `audio_embedding`
41 - PostgreSQL 都正确拒绝插入
39 42
40 --- 43 ---
41 44
...@@ -222,8 +225,11 @@ flowchart LR ...@@ -222,8 +225,11 @@ flowchart LR
222 - `retrieval_index_registry` 225 - `retrieval_index_registry`
223 5. 导入 20 条 reference 样例 226 5. 导入 20 条 reference 样例
224 6. 验证表计数是否正确 227 6. 验证表计数是否正确
225 7. 主动插入一条错误 lineage 的 `audio_window` 228 7. 主动插入三类错误 lineage:
226 8. 预期 PostgreSQL trigger 拒绝该写入 229 - `recording.canonical_song_id` 与 `work.canonical_song_id` 不一致
230 - `audio_window.recording_id` 与 `recording_asset.recording_id` 不一致
231 - `audio_embedding` 的 `canonical_song_id` 与父 `audio_window` 不一致
232 8. 预期 PostgreSQL trigger 拒绝这些坏写入
227 233
228 ### B. live 检索评测测试 234 ### B. live 检索评测测试
229 235
...@@ -291,6 +297,26 @@ flowchart LR ...@@ -291,6 +297,26 @@ flowchart LR
291 297
292 --- 298 ---
293 299
300 ## 本轮补充:完整 lineage trigger 负例覆盖
301
302 本轮重新执行 live 脚本后,`live_pgvector_report.json` 中的 `lineage_negative_test` 已从“单条 audio_window 验证”升级为“三类坏写入全部验证”:
303
304 | case | 结果 | PostgreSQL 返回 |
305 |---|---|---|
306 | `recording_lineage_mismatch` | 拒绝成功 | `recording.canonical_song_id ... mismatches work.canonical_song_id ...` |
307 | `audio_window_lineage_mismatch` | 拒绝成功 | `Invalid asset_id=... or recording_id=... for audio_window` |
308 | `audio_embedding_lineage_mismatch` | 拒绝成功 | `audio_embedding lineage mismatch` |
309
310 这意味着:
311
312 > 当前 schema v2 的三条核心 lineage trigger,已经都有真实负例证据,而不只是“理论上存在”。
313
314 同时,本轮还补了两条机械验证证据:
315 - `py_compile` 通过:`live_pgvector_music20_eval.py`
316 - `git diff --check` 通过:本轮脚本、报告、文档变更无格式问题
317
318 ---
319
294 ## 混淆测试补充视图 320 ## 混淆测试补充视图
295 321
296 ### 1. 当前 live 样例视图 322 ### 1. 当前 live 样例视图
......
...@@ -178,12 +178,14 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql ...@@ -178,12 +178,14 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql
178 - Phase-1 方案、PostgreSQL 设计、实施 checklist、registry bootstrap 均已提交 178 - Phase-1 方案、PostgreSQL 设计、实施 checklist、registry bootstrap 均已提交
179 - architect 审核结论:**APPROVED** 179 - architect 审核结论:**APPROVED**
180 - 代码已推送远端 180 - 代码已推送远端
181 - PostgreSQL `acr_test` live 路径已再次验证:`recording` / `audio_window` / `audio_embedding` 三类 lineage trigger 均有真实负例证据
182 - 机械校验已补齐:`live_pgvector_music20_eval.py` 的 `py_compile` 通过,相关变更 `diff --check` 通过
181 183
182 ### 未验证 / 仍是缺口 184 ### 未验证 / 仍是缺口
183 - **未执行 live PostgreSQL apply**(当前环境缺少 `psql`)
184 - **未实际跑 MERT / MuQ encoder-only 特征抽取** 185 - **未实际跑 MERT / MuQ encoder-only 特征抽取**
185 - **未落 reference set 的真实业务数据** 186 - **未落 reference set 的真实业务数据**
186 - **未定义最终线上分数融合细则** 187 - **未定义最终线上分数融合细则**
188 - **type_8 / type_16 还没有进入当前 live JSONL 的 PostgreSQL 实测链**
187 189
188 因此下次 session 应优先从这些未验证缺口里挑一条推进,而不是重复写总方案。 190 因此下次 session 应优先从这些未验证缺口里挑一条推进,而不是重复写总方案。
189 191
......