Harden lineage validation evidence for the PostgreSQL ACR path
Constraint: Each follow-up Ralph edit must update docs and preserve a push-ready, auditable validation trail
Rejected: Stop at a single audio_window negative test | It left recording/audio_embedding trigger coverage and report readability weaker than needed
Confidence: high
Scope-risk: narrow
Directive: Keep live retrieval reports self-explanatory enough for reviewers who only inspect JSON artifacts
Tested: /usr/local/miniconda3/bin/python scripts/live_pgvector_music20_eval.py --dsn 'postgres://d2:d2pass@127.0.0.1:5432/d2' --schema acr_test --reset-schema --output data/pgvector_eval/music20/live_pgvector_report.json; /usr/local/miniconda3/bin/python -m py_compile scripts/live_pgvector_music20_eval.py; git diff --check -- acr-engine/scripts/live_pgvector_music20_eval.py acr-engine/data/pgvector_eval/music20/live_pgvector_report.json docs/postgres_db_schema_samples.md docs/CHANGELOG.md docs/session-handoff.md
Not-tested: type_8/type_16 live JSONL coverage, MERT/MuQ live embeddings, multi-recording/cover-lane decision flow
Showing 5 changed files with 148 additions and 20 deletions
| ... | @@ -25,8 +25,29 @@ | ... | @@ -25,8 +25,29 @@ |
| 25 | }, | 25 | }, |
| 26 | "lineage_negative_test": { | 26 | "lineage_negative_test": { |
| 27 | "passed": true, | 27 | "passed": true, |
| 28 | "error_type": "RaiseException", | 28 | "cases": [ |
| 29 | "message": "Invalid asset_id=1 or recording_id=1000000 for audio_window" | 29 | { |
| 30 | "passed": true, | ||
| 31 | "case": "recording_lineage_mismatch", | ||
| 32 | "expected_guard": "recording.canonical_song_id must equal work.canonical_song_id", | ||
| 33 | "error_type": "RaiseException", | ||
| 34 | "message": "recording.canonical_song_id 1000000 mismatches work.canonical_song_id 1" | ||
| 35 | }, | ||
| 36 | { | ||
| 37 | "passed": true, | ||
| 38 | "case": "audio_window_lineage_mismatch", | ||
| 39 | "expected_guard": "audio_window recording/work/song lineage must match recording_asset + recording parents", | ||
| 40 | "error_type": "RaiseException", | ||
| 41 | "message": "Invalid asset_id=1 or recording_id=1000000 for audio_window" | ||
| 42 | }, | ||
| 43 | { | ||
| 44 | "passed": true, | ||
| 45 | "case": "audio_embedding_lineage_mismatch", | ||
| 46 | "expected_guard": "audio_embedding recording/work/song lineage must match the parent audio_window", | ||
| 47 | "error_type": "RaiseException", | ||
| 48 | "message": "audio_embedding lineage mismatch" | ||
| 49 | } | ||
| 50 | ] | ||
| 30 | }, | 51 | }, |
| 31 | "evaluation": { | 52 | "evaluation": { |
| 32 | "backend": "postgresql+pgvector-live", | 53 | "backend": "postgresql+pgvector-live", | ... | ... |
| ... | @@ -239,21 +239,99 @@ def ingest_references(conn: psycopg.Connection, refs: list[dict[str, Any]], feat | ... | @@ -239,21 +239,99 @@ def ingest_references(conn: psycopg.Connection, refs: list[dict[str, Any]], feat |
| 239 | return entities | 239 | return entities |
| 240 | 240 | ||
| 241 | 241 | ||
| 242 | def run_lineage_negative_test(conn: psycopg.Connection, entity: EntityIds) -> dict[str, Any]: | 242 | def _expect_insert_failure( |
| 243 | conn: psycopg.Connection, | ||
| 244 | sql: str, | ||
| 245 | params: tuple[Any, ...], | ||
| 246 | case_name: str, | ||
| 247 | expected_guard: str, | ||
| 248 | ) -> dict[str, Any]: | ||
| 243 | try: | 249 | try: |
| 244 | with conn.transaction(): | 250 | with conn.transaction(): |
| 245 | conn.execute( | 251 | conn.execute(sql, params) |
| 246 | """ | 252 | return { |
| 247 | INSERT INTO audio_window ( | 253 | 'passed': False, |
| 248 | asset_id, recording_id, work_id, canonical_song_id, window_index, | 254 | 'case': case_name, |
| 249 | start_sec, end_sec, duration_sec, segment_role, segment_type, quality_score, active_for_index | 255 | 'expected_guard': expected_guard, |
| 250 | ) VALUES (%s, %s, %s, %s, 999, 0.0, 8.0, 8.0, 'reference', 'bad_lineage', 0.0, TRUE); | 256 | 'note': 'bad lineage insert unexpectedly succeeded', |
| 251 | """, | 257 | } |
| 252 | (entity.asset_id, entity.recording_id + 999999, entity.work_id, entity.canonical_song_id), | ||
| 253 | ) | ||
| 254 | return {'passed': False, 'note': 'bad lineage insert unexpectedly succeeded'} | ||
| 255 | except Exception as exc: | 258 | except Exception as exc: |
| 256 | return {'passed': True, 'error_type': type(exc).__name__, 'message': str(exc).splitlines()[0]} | 259 | return { |
| 260 | 'passed': True, | ||
| 261 | 'case': case_name, | ||
| 262 | 'expected_guard': expected_guard, | ||
| 263 | 'error_type': type(exc).__name__, | ||
| 264 | 'message': str(exc).splitlines()[0], | ||
| 265 | } | ||
| 266 | |||
| 267 | |||
| 268 | def run_lineage_negative_tests(conn: psycopg.Connection, entity: EntityIds, feature_set_id: int) -> dict[str, Any]: | ||
| 269 | recording_case = _expect_insert_failure( | ||
| 270 | conn, | ||
| 271 | """ | ||
| 272 | INSERT INTO recording ( | ||
| 273 | work_id, canonical_song_id, recording_code, recording_title, artist_name, | ||
| 274 | album_name, version_type, is_reference, reference_priority, duration_sec, metadata_json | ||
| 275 | ) VALUES (%s, %s, %s, %s, %s, %s, %s, FALSE, %s, %s, %s::jsonb); | ||
| 276 | """, | ||
| 277 | ( | ||
| 278 | entity.work_id, | ||
| 279 | entity.canonical_song_id + 999999, | ||
| 280 | f'bad-rec-{entity.recording_id}', | ||
| 281 | 'Bad Recording Lineage', | ||
| 282 | 'Bad Artist', | ||
| 283 | 'bad-album', | ||
| 284 | 'bad_lineage_probe', | ||
| 285 | 9999, | ||
| 286 | 8.0, | ||
| 287 | json.dumps({'probe': 'recording_lineage_negative'}), | ||
| 288 | ), | ||
| 289 | 'recording_lineage_mismatch', | ||
| 290 | 'recording.canonical_song_id must equal work.canonical_song_id', | ||
| 291 | ) | ||
| 292 | |||
| 293 | audio_window_case = _expect_insert_failure( | ||
| 294 | conn, | ||
| 295 | """ | ||
| 296 | INSERT INTO audio_window ( | ||
| 297 | asset_id, recording_id, work_id, canonical_song_id, window_index, | ||
| 298 | start_sec, end_sec, duration_sec, segment_role, segment_type, quality_score, active_for_index | ||
| 299 | ) VALUES (%s, %s, %s, %s, 999, 0.0, 8.0, 8.0, 'reference', 'bad_lineage', 0.0, TRUE); | ||
| 300 | """, | ||
| 301 | (entity.asset_id, entity.recording_id + 999999, entity.work_id, entity.canonical_song_id), | ||
| 302 | 'audio_window_lineage_mismatch', | ||
| 303 | 'audio_window recording/work/song lineage must match recording_asset + recording parents', | ||
| 304 | ) | ||
| 305 | |||
| 306 | audio_embedding_case = _expect_insert_failure( | ||
| 307 | conn, | ||
| 308 | """ | ||
| 309 | INSERT INTO audio_embedding ( | ||
| 310 | feature_set_id, extraction_job_id, asset_id, window_id, recording_id, work_id, | ||
| 311 | canonical_song_id, embedding_storage_mode, embedding_uri, vector_norm, checksum, | ||
| 312 | is_indexed, metadata_json | ||
| 313 | ) VALUES (%s, NULL, %s, %s, %s, %s, %s, %s, NULL, %s, NULL, FALSE, %s::jsonb); | ||
| 314 | """, | ||
| 315 | ( | ||
| 316 | feature_set_id, | ||
| 317 | entity.asset_id, | ||
| 318 | entity.window_id, | ||
| 319 | entity.recording_id, | ||
| 320 | entity.work_id, | ||
| 321 | entity.canonical_song_id + 999999, | ||
| 322 | 'pgvector_inline_192_padded', | ||
| 323 | 1.0, | ||
| 324 | json.dumps({'probe': 'audio_embedding_lineage_negative'}), | ||
| 325 | ), | ||
| 326 | 'audio_embedding_lineage_mismatch', | ||
| 327 | 'audio_embedding recording/work/song lineage must match the parent audio_window', | ||
| 328 | ) | ||
| 329 | |||
| 330 | cases = [recording_case, audio_window_case, audio_embedding_case] | ||
| 331 | return { | ||
| 332 | 'passed': all(item['passed'] for item in cases), | ||
| 333 | 'cases': cases, | ||
| 334 | } | ||
| 257 | 335 | ||
| 258 | 336 | ||
| 259 | def fetch_raw_candidates(conn: psycopg.Connection, feature_set_id: int, query_vec: list[float], topn: int) -> list[dict[str, Any]]: | 337 | def fetch_raw_candidates(conn: psycopg.Connection, feature_set_id: int, query_vec: list[float], topn: int) -> list[dict[str, Any]]: |
| ... | @@ -390,7 +468,7 @@ def main() -> None: | ... | @@ -390,7 +468,7 @@ def main() -> None: |
| 390 | apply_schema(conn, Path(args.schema_sql)) | 468 | apply_schema(conn, Path(args.schema_sql)) |
| 391 | model_id, feature_set_id, reference_set_id, retrieval_index_id = seed_registry(conn) | 469 | model_id, feature_set_id, reference_set_id, retrieval_index_id = seed_registry(conn) |
| 392 | entities = ingest_references(conn, refs, feature_set_id, reference_set_id) | 470 | entities = ingest_references(conn, refs, feature_set_id, reference_set_id) |
| 393 | lineage_check = run_lineage_negative_test(conn, next(iter(entities.values()))) | 471 | lineage_check = run_lineage_negative_tests(conn, next(iter(entities.values())), feature_set_id) |
| 394 | report = evaluate_live(conn, feature_set_id, retrieval_index_id, queries, args.topn, args.topk) | 472 | report = evaluate_live(conn, feature_set_id, retrieval_index_id, queries, args.topn, args.topk) |
| 395 | conn.execute('UPDATE retrieval_index_registry SET row_count = %s WHERE retrieval_index_id = %s;', (len(refs), retrieval_index_id)) | 473 | conn.execute('UPDATE retrieval_index_registry SET row_count = %s WHERE retrieval_index_id = %s;', (len(refs), retrieval_index_id)) |
| 396 | counts = { | 474 | counts = { | ... | ... |
| 1 | ## 2026-06-04 | 1 | ## 2026-06-04 |
| 2 | 2 | ||
| 3 | - 更新 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md) 与 `acr-engine/scripts/live_pgvector_music20_eval.py`,把 lineage 负例验证从单条 `audio_window` 扩展到 `recording` / `audio_window` / `audio_embedding` 三类核心 trigger,并已重跑 live pgvector 报告确认检索指标不变;同时补充 `py_compile` 与 `diff --check` 通过的机械验证事实。 | ||
| 3 | - 新增 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md),补齐 `acr_pg_schema_v2.sql` 的真实落库样例、`pgvector` live 检索验证、lineage trigger 负例测试,以及当前召回/混淆结果解读。 | 4 | - 新增 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md),补齐 `acr_pg_schema_v2.sql` 的真实落库样例、`pgvector` live 检索验证、lineage trigger 负例测试,以及当前召回/混淆结果解读。 |
| 4 | - 新增 `acr-engine/scripts/live_pgvector_music20_eval.py`,支持对用户提供的 PostgreSQL 执行隔离 schema 建表、样例数据导入、`pgvector` live 检索、`retrieval_candidate` / `match_decision` 落表与评测报告生成。 | 5 | - 新增 `acr-engine/scripts/live_pgvector_music20_eval.py`,支持对用户提供的 PostgreSQL 执行隔离 schema 建表、样例数据导入、`pgvector` live 检索、`retrieval_candidate` / `match_decision` 落表与评测报告生成。 |
| 5 | - 新增 `acr-engine/data/pgvector_eval/music20/live_pgvector_report.json` 与 `songid_eval_report_fresh.json`,记录本轮 live PostgreSQL + pgvector 与 FAISS stand-in 的对齐结果:overall `top1=0.9091` / `top3=0.9545`,但 `type_7` 仍明显偏弱。 | 6 | - 新增 `acr-engine/data/pgvector_eval/music20/live_pgvector_report.json` 与 `songid_eval_report_fresh.json`,记录本轮 live PostgreSQL + pgvector 与 FAISS stand-in 的对齐结果:overall `top1=0.9091` / `top3=0.9545`,但 `type_7` 仍明显偏弱。 | ... | ... |
| ... | @@ -34,8 +34,11 @@ | ... | @@ -34,8 +34,11 @@ |
| 34 | - `query_type=7`: `top1=0.0`, `top3=0.5` | 34 | - `query_type=7`: `top1=0.0`, `top3=0.5` |
| 35 | 35 | ||
| 36 | 5. **lineage trigger 已被验证有效** | 36 | 5. **lineage trigger 已被验证有效** |
| 37 | - 脚本主动构造了一次错误 lineage 的 `audio_window` | 37 | - 脚本主动构造了三类错误 lineage: |
| 38 | - PostgreSQL 正确拒绝插入 | 38 | - `recording` |
| 39 | - `audio_window` | ||
| 40 | - `audio_embedding` | ||
| 41 | - PostgreSQL 都正确拒绝插入 | ||
| 39 | 42 | ||
| 40 | --- | 43 | --- |
| 41 | 44 | ||
| ... | @@ -222,8 +225,11 @@ flowchart LR | ... | @@ -222,8 +225,11 @@ flowchart LR |
| 222 | - `retrieval_index_registry` | 225 | - `retrieval_index_registry` |
| 223 | 5. 导入 20 条 reference 样例 | 226 | 5. 导入 20 条 reference 样例 |
| 224 | 6. 验证表计数是否正确 | 227 | 6. 验证表计数是否正确 |
| 225 | 7. 主动插入一条错误 lineage 的 `audio_window` | 228 | 7. 主动插入三类错误 lineage: |
| 226 | 8. 预期 PostgreSQL trigger 拒绝该写入 | 229 | - `recording.canonical_song_id` 与 `work.canonical_song_id` 不一致 |
| 230 | - `audio_window.recording_id` 与 `recording_asset.recording_id` 不一致 | ||
| 231 | - `audio_embedding` 的 `canonical_song_id` 与父 `audio_window` 不一致 | ||
| 232 | 8. 预期 PostgreSQL trigger 拒绝这些坏写入 | ||
| 227 | 233 | ||
| 228 | ### B. live 检索评测测试 | 234 | ### B. live 检索评测测试 |
| 229 | 235 | ||
| ... | @@ -291,6 +297,26 @@ flowchart LR | ... | @@ -291,6 +297,26 @@ flowchart LR |
| 291 | 297 | ||
| 292 | --- | 298 | --- |
| 293 | 299 | ||
| 300 | ## 本轮补充:完整 lineage trigger 负例覆盖 | ||
| 301 | |||
| 302 | 本轮重新执行 live 脚本后,`live_pgvector_report.json` 中的 `lineage_negative_test` 已从“单条 audio_window 验证”升级为“三类坏写入全部验证”: | ||
| 303 | |||
| 304 | | case | 结果 | PostgreSQL 返回 | | ||
| 305 | |---|---|---| | ||
| 306 | | `recording_lineage_mismatch` | 拒绝成功 | `recording.canonical_song_id ... mismatches work.canonical_song_id ...` | | ||
| 307 | | `audio_window_lineage_mismatch` | 拒绝成功 | `Invalid asset_id=... or recording_id=... for audio_window` | | ||
| 308 | | `audio_embedding_lineage_mismatch` | 拒绝成功 | `audio_embedding lineage mismatch` | | ||
| 309 | |||
| 310 | 这意味着: | ||
| 311 | |||
| 312 | > 当前 schema v2 的三条核心 lineage trigger,已经都有真实负例证据,而不只是“理论上存在”。 | ||
| 313 | |||
| 314 | 同时,本轮还补了两条机械验证证据: | ||
| 315 | - `py_compile` 通过:`live_pgvector_music20_eval.py` | ||
| 316 | - `git diff --check` 通过:本轮脚本、报告、文档变更未引入空白错误(如行尾空格)或残留的冲突标记 | ||
| 317 | |||
| 318 | --- | ||
| 319 | |||
| 294 | ## 混淆测试补充视图 | 320 | ## 混淆测试补充视图 |
| 295 | 321 | ||
| 296 | ### 1. 当前 live 样例视图 | 322 | ### 1. 当前 live 样例视图 | ... | ... |
| ... | @@ -178,12 +178,14 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql | ... | @@ -178,12 +178,14 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql |
| 178 | - Phase-1 方案、PostgreSQL 设计、实施 checklist、registry bootstrap 均已提交 | 178 | - Phase-1 方案、PostgreSQL 设计、实施 checklist、registry bootstrap 均已提交 |
| 179 | - architect 审核结论:**APPROVED** | 179 | - architect 审核结论:**APPROVED** |
| 180 | - 代码已推送远端 | 180 | - 代码已推送远端 |
| 181 | - PostgreSQL `acr_test` live 路径已再次验证:`recording` / `audio_window` / `audio_embedding` 三类 lineage trigger 均有真实负例证据 | ||
| 182 | - 机械校验已补齐:`live_pgvector_music20_eval.py` 的 `py_compile` 通过,相关变更 `diff --check` 通过 | ||
| 181 | 183 | ||
| 182 | ### 未验证 / 仍是缺口 | 184 | ### 未验证 / 仍是缺口 |
| 183 | - **未执行 live PostgreSQL apply**(当前环境缺少 `psql`) | ||
| 184 | - **未实际跑 MERT / MuQ encoder-only 特征抽取** | 185 | - **未实际跑 MERT / MuQ encoder-only 特征抽取** |
| 185 | - **未落 reference set 的真实业务数据** | 186 | - **未落 reference set 的真实业务数据** |
| 186 | - **未定义最终线上分数融合细则** | 187 | - **未定义最终线上分数融合细则** |
| 188 | - **type_8 / type_16 还没有进入当前 live JSONL 的 PostgreSQL 实测链** | ||
| 187 | 189 | ||
| 188 | 因此下次 session 应优先从这些未验证缺口里挑一条推进,而不是重复写总方案。 | 190 | 因此下次 session 应优先从这些未验证缺口里挑一条推进,而不是重复写总方案。 |
| 189 | 191 | ... | ... |
-
Please register or sign in to post a comment