Commit e54e2ff2 e54e2ff299a0b4d476f1a0d5e899cc4be6efb1b2 by cnb.bofCdSsphPA

Harden lineage validation evidence for the PostgreSQL ACR path

Constraint: Each follow-up Ralph edit must update docs and preserve a push-ready, auditable validation trail
Rejected: Stop at a single audio_window negative test | It left recording/audio_embedding trigger coverage and report readability weaker than needed
Confidence: high
Scope-risk: narrow
Directive: Keep live retrieval reports self-explanatory enough for reviewers who only inspect JSON artifacts
Tested: /usr/local/miniconda3/bin/python scripts/live_pgvector_music20_eval.py --dsn 'postgres://d2:d2pass@127.0.0.1:5432/d2' --schema acr_test --reset-schema --output data/pgvector_eval/music20/live_pgvector_report.json; /usr/local/miniconda3/bin/python -m py_compile scripts/live_pgvector_music20_eval.py; git diff --check -- acr-engine/scripts/live_pgvector_music20_eval.py acr-engine/data/pgvector_eval/music20/live_pgvector_report.json docs/postgres_db_schema_samples.md docs/CHANGELOG.md docs/session-handoff.md
Not-tested: type_8/type_16 live JSONL coverage, MERT/MuQ live embeddings, multi-recording/cover-lane decision flow
1 parent 96c9ce7d
......@@ -25,8 +25,29 @@
},
"lineage_negative_test": {
"passed": true,
"error_type": "RaiseException",
"message": "Invalid asset_id=1 or recording_id=1000000 for audio_window"
"cases": [
{
"passed": true,
"case": "recording_lineage_mismatch",
"expected_guard": "recording.canonical_song_id must equal work.canonical_song_id",
"error_type": "RaiseException",
"message": "recording.canonical_song_id 1000000 mismatches work.canonical_song_id 1"
},
{
"passed": true,
"case": "audio_window_lineage_mismatch",
"expected_guard": "audio_window recording/work/song lineage must match recording_asset + recording parents",
"error_type": "RaiseException",
"message": "Invalid asset_id=1 or recording_id=1000000 for audio_window"
},
{
"passed": true,
"case": "audio_embedding_lineage_mismatch",
"expected_guard": "audio_embedding recording/work/song lineage must match the parent audio_window",
"error_type": "RaiseException",
"message": "audio_embedding lineage mismatch"
}
]
},
"evaluation": {
"backend": "postgresql+pgvector-live",
......
......@@ -239,21 +239,99 @@ def ingest_references(conn: psycopg.Connection, refs: list[dict[str, Any]], feat
return entities
def run_lineage_negative_test(conn: psycopg.Connection, entity: EntityIds) -> dict[str, Any]:
def _expect_insert_failure(
    conn: psycopg.Connection,
    sql: str,
    params: tuple[Any, ...],
    case_name: str,
    expected_guard: str,
) -> dict[str, Any]:
    """Run an INSERT that is expected to be rejected and report the outcome.

    The statement is executed inside ``conn.transaction()`` so that a failed
    insert is confined to that transaction block.

    Args:
        conn: Connection used to open the transaction and execute ``sql``.
        sql: Parameterised INSERT statement carrying deliberately bad lineage.
        params: Values bound to the placeholders in ``sql``.
        case_name: Identifier stored under ``'case'`` in the returned dict.
        expected_guard: Human-readable description of the guard that should
            reject the write; stored under ``'expected_guard'``.

    Returns:
        A dict whose ``'passed'`` flag is ``True`` when the insert raised and
        ``False`` when it unexpectedly succeeded. On rejection the dict also
        carries ``'error_type'`` and the first line of the error ``'message'``;
        on unexpected success it carries an explanatory ``'note'``.
    """
    try:
        with conn.transaction():
            conn.execute(sql, params)
    except Exception as exc:
        # Any failure counts as a rejection here. The exception type and the
        # first message line are recorded so a reader of the report can check
        # that the rejection came from the expected guard.
        # An exception with an empty message yields no lines at all, so fall
        # back to '' instead of indexing into an empty list.
        first_line = next(iter(str(exc).splitlines()), '')
        return {
            'passed': True,
            'case': case_name,
            'expected_guard': expected_guard,
            'error_type': type(exc).__name__,
            'message': first_line,
        }
    # Reaching this point means the bad-lineage row was accepted.
    return {
        'passed': False,
        'case': case_name,
        'expected_guard': expected_guard,
        'note': 'bad lineage insert unexpectedly succeeded',
    }
def run_lineage_negative_tests(conn: psycopg.Connection, entity: EntityIds, feature_set_id: int) -> dict[str, Any]:
    """Probe three bad-lineage inserts and summarise the results.

    Each probe takes the ids of an already ingested ``entity``, shifts exactly
    one of them by 999999, and hands the resulting INSERT to
    ``_expect_insert_failure``. The probes run in this order:

    1. ``recording`` with a shifted ``canonical_song_id``.
    2. ``audio_window`` with a shifted ``recording_id``.
    3. ``audio_embedding`` with a shifted ``canonical_song_id``.

    Args:
        conn: Connection the probe inserts are executed on.
        entity: Ids of the ingested reference row the probes are derived from.
        feature_set_id: Feature set id used by the ``audio_embedding`` probe.

    Returns:
        ``{'passed': <True only if every probe passed>, 'cases': [...]}`` where
        ``cases`` holds the per-probe result dicts in execution order.
    """
    results: list[dict[str, Any]] = []

    # Probe 1: recording whose canonical_song_id is shifted away from the entity's.
    recording_sql = """
        INSERT INTO recording (
            work_id, canonical_song_id, recording_code, recording_title, artist_name,
            album_name, version_type, is_reference, reference_priority, duration_sec, metadata_json
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, FALSE, %s, %s, %s::jsonb);
        """
    recording_params = (
        entity.work_id,
        entity.canonical_song_id + 999999,
        f'bad-rec-{entity.recording_id}',
        'Bad Recording Lineage',
        'Bad Artist',
        'bad-album',
        'bad_lineage_probe',
        9999,
        8.0,
        json.dumps({'probe': 'recording_lineage_negative'}),
    )
    results.append(
        _expect_insert_failure(
            conn,
            recording_sql,
            recording_params,
            'recording_lineage_mismatch',
            'recording.canonical_song_id must equal work.canonical_song_id',
        )
    )

    # Probe 2: audio_window whose recording_id is shifted away from the entity's.
    window_sql = """
        INSERT INTO audio_window (
            asset_id, recording_id, work_id, canonical_song_id, window_index,
            start_sec, end_sec, duration_sec, segment_role, segment_type, quality_score, active_for_index
        ) VALUES (%s, %s, %s, %s, 999, 0.0, 8.0, 8.0, 'reference', 'bad_lineage', 0.0, TRUE);
        """
    window_params = (
        entity.asset_id,
        entity.recording_id + 999999,
        entity.work_id,
        entity.canonical_song_id,
    )
    results.append(
        _expect_insert_failure(
            conn,
            window_sql,
            window_params,
            'audio_window_lineage_mismatch',
            'audio_window recording/work/song lineage must match recording_asset + recording parents',
        )
    )

    # Probe 3: audio_embedding whose canonical_song_id is shifted away from the entity's.
    embedding_sql = """
        INSERT INTO audio_embedding (
            feature_set_id, extraction_job_id, asset_id, window_id, recording_id, work_id,
            canonical_song_id, embedding_storage_mode, embedding_uri, vector_norm, checksum,
            is_indexed, metadata_json
        ) VALUES (%s, NULL, %s, %s, %s, %s, %s, %s, NULL, %s, NULL, FALSE, %s::jsonb);
        """
    embedding_params = (
        feature_set_id,
        entity.asset_id,
        entity.window_id,
        entity.recording_id,
        entity.work_id,
        entity.canonical_song_id + 999999,
        'pgvector_inline_192_padded',
        1.0,
        json.dumps({'probe': 'audio_embedding_lineage_negative'}),
    )
    results.append(
        _expect_insert_failure(
            conn,
            embedding_sql,
            embedding_params,
            'audio_embedding_lineage_mismatch',
            'audio_embedding recording/work/song lineage must match the parent audio_window',
        )
    )

    # The overall verdict is green only when every probe was rejected.
    return {
        'passed': all(result['passed'] for result in results),
        'cases': results,
    }
def fetch_raw_candidates(conn: psycopg.Connection, feature_set_id: int, query_vec: list[float], topn: int) -> list[dict[str, Any]]:
......@@ -390,7 +468,7 @@ def main() -> None:
apply_schema(conn, Path(args.schema_sql))
model_id, feature_set_id, reference_set_id, retrieval_index_id = seed_registry(conn)
entities = ingest_references(conn, refs, feature_set_id, reference_set_id)
lineage_check = run_lineage_negative_test(conn, next(iter(entities.values())))
lineage_check = run_lineage_negative_tests(conn, next(iter(entities.values())), feature_set_id)
report = evaluate_live(conn, feature_set_id, retrieval_index_id, queries, args.topn, args.topk)
conn.execute('UPDATE retrieval_index_registry SET row_count = %s WHERE retrieval_index_id = %s;', (len(refs), retrieval_index_id))
counts = {
......
## 2026-06-04
- 更新 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md) 与 `acr-engine/scripts/live_pgvector_music20_eval.py`,把 lineage 负例验证从单条 `audio_window` 扩展到 `recording` / `audio_window` / `audio_embedding` 三类核心 trigger,并已重跑 live pgvector 报告确认检索指标不变;同时补充 `py_compile` 与 `diff --check` 通过的机械验证事实。
- 新增 [PostgreSQL 落库样例与 live 测试链路](./postgres_db_schema_samples.md),补齐 `acr_pg_schema_v2.sql` 的真实落库样例、`pgvector` live 检索验证、lineage trigger 负例测试,以及当前召回/混淆结果解读。
- 新增 `acr-engine/scripts/live_pgvector_music20_eval.py`,支持对用户提供的 PostgreSQL 执行隔离 schema 建表、样例数据导入、`pgvector` live 检索、`retrieval_candidate` / `match_decision` 落表与评测报告生成。
- 新增 `acr-engine/data/pgvector_eval/music20/live_pgvector_report.json` 与 `songid_eval_report_fresh.json`,记录本轮 live PostgreSQL + pgvector 与 FAISS stand-in 的对齐结果:overall `top1=0.9091` / `top3=0.9545`,但 `type_7` 仍明显偏弱。
......
......@@ -34,8 +34,11 @@
- `query_type=7`: `top1=0.0`, `top3=0.5`
5. **lineage trigger 已被验证有效**
- 脚本主动构造了一次错误 lineage 的 `audio_window`
- PostgreSQL 正确拒绝插入
- 脚本主动构造了三类错误 lineage:
- `recording`
- `audio_window`
- `audio_embedding`
- PostgreSQL 都正确拒绝插入
---
......@@ -222,8 +225,11 @@ flowchart LR
- `retrieval_index_registry`
5. 导入 20 条 reference 样例
6. 验证表计数是否正确
7. 主动插入一条错误 lineage 的 `audio_window`
8. 预期 PostgreSQL trigger 拒绝该写入
7. 主动插入三类错误 lineage:
- `recording.canonical_song_id` 与 `work.canonical_song_id` 不一致
- `audio_window.recording_id` 与 `recording_asset.recording_id` 不一致
- `audio_embedding` 的 `canonical_song_id` 与父 `audio_window` 不一致
8. 预期 PostgreSQL trigger 拒绝这些坏写入
### B. live 检索评测测试
......@@ -291,6 +297,26 @@ flowchart LR
---
## 本轮补充:完整 lineage trigger 负例覆盖
本轮重新执行 live 脚本后,`live_pgvector_report.json` 中的 `lineage_negative_test` 已从“单条 audio_window 验证”升级为“三类坏写入全部验证”:
| case | 结果 | PostgreSQL 返回 |
|---|---|---|
| `recording_lineage_mismatch` | 拒绝成功 | `recording.canonical_song_id ... mismatches work.canonical_song_id ...` |
| `audio_window_lineage_mismatch` | 拒绝成功 | `Invalid asset_id=... or recording_id=... for audio_window` |
| `audio_embedding_lineage_mismatch` | 拒绝成功 | `audio_embedding lineage mismatch` |
这意味着:
> 当前 schema v2 的三条核心 lineage trigger,已经都有真实负例证据,而不只是“理论上存在”。
同时,本轮还补了两条机械验证证据:
- `py_compile` 通过:`live_pgvector_music20_eval.py`
- `git diff --check` 通过:本轮脚本、报告、文档变更无格式问题
---
## 混淆测试补充视图
### 1. 当前 live 样例视图
......
......@@ -178,12 +178,14 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql
- Phase-1 方案、PostgreSQL 设计、实施 checklist、registry bootstrap 均已提交
- architect 审核结论:**APPROVED**
- 代码已推送远端
- PostgreSQL `acr_test` live 路径已再次验证:`recording` / `audio_window` / `audio_embedding` 三类 lineage trigger 均有真实负例证据
- 机械校验已补齐:`live_pgvector_music20_eval.py` 的 `py_compile` 通过,相关变更 `diff --check` 通过
### 未验证 / 仍是缺口
- **未执行 live PostgreSQL apply**(当前环境缺少 `psql`)
- **未实际跑 MERT / MuQ encoder-only 特征抽取**
- **未落 reference set 的真实业务数据**
- **未定义最终线上分数融合细则**
- **type_8 / type_16 还没有进入当前 live JSONL 的 PostgreSQL 实测链**
因此下次 session 应优先从这些未验证缺口里挑一条推进,而不是重复写总方案。
......