Commit 6ea7365b 6ea7365b235904d9b4fbfcd3b704d8d1cdec2259 by cnb.bofCdSsphPA

Prove asset-level embedding upserts against live PostgreSQL

Constraint: The schema already declared asset-level idempotency, but without live evidence future work could mistake it for an unverified design note.
Rejected: Rely on DDL inspection alone | It would not prove duplicate inserts are blocked and upserts reuse the same embedding row.
Confidence: high
Scope-risk: narrow
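Directive: Keep asset-level writer implementations aligned with the verified ON CONFLICT (feature_set_id, asset_id) WHERE window_id IS NULL AND asset_id IS NOT NULL contract; the conflict predicate must match the partial unique index so PostgreSQL can infer it.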
Tested: /usr/local/miniconda3/bin/python -m py_compile scripts/validate_audio_embedding_asset_upsert_live.py; git diff --check; /usr/local/miniconda3/bin/python scripts/validate_audio_embedding_asset_upsert_live.py --dsn 'postgres://d2:d2pass@127.0.0.1:5432/d2' --schema acr_asset_upsert_test --output data/pgvector_eval/music20/audio_embedding_asset_upsert_live_report.json
Not-tested: No production semantic writer uses the asset-level contract yet; this commit validates the DB contract, not an end-to-end extractor.
1 parent 015e3261
1 {
2 "schema": "acr_asset_upsert_test",
3 "dsn_redacted": "postgres://d2:***@127.0.0.1:5432/d2",
4 "seed_ids": {
5 "model_id": 1,
6 "feature_set_id": 1,
7 "canonical_song_id": 1,
8 "work_id": 1,
9 "recording_id": 1,
10 "asset_id": 1
11 },
12 "first_insert_embedding_id": 1,
13 "duplicate_insert_guard": {
14 "passed": true,
15 "error_type": "UniqueViolation",
16 "message": "duplicate key value violates unique constraint \"uq_audio_embedding_feature_asset\""
17 },
18 "upsert_embedding_id": 1,
19 "same_embedding_id_reused": true,
20 "counts": {
21 "audio_embedding": 1,
22 "audio_embedding_vector_192": 1
23 },
24 "final_state": {
25 "embedding_id": 1,
26 "asset_id": 1,
27 "window_id": null,
28 "checksum": "checksum-v2",
29 "embedding_uri": "inline://asset-probe-upsert",
30 "metadata_json": {
31 "probe": "asset_level_upsert_v2"
32 },
33 "vector_literal": "[0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2]"
34 },
35 "passed": true
36 }
...\ No newline at end of file ...\ No newline at end of file
1 #!/usr/bin/env /usr/local/miniconda3/bin/python
2 from __future__ import annotations
3
4 import argparse
5 import json
6 from pathlib import Path
7 import sys
8 from typing import Any
9
10 import psycopg
11
# Directory one level above the folder that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# Make ROOT importable before the `workers` import below; presumably needed
# when this file is run directly as a script rather than as a package module.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from workers._job_common import validate_schema

# Defaults for the --schema-sql and --output command-line options.
DEFAULT_SCHEMA_SQL = ROOT / 'sql' / 'acr_pg_schema_v2.sql'
DEFAULT_OUTPUT = ROOT / 'data' / 'pgvector_eval' / 'music20' / 'audio_embedding_asset_upsert_live_report.json'
20
21
def vec_literal(vec: list[float]) -> str:
    """Render *vec* as a bracketed, comma-separated text literal.

    Every component is formatted with exactly ten decimal places and no
    spaces are emitted between components. An empty list yields ``'[]'``.
    """
    components = [format(value, '.10f') for value in vec]
    return '[{}]'.format(','.join(components))
24
25
def reset_schema(conn: psycopg.Connection, schema: str) -> None:
    """Drop and recreate *schema*, then point the session search path at it.

    The name is passed through ``validate_schema`` first, and only the value
    it returns is interpolated into the SQL text.
    """
    safe_name = validate_schema(schema)
    statements = (
        f'DROP SCHEMA IF EXISTS {safe_name} CASCADE;',
        f'CREATE SCHEMA {safe_name};',
        f'SET search_path TO {safe_name}, public;',
    )
    for statement in statements:
        conn.execute(statement)
31
32
def apply_schema(conn: psycopg.Connection, schema_sql: Path) -> None:
    """Read the file at *schema_sql* as UTF-8 and execute its text on *conn*."""
    ddl_text = schema_sql.read_text(encoding='utf-8')
    conn.execute(ddl_text)
35
36
def seed_minimal_graph(conn: psycopg.Connection) -> dict[str, int]:
    """Insert one probe row into each table the embedding row depends on.

    Rows are created in dependency order: model_registry, feature_set_registry,
    canonical_song, work, recording, recording_asset. Each generated key is
    fed into the inserts that reference it.

    Returns:
        The six generated ids, keyed by their column names, as ``int``.
    """

    def insert_returning_id(sql: str, params: tuple[Any, ...] | None = None) -> Any:
        # Every statement below ends in RETURNING <key>; hand back that value.
        return conn.execute(sql, params).fetchone()[0]

    generated: dict[str, Any] = {}

    generated['model_id'] = insert_returning_id(
        """
        INSERT INTO model_registry (
            model_name, model_family, model_version, model_source, model_uri,
            license_name, input_sample_rate, default_window_sec, default_hop_sec,
            output_embedding_dim, pooling_supported, metadata_json
        ) VALUES (
            'asset_level_probe', 'probe', 'v1', 'live-test',
            'scripts/validate_audio_embedding_asset_upsert_live.py', 'internal-eval',
            16000, 5.0, 2.5, 192, ARRAY['none'], '{}'::jsonb
        )
        RETURNING model_id;
        """
    )

    generated['feature_set_id'] = insert_returning_id(
        """
        INSERT INTO feature_set_registry (
            model_id, feature_name, feature_level, extraction_granularity,
            window_sec, hop_sec, embedding_dim, pooling_strategy, layer_selection,
            normalize_l2, distance_metric, quantization_type, feature_schema_version,
            config_json, status
        ) VALUES (
            %s, 'semantic_embedding', 'asset', 'whole_asset',
            5.0, 2.5, 192, 'none', 'na', TRUE, 'cosine', NULL, 'v1',
            '{"probe":"asset_level_upsert"}'::jsonb, 'active'
        )
        RETURNING feature_set_id;
        """,
        (generated['model_id'],),
    )

    generated['canonical_song_id'] = insert_returning_id(
        """
        INSERT INTO canonical_song (biz_song_code, title, rights_status, metadata_json)
        VALUES ('asset-probe-song', 'Asset Probe Song', 'protected', '{}'::jsonb)
        RETURNING canonical_song_id;
        """
    )

    generated['work_id'] = insert_returning_id(
        """
        INSERT INTO work (canonical_song_id, work_code, work_title, metadata_json)
        VALUES (%s, 'asset-probe-work', 'Asset Probe Work', '{}'::jsonb)
        RETURNING work_id;
        """,
        (generated['canonical_song_id'],),
    )

    generated['recording_id'] = insert_returning_id(
        """
        INSERT INTO recording (
            work_id, canonical_song_id, recording_code, recording_title,
            version_type, is_reference, duration_sec, metadata_json
        ) VALUES (%s, %s, 'asset-probe-rec', 'Asset Probe Recording', 'master_reference', TRUE, 5.0, '{}'::jsonb)
        RETURNING recording_id;
        """,
        (generated['work_id'], generated['canonical_song_id']),
    )

    generated['asset_id'] = insert_returning_id(
        """
        INSERT INTO recording_asset (
            recording_id, asset_role, storage_uri, storage_scheme, file_ext,
            mime_type, sample_rate, channels, codec_name, duration_sec,
            normalized_storage_uri, ingest_status, metadata_json
        ) VALUES (
            %s, 'reference_audio', '/tmp/asset-probe.wav', 'file', 'wav',
            'audio/wav', 16000, 1, 'pcm_s16le', 5.0,
            '/tmp/asset-probe.wav', 'ready', '{}'::jsonb
        )
        RETURNING asset_id;
        """,
        (generated['recording_id'],),
    )

    # Coerce only at the end so the raw driver values are what get re-bound above.
    return {column: int(value) for column, value in generated.items()}
116
117
def insert_asset_embedding(conn: psycopg.Connection, ids: dict[str, int], *, checksum: str, metadata: dict[str, Any], vec: list[float]) -> int:
    """Plain-INSERT an asset-level embedding (``window_id`` NULL) and its vector.

    Args:
        conn: Connection both statements are executed on.
        ids: Must provide ``feature_set_id``, ``asset_id``, ``recording_id``,
            ``work_id`` and ``canonical_song_id``.
        checksum: Stored in ``audio_embedding.checksum``.
        metadata: Serialised to JSON and stored in ``metadata_json``.
        vec: Components written to ``audio_embedding_vector_192.embedding``.

    Returns:
        The ``embedding_id`` generated for the new ``audio_embedding`` row.
    """
    header_sql = """
        INSERT INTO audio_embedding (
            feature_set_id, extraction_job_id, asset_id, window_id, recording_id, work_id,
            canonical_song_id, embedding_storage_mode, embedding_uri, vector_norm, checksum,
            is_indexed, metadata_json
        ) VALUES (
            %s, NULL, %s, NULL, %s, %s,
            %s, 'pgvector_inline_192', 'inline://asset-probe', 1.0, %s,
            TRUE, %s::jsonb
        )
        RETURNING embedding_id;
        """
    header_params = (
        ids['feature_set_id'],
        ids['asset_id'],
        ids['recording_id'],
        ids['work_id'],
        ids['canonical_song_id'],
        checksum,
        json.dumps(metadata, ensure_ascii=False),
    )
    new_id = conn.execute(header_sql, header_params).fetchone()[0]

    # The vector lives in a side table keyed by the id generated above.
    vector_sql = 'INSERT INTO audio_embedding_vector_192 (embedding_id, embedding) VALUES (%s, %s::vector);'
    conn.execute(vector_sql, (new_id, vec_literal(vec)))
    return int(new_id)
147
148
def expect_duplicate_insert_failure(conn: psycopg.Connection, ids: dict[str, int]) -> dict[str, Any]:
    """Attempt a second plain INSERT for the same (feature_set_id, asset_id).

    The insert runs inside ``conn.transaction()``. The guard passes only when
    the statement fails with SQLSTATE 23505 (unique_violation) and the
    exception's diagnostics name ``uq_audio_embedding_feature_asset``. An
    unrelated failure whose text merely mentions the constraint is not a pass.

    Args:
        conn: Connection the probe statement is executed on.
        ids: Must provide ``feature_set_id``, ``asset_id``, ``recording_id``,
            ``work_id`` and ``canonical_song_id``.

    Returns:
        ``{'passed', 'error_type', 'message'}`` when the insert raised, or
        ``{'passed': False, 'note': ...}`` when it unexpectedly succeeded.
    """
    expected_constraint = 'uq_audio_embedding_feature_asset'
    try:
        with conn.transaction():
            conn.execute(
                """
                INSERT INTO audio_embedding (
                    feature_set_id, extraction_job_id, asset_id, window_id, recording_id, work_id,
                    canonical_song_id, embedding_storage_mode, embedding_uri, vector_norm, checksum,
                    is_indexed, metadata_json
                ) VALUES (
                    %s, NULL, %s, NULL, %s, %s,
                    %s, 'pgvector_inline_192', 'inline://asset-probe-duplicate', 1.0, 'dup-checksum',
                    TRUE, '{"probe":"duplicate_insert"}'::jsonb
                );
                """,
                (
                    ids['feature_set_id'],
                    ids['asset_id'],
                    ids['recording_id'],
                    ids['work_id'],
                    ids['canonical_song_id'],
                ),
            )
    except Exception as exc:  # noqa: BLE001 - any failure is recorded in the report
        # Read the structured fields defensively: non-driver exceptions have
        # neither `sqlstate` nor `diag`, and must simply count as "not passed".
        diag = getattr(exc, 'diag', None)
        is_unique_violation = getattr(exc, 'sqlstate', None) == '23505'
        hit_expected_constraint = getattr(diag, 'constraint_name', None) == expected_constraint
        text = str(exc)
        return {
            'passed': is_unique_violation and hit_expected_constraint,
            'error_type': type(exc).__name__,
            # An exception with an empty message must not crash the report.
            'message': text.splitlines()[0] if text else '',
        }
    return {'passed': False, 'note': 'duplicate asset-level insert unexpectedly succeeded'}
179
180
def upsert_asset_embedding(conn: psycopg.Connection, ids: dict[str, int], *, checksum: str, metadata: dict[str, Any], vec: list[float]) -> int:
    """Insert-or-update the asset-level embedding row and its vector.

    On a conflict on ``(feature_set_id, asset_id)`` with predicate
    ``window_id IS NULL AND asset_id IS NOT NULL``, the statement overwrites
    ``checksum``, ``embedding_uri``, ``metadata_json``, ``is_indexed`` and
    ``vector_norm``. The vector row is then upserted on ``embedding_id``.

    Args:
        conn: Connection both statements are executed on.
        ids: Must provide ``feature_set_id``, ``asset_id``, ``recording_id``,
            ``work_id`` and ``canonical_song_id``.
        checksum: Stored in ``audio_embedding.checksum``.
        metadata: Serialised to JSON and stored in ``metadata_json``.
        vec: Components written to ``audio_embedding_vector_192.embedding``.

    Returns:
        The ``embedding_id`` returned by the first statement.
    """
    header_sql = """
        INSERT INTO audio_embedding (
            feature_set_id, extraction_job_id, asset_id, window_id, recording_id, work_id,
            canonical_song_id, embedding_storage_mode, embedding_uri, vector_norm, checksum,
            is_indexed, metadata_json
        ) VALUES (
            %s, NULL, %s, NULL, %s, %s,
            %s, 'pgvector_inline_192', 'inline://asset-probe-upsert', 1.0, %s,
            TRUE, %s::jsonb
        )
        ON CONFLICT (feature_set_id, asset_id)
        WHERE window_id IS NULL AND asset_id IS NOT NULL
        DO UPDATE SET
            checksum = EXCLUDED.checksum,
            embedding_uri = EXCLUDED.embedding_uri,
            metadata_json = EXCLUDED.metadata_json,
            is_indexed = EXCLUDED.is_indexed,
            vector_norm = EXCLUDED.vector_norm
        RETURNING embedding_id;
        """
    header_params = (
        ids['feature_set_id'],
        ids['asset_id'],
        ids['recording_id'],
        ids['work_id'],
        ids['canonical_song_id'],
        checksum,
        json.dumps(metadata, ensure_ascii=False),
    )
    target_id = conn.execute(header_sql, header_params).fetchone()[0]

    vector_sql = """
        INSERT INTO audio_embedding_vector_192 (embedding_id, embedding)
        VALUES (%s, %s::vector)
        ON CONFLICT (embedding_id)
        DO UPDATE SET embedding = EXCLUDED.embedding;
        """
    conn.execute(vector_sql, (target_id, vec_literal(vec)))
    return int(target_id)
223
224
def fetch_final_state(conn: psycopg.Connection, embedding_id: int) -> dict[str, Any]:
    """Load the embedding row joined with its vector for the final report.

    Returns:
        A dict with ``embedding_id``, ``asset_id``, ``window_id``, ``checksum``,
        ``embedding_uri``, ``metadata_json`` (``{}`` when the stored value is
        falsy) and ``vector_literal`` (the vector cast to text).
    """
    query = """
        SELECT ae.embedding_id, ae.asset_id, ae.window_id, ae.checksum, ae.embedding_uri, ae.metadata_json,
               aev.embedding::text
        FROM audio_embedding ae
        JOIN audio_embedding_vector_192 aev ON aev.embedding_id = ae.embedding_id
        WHERE ae.embedding_id = %s;
        """
    record = conn.execute(query, (embedding_id,)).fetchone()
    found_id, asset_id, window_id, checksum, uri, metadata, vector_text = record
    state: dict[str, Any] = {}
    state['embedding_id'] = int(found_id)
    state['asset_id'] = int(asset_id)
    state['window_id'] = window_id
    state['checksum'] = checksum
    state['embedding_uri'] = uri
    state['metadata_json'] = metadata or {}
    state['vector_literal'] = vector_text
    return state
245
246
def redact_dsn(dsn: str) -> str:
    """Return *dsn* with every password value replaced by ``***``.

    Handles URI-style connection strings (password in the user-info part or
    in a ``...password=`` query parameter) and ``key=value`` strings. A URI
    that cannot be parsed is reported as ``'<unparseable dsn>'`` rather than
    echoed back, so a malformed string cannot leak its secret.
    """
    import re
    from urllib.parse import urlsplit, urlunsplit

    if '://' in dsn:
        try:
            parts = urlsplit(dsn)
            password = parts.password
        except ValueError:
            return '<unparseable dsn>'
        netloc = parts.netloc
        if password is not None:
            # rpartition: a password may itself contain '@'; the host part never does.
            userinfo, _, hostinfo = netloc.rpartition('@')
            user = userinfo.split(':', 1)[0]
            netloc = f'{user}:***@{hostinfo}'
        query = re.sub(r'(?i)((?:^|&)\w*password=)[^&]*', r'\1***', parts.query)
        return urlunsplit(parts._replace(netloc=netloc, query=query))
    # key=value form; the value is either single-quoted or a run of non-spaces.
    return re.sub(r"(?i)(\b\w*password\s*=\s*)(?:'(?:[^'\\]|\\.)*'|\S+)", r'\1***', dsn)


def main() -> None:
    """Run the asset-level upsert probe and write the JSON report.

    Steps: reset the scratch schema, apply the schema SQL, seed the minimal
    parent rows, insert one asset-level embedding, confirm a duplicate plain
    INSERT is rejected, upsert new values, then record row counts and the
    final row state. The report is written to ``--output`` and printed.

    Raises:
        SystemExit: with status 1 when the probe did not pass, so shells and
            CI can detect a failed validation.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--dsn', required=True)
    ap.add_argument('--schema', default='acr_asset_upsert_test')
    ap.add_argument('--schema-sql', default=str(DEFAULT_SCHEMA_SQL))
    ap.add_argument('--output', default=str(DEFAULT_OUTPUT))
    args = ap.parse_args()

    initial_vec = [0.1] * 192
    updated_vec = [0.2] * 192

    payload: dict[str, Any] = {
        'schema': args.schema,
        # Derived from the DSN actually used, never a fixed literal.
        'dsn_redacted': redact_dsn(args.dsn),
    }
    with psycopg.connect(args.dsn, autocommit=True) as conn:
        reset_schema(conn, args.schema)
        apply_schema(conn, Path(args.schema_sql))
        ids = seed_minimal_graph(conn)
        payload['seed_ids'] = ids

        first_embedding_id = insert_asset_embedding(
            conn,
            ids,
            checksum='checksum-v1',
            metadata={'probe': 'asset_level_insert_v1'},
            vec=initial_vec,
        )
        payload['first_insert_embedding_id'] = first_embedding_id
        payload['duplicate_insert_guard'] = expect_duplicate_insert_failure(conn, ids)

        upsert_embedding_id = upsert_asset_embedding(
            conn,
            ids,
            checksum='checksum-v2',
            metadata={'probe': 'asset_level_upsert_v2'},
            vec=updated_vec,
        )
        payload['upsert_embedding_id'] = upsert_embedding_id
        payload['same_embedding_id_reused'] = first_embedding_id == upsert_embedding_id
        payload['counts'] = {
            'audio_embedding': int(conn.execute('SELECT count(*) FROM audio_embedding;').fetchone()[0]),
            'audio_embedding_vector_192': int(conn.execute('SELECT count(*) FROM audio_embedding_vector_192;').fetchone()[0]),
        }
        payload['final_state'] = fetch_final_state(conn, upsert_embedding_id)
        # bool(): an `and` chain yields its first falsy operand, which need not be a bool.
        payload['passed'] = bool(
            payload['duplicate_insert_guard'].get('passed')
            and payload['same_embedding_id_reused']
            and payload['counts']['audio_embedding'] == 1
            and payload['counts']['audio_embedding_vector_192'] == 1
            and payload['final_state']['checksum'] == 'checksum-v2'
            and payload['final_state']['metadata_json'].get('probe') == 'asset_level_upsert_v2'
        )

    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    report_text = json.dumps(payload, ensure_ascii=False, indent=2)
    out.write_text(report_text, encoding='utf-8')
    print(report_text)

    # The report is written first so a failing run still leaves its evidence.
    if not payload['passed']:
        raise SystemExit(1)


if __name__ == '__main__':
    main()
1 ## 2026-06-04 1 ## 2026-06-04
2 2
3 - 新增 `scripts/validate_audio_embedding_asset_upsert_live.py` 与 `audio_embedding_asset_upsert_live_report.json`,在隔离 schema `acr_asset_upsert_test` 上真实验证 `uq_audio_embedding_feature_asset`:重复普通 insert 会触发 `UniqueViolation`,而 `ON CONFLICT ... DO UPDATE` 会复用同一 `embedding_id`,最终 `audio_embedding/audio_embedding_vector_192` 行数都保持为 `1`。
3 - 新增 `scripts/run_phase1_embedding_preflight_matrix_live.py``phase1_embedding_preflight_matrix_report.json`,对 `mert / muq / ecapa` 四条 semantic jobs 做了统一 live preflight 矩阵验证;结果表明 4 条 job 全都稳定落到 `preflight_failed`,且 blocker 已收敛为 `/workspace/downloads` 未挂载与语义模型 runtime 缺失,而不是单条 job 的偶发异常。 4 - 新增 `scripts/run_phase1_embedding_preflight_matrix_live.py``phase1_embedding_preflight_matrix_report.json`,对 `mert / muq / ecapa` 四条 semantic jobs 做了统一 live preflight 矩阵验证;结果表明 4 条 job 全都稳定落到 `preflight_failed`,且 blocker 已收敛为 `/workspace/downloads` 未挂载与语义模型 runtime 缺失,而不是单条 job 的偶发异常。
4 - 更新 `run_embedding_job.py`,把 semantic lane 从“只有 dry-run”推进到“真实 scope 读取 + vector table 校验 + runtime 依赖校验 + 缺音频校验 + PostgreSQL failed 落账”的 preflight write contract;当前 live `mert` job 会把 `unreadable_audio_assets``model_runtime_unavailable` 同时写入 `feature_extraction_job.metadata_json`,不再只停留在纸面设计。 5 - 更新 `run_embedding_job.py`,把 semantic lane 从“只有 dry-run”推进到“真实 scope 读取 + vector table 校验 + runtime 依赖校验 + 缺音频校验 + PostgreSQL failed 落账”的 preflight write contract;当前 live `mert` job 会把 `unreadable_audio_assets``model_runtime_unavailable` 同时写入 `feature_extraction_job.metadata_json`,不再只停留在纸面设计。
5 -`audio_embedding` 补上 `UNIQUE(feature_set_id, window_id) WHERE window_id IS NOT NULL``UNIQUE(feature_set_id, asset_id) WHERE window_id IS NULL AND asset_id IS NOT NULL` 两条幂等唯一键,为后续真实 `MERT / MuQ / ECAPA` upsert 落库固定主键策略。 6 -`audio_embedding` 补上 `UNIQUE(feature_set_id, window_id) WHERE window_id IS NOT NULL``UNIQUE(feature_set_id, asset_id) WHERE window_id IS NULL AND asset_id IS NOT NULL` 两条幂等唯一键,为后续真实 `MERT / MuQ / ECAPA` upsert 落库固定主键策略。
......
...@@ -343,6 +343,17 @@ MERT 5s/2.5s job (`extraction_job_id=2`) 在 `acr_test` 上已经真实验证: ...@@ -343,6 +343,17 @@ MERT 5s/2.5s job (`extraction_job_id=2`) 在 `acr_test` 上已经真实验证:
343 343
344 而不需要先查再写。 344 而不需要先查再写。
345 345
346 当前这两条唯一键里,asset-level 路径也已经有 live 证据:
347
348 - `scripts/validate_audio_embedding_asset_upsert_live.py`
349 - `audio_embedding_asset_upsert_live_report.json`
350
351 已验证:
352
353 - 重复 `INSERT` 会被 `uq_audio_embedding_feature_asset` 拒绝
354 - `ON CONFLICT ... DO UPDATE` 会复用同一个 `embedding_id`
355 - `audio_embedding` / `audio_embedding_vector_192` 行数都保持为 `1`
356
346 ### 下一步替换点 357 ### 下一步替换点
347 358
348 当 runtime 与音频挂载到位后,只需要把 guarded failure path 替换成真实 inference: 359 当 runtime 与音频挂载到位后,只需要把 guarded failure path 替换成真实 inference:
......
...@@ -774,3 +774,40 @@ cd /workspace/acr-engine ...@@ -774,3 +774,40 @@ cd /workspace/acr-engine
774 - 当前真正阻塞 Phase-1 encoder-only 落地的是: 774 - 当前真正阻塞 Phase-1 encoder-only 落地的是:
775 1. `/workspace/downloads` 音频挂载 775 1. `/workspace/downloads` 音频挂载
776 2. 模型 runtime 依赖安装 776 2. 模型 runtime 依赖安装
777
778
779 ## 新增:asset-level embedding upsert live 验证
780
781 为了把 `uq_audio_embedding_feature_asset` 从“DDL 声明”推进到“真实证据”,本轮新增:
782
783 - `acr-engine/scripts/validate_audio_embedding_asset_upsert_live.py`
784 - `acr-engine/data/pgvector_eval/music20/audio_embedding_asset_upsert_live_report.json`
785
786 ### 验证动作
787
788 脚本会在隔离 schema `acr_asset_upsert_test` 中:
789
790 1. 落最小主数据图:`song -> work -> recording -> asset`
791 2. 插入第一条 `window_id IS NULL` 的 asset-level embedding
792 3. 再做一次普通重复 `INSERT`
793 4. 预期被 `uq_audio_embedding_feature_asset` 拒绝
794 5. 再做一次 `ON CONFLICT ... DO UPDATE`
795 6. 验证最终仍只有 `1` 条 `audio_embedding` 与 `1` 条 `audio_embedding_vector_192`
796
797 ### 当前结果
798
799 | 项 | 结果 |
800 |---|---|
801 | 首次 `embedding_id` | `1` |
802 | 重复普通 `INSERT` | `UniqueViolation` |
803 | 唯一键名 | `uq_audio_embedding_feature_asset` |
804 | upsert 后 `embedding_id` | `1` |
805 | `same_embedding_id_reused` | `true` |
806 | `audio_embedding` 行数 | `1` |
807 | `audio_embedding_vector_192` 行数 | `1` |
808 | 最终 `checksum` | `checksum-v2` |
809
810 结论:
811
812 - asset-level 唯一键不是“纸面存在”,而是已经在 live PostgreSQL 上真实生效
813 - 后续如果补 asset-level semantic writer,可以直接沿用同一个 `ON CONFLICT (feature_set_id, asset_id) ...` 合同
......
...@@ -192,6 +192,7 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql ...@@ -192,6 +192,7 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql
192 - semantic lane 也已完成 live failure contract:`run_embedding_job.py` 现在会同时暴露 `unreadable_audio_assets``model_runtime_unavailable`,而不是把失败伪装成 completed 192 - semantic lane 也已完成 live failure contract:`run_embedding_job.py` 现在会同时暴露 `unreadable_audio_assets``model_runtime_unavailable`,而不是把失败伪装成 completed
193 - `audio_embedding` 已补上 window / asset 双路唯一键,后续真实 encoder 只需替换 inference adapter 即可复用同一 upsert 合同 193 - `audio_embedding` 已补上 window / asset 双路唯一键,后续真实 encoder 只需替换 inference adapter 即可复用同一 upsert 合同
194 - `scripts/run_phase1_embedding_preflight_matrix_live.py` 已跑通,4 条 semantic jobs(mert/muq/ecapa)在 `acr_test` 上都被稳定标记为 `preflight_failed`;当前共性 blocker 已收敛为 `/workspace/downloads` 缺失 + 语义模型 runtime 缺失 194 - `scripts/run_phase1_embedding_preflight_matrix_live.py` 已跑通,4 条 semantic jobs(mert/muq/ecapa)在 `acr_test` 上都被稳定标记为 `preflight_failed`;当前共性 blocker 已收敛为 `/workspace/downloads` 缺失 + 语义模型 runtime 缺失
195 - `scripts/validate_audio_embedding_asset_upsert_live.py` 已在隔离 schema `acr_asset_upsert_test` 上验证 `uq_audio_embedding_feature_asset`:重复 insert 会被唯一键拒绝,upsert 会复用同一 `embedding_id`,说明 asset-level 幂等键也已有真实证据
195 - `phase1_hot_reference_v1``acr_test` 里已经真实补齐 `20` 个 reference members,因此 worker dry-run 当前看到的 scope 已是 `20 recordings / 20 assets / 20 windows` 196 - `phase1_hot_reference_v1``acr_test` 里已经真实补齐 `20` 个 reference members,因此 worker dry-run 当前看到的 scope 已是 `20 recordings / 20 assets / 20 windows`
196 - worker contract 现在已有基础前置状态保护;重复执行同一 chromaprint dry-run job 会被 `expected_status=pending` 明确拒绝,证据见 `phase1_worker_double_claim_guard_report.json` 197 - worker contract 现在已有基础前置状态保护;重复执行同一 chromaprint dry-run job 会被 `expected_status=pending` 明确拒绝,证据见 `phase1_worker_double_claim_guard_report.json`
197 - exact lane 的 `run_chromaprint_job.py` 已具备非 dry-run 写入路径;当前在 `acr_test` 的 live 结果是因为 `/workspace/downloads/...` 缺失而明确 `failed`,不是继续假装 `completed` 198 - exact lane 的 `run_chromaprint_job.py` 已具备非 dry-run 写入路径;当前在 `acr_test` 的 live 结果是因为 `/workspace/downloads/...` 缺失而明确 `failed`,不是继续假装 `completed`
......