Commit 223f80ac 223f80ac85c128da194bface19cb17d93c72247a by cnb.bofCdSsphPA

Collapse Phase-1 worker validation into one live smoke entrypoint

Constraint: Phase-1 now has multiple lane-specific validation scripts, so without a single smoke entrypoint the next session must manually reconstruct the current blocker picture.
Rejected: Keep exact and semantic checks separate only | It would slow restart diagnosis and hide the shared environment blockers.
Confidence: high
Scope-risk: narrow
Directive: Use the smoke entrypoint first on future sessions to distinguish contract regressions from missing mounts/runtime prerequisites.
Tested: /usr/local/miniconda3/bin/python -m py_compile scripts/run_phase1_worker_contract_smoke_live.py; git diff --check; /usr/local/miniconda3/bin/python scripts/run_phase1_worker_contract_smoke_live.py --dsn 'postgres://d2:d2pass@127.0.0.1:5432/d2' --schema acr_test --output data/pgvector_eval/music20/phase1_worker_contract_smoke_report.json
Not-tested: This smoke still reflects an environment-blocked host and does not prove successful extraction.
1 parent 6ea7365b
1 {
2 "worker": "run_chromaprint_job",
3 "schema": "acr_test",
4 "job": {
5 "extraction_job_id": 1,
6 "feature_set_id": 2,
7 "target_scope": "reference_set:phase1_hot_reference_v1",
8 "job_status": "pending",
9 "shard_key": "phase1/reference/chromaprint/v1",
10 "job_metadata": {
11 "lane": "exact",
12 "phase": "phase1",
13 "priority": "p0"
14 },
15 "feature_name": "fingerprint_asset",
16 "feature_level": "asset",
17 "extraction_granularity": "full_asset",
18 "window_sec": 5.0,
19 "hop_sec": 2.5,
20 "embedding_dim": null,
21 "distance_metric": "hamming",
22 "feature_config": {
23 "lane": "exact",
24 "index_target": "audio_fingerprint"
25 },
26 "model_id": 2,
27 "model_name": "chromaprint",
28 "model_version": "v1",
29 "model_family": "fingerprint",
30 "input_sample_rate": 16000,
31 "output_embedding_dim": null,
32 "model_metadata": {
33 "lane": "exact",
34 "note": "exact fingerprint lane baseline",
35 "phase": "phase1"
36 }
37 },
38 "target_scope_summary": {
39 "scope_type": "reference_set",
40 "scope_value": "phase1_hot_reference_v1",
41 "reference_set_id": 2,
42 "reference_set_name": "phase1_hot_reference_v1",
43 "recording_count": 20,
44 "ready_asset_count": 20,
45 "active_window_count": 20
46 },
47 "scope_asset_count": 20,
48 "processed_assets": [],
49 "missing_assets": [
50 {
51 "asset_id": 1,
52 "storage_uri": "/workspace/downloads/100/type_11/93dfdeb0-7da5-42a8-9c71-cf12af57dd191650256918.wav",
53 "reason": "missing_audio"
54 },
55 {
56 "asset_id": 2,
57 "storage_uri": "/workspace/downloads/101/type_11/83c0c07f-4f96-4ff4-998c-58db910f3cfa1650256915.wav",
58 "reason": "missing_audio"
59 },
60 {
61 "asset_id": 3,
62 "storage_uri": "/workspace/downloads/102/type_11/43440ec5-70b4-4d50-8683-d3e41cad29411650256908.wav",
63 "reason": "missing_audio"
64 },
65 {
66 "asset_id": 4,
67 "storage_uri": "/workspace/downloads/103/type_11/19876dbb-fffc-40f8-9530-9322c9ed77681650256912.wav",
68 "reason": "missing_audio"
69 },
70 {
71 "asset_id": 5,
72 "storage_uri": "/workspace/downloads/104/type_11/4c1d3e22-045f-445b-ab87-ba1ae3ee09b31650256912.wav",
73 "reason": "missing_audio"
74 },
75 {
76 "asset_id": 6,
77 "storage_uri": "/workspace/downloads/105/type_11/57e61cde-4410-4751-93e9-d7a4ecece5791650256910.wav",
78 "reason": "missing_audio"
79 },
80 {
81 "asset_id": 7,
82 "storage_uri": "/workspace/downloads/106/type_11/bf61426c-67b7-4cf1-a9e7-f78cf519a0021650256910.wav",
83 "reason": "missing_audio"
84 },
85 {
86 "asset_id": 8,
87 "storage_uri": "/workspace/downloads/107/type_11/296bbc25-617c-4368-9a69-357aeec394381650256910.wav",
88 "reason": "missing_audio"
89 },
90 {
91 "asset_id": 9,
92 "storage_uri": "/workspace/downloads/108/type_11/d7e28fe6-4ad6-4243-b66b-d90ff5ca1e491650256909.wav",
93 "reason": "missing_audio"
94 },
95 {
96 "asset_id": 10,
97 "storage_uri": "/workspace/downloads/109/type_11/84acef9b-2a74-44bc-9eff-5ca7969ac9b61650256909.wav",
98 "reason": "missing_audio"
99 },
100 {
101 "asset_id": 11,
102 "storage_uri": "/workspace/downloads/110/type_11/2197b39e-23e2-4a66-b07e-dd672eab214a1650256908.wav",
103 "reason": "missing_audio"
104 },
105 {
106 "asset_id": 12,
107 "storage_uri": "/workspace/downloads/111/type_11/7f5256e8-de5f-41c5-bf76-419e05df72d81650256908.wav",
108 "reason": "missing_audio"
109 },
110 {
111 "asset_id": 13,
112 "storage_uri": "/workspace/downloads/112/type_11/34acd523-3c01-443d-ac3d-4ad7b9e2246f1650256907.wav",
113 "reason": "missing_audio"
114 },
115 {
116 "asset_id": 14,
117 "storage_uri": "/workspace/downloads/113/type_11/6d9438af-5d83-434b-bb20-76e28d0bbc4e1650256907.wav",
118 "reason": "missing_audio"
119 },
120 {
121 "asset_id": 15,
122 "storage_uri": "/workspace/downloads/114/type_11/0238ecbf-b234-470e-82e4-f3b80a267d771650256906.wav",
123 "reason": "missing_audio"
124 },
125 {
126 "asset_id": 16,
127 "storage_uri": "/workspace/downloads/115/type_11/aabad0ff-13de-4786-aa9c-40e1f957ed9f1650256906.wav",
128 "reason": "missing_audio"
129 },
130 {
131 "asset_id": 17,
132 "storage_uri": "/workspace/downloads/116/type_11/da34f6ff-39e7-4dde-8265-e1bb01b6263e1650256901.wav",
133 "reason": "missing_audio"
134 },
135 {
136 "asset_id": 18,
137 "storage_uri": "/workspace/downloads/117/type_11/1e1599e6-ebbd-4ceb-a81d-a320331ef6e31650256901.wav",
138 "reason": "missing_audio"
139 },
140 {
141 "asset_id": 19,
142 "storage_uri": "/workspace/downloads/118/type_11/db64461e-d752-4cf3-ab1d-56ff9232823d1650256901.wav",
143 "reason": "missing_audio"
144 },
145 {
146 "asset_id": 20,
147 "storage_uri": "/workspace/downloads/119/type_11/180dfa7d-836a-449c-990f-a3bf39c11da11650256898.wav",
148 "reason": "missing_audio"
149 }
150 ],
151 "status_after_start": {
152 "extraction_job_id": 1,
153 "job_status": "running",
154 "input_count": 20,
155 "output_count": null,
156 "started_at": "2026-06-04T13:57:50.652147+08:00",
157 "finished_at": null,
158 "log_uri": null,
159 "metadata_json": {
160 "lane": "exact",
161 "phase": "phase1",
162 "worker": "run_chromaprint_job",
163 "dry_run": false,
164 "priority": "p0",
165 "output_target": "audio_fingerprint",
166 "execution_mode": "write_attempt",
167 "target_scope_summary": {
168 "scope_type": "reference_set",
169 "scope_value": "phase1_hot_reference_v1",
170 "recording_count": 20,
171 "reference_set_id": 2,
172 "ready_asset_count": 20,
173 "reference_set_name": "phase1_hot_reference_v1",
174 "active_window_count": 20
175 }
176 }
177 },
178 "status_after_complete": null,
179 "status_after_failed": {
180 "extraction_job_id": 1,
181 "job_status": "failed",
182 "input_count": 20,
183 "output_count": 0,
184 "started_at": "2026-06-04T13:57:50.652147+08:00",
185 "finished_at": "2026-06-04T13:57:50.653101+08:00",
186 "log_uri": null,
187 "metadata_json": {
188 "lane": "exact",
189 "phase": "phase1",
190 "worker": "run_chromaprint_job",
191 "dry_run": false,
192 "priority": "p0",
193 "artifact_dir": "/workspace/acr-engine/data/pgvector_eval/music20/phase1_fingerprints",
194 "output_target": "audio_fingerprint",
195 "execution_mode": "write_attempt",
196 "failure_reason": "unreadable_audio_assets",
197 "write_target_table": "audio_fingerprint",
198 "missing_asset_count": 20,
199 "target_scope_summary": {
200 "scope_type": "reference_set",
201 "scope_value": "phase1_hot_reference_v1",
202 "recording_count": 20,
203 "reference_set_id": 2,
204 "ready_asset_count": 20,
205 "reference_set_name": "phase1_hot_reference_v1",
206 "active_window_count": 20
207 },
208 "missing_asset_samples": [
209 {
210 "reason": "missing_audio",
211 "asset_id": 1,
212 "storage_uri": "/workspace/downloads/100/type_11/93dfdeb0-7da5-42a8-9c71-cf12af57dd191650256918.wav"
213 },
214 {
215 "reason": "missing_audio",
216 "asset_id": 2,
217 "storage_uri": "/workspace/downloads/101/type_11/83c0c07f-4f96-4ff4-998c-58db910f3cfa1650256915.wav"
218 },
219 {
220 "reason": "missing_audio",
221 "asset_id": 3,
222 "storage_uri": "/workspace/downloads/102/type_11/43440ec5-70b4-4d50-8683-d3e41cad29411650256908.wav"
223 },
224 {
225 "reason": "missing_audio",
226 "asset_id": 4,
227 "storage_uri": "/workspace/downloads/103/type_11/19876dbb-fffc-40f8-9530-9322c9ed77681650256912.wav"
228 },
229 {
230 "reason": "missing_audio",
231 "asset_id": 5,
232 "storage_uri": "/workspace/downloads/104/type_11/4c1d3e22-045f-445b-ab87-ba1ae3ee09b31650256912.wav"
233 }
234 ]
235 }
236 },
237 "next_write_target": "audio_fingerprint",
238 "notes": [
239 "dry-run preserves the verified planner -> job -> PostgreSQL state flow",
240 "non-dry-run now writes repo-local chromaprint-style hash artifacts plus audio_fingerprint rows when source audio is readable"
241 ]
242 }
...\ No newline at end of file ...\ No newline at end of file
1 {
2 "schema": "acr_test",
3 "dsn_redacted": "postgres://d2:***@127.0.0.1:5432/d2",
4 "exact_lane": {
5 "job_id": 1,
6 "returncode": 0,
7 "job_status": "failed",
8 "failure_reason": "unreadable_audio_assets",
9 "missing_asset_count": 20,
10 "artifact": "data/pgvector_eval/music20/phase1_worker_contract_smoke_exact.json"
11 },
12 "semantic_lane": {
13 "returncode": 0,
14 "semantic_job_count": 4,
15 "failed_jobs": 4,
16 "unique_blockers": [
17 "model_runtime_unavailable",
18 "unreadable_audio_assets"
19 ],
20 "artifact": "data/pgvector_eval/music20/phase1_worker_contract_smoke_semantic_matrix.json"
21 },
22 "summary": {
23 "exact_status": "failed",
24 "semantic_failed_jobs": 4,
25 "shared_environment_blockers": [
26 "missing /workspace/downloads mount",
27 "missing semantic model runtime dependencies"
28 ]
29 }
30 }
...\ No newline at end of file ...\ No newline at end of file
1 {
2 "schema": "acr_test",
3 "dsn_redacted": "postgres://d2:***@127.0.0.1:5432/d2",
4 "semantic_job_count": 4,
5 "results": [
6 {
7 "extraction_job_id": 2,
8 "model_name": "mert",
9 "model_version": "v1-95m",
10 "vector_table": "audio_embedding_vector_768",
11 "returncode": 0,
12 "job_status": "failed",
13 "failure_reason": "preflight_failed",
14 "preflight_blockers": [
15 "unreadable_audio_assets",
16 "model_runtime_unavailable"
17 ],
18 "missing_window_count": 20,
19 "runtime_missing_dependencies": [
20 "torch",
21 "torchaudio",
22 "transformers"
23 ],
24 "vector_table_report": {
25 "reason": null,
26 "resolved": true,
27 "expected_dim": 768,
28 "table_exists": true,
29 "allowed_vector_tables": [
30 "audio_embedding_vector_192",
31 "audio_embedding_vector_768"
32 ],
33 "requested_vector_table": "audio_embedding_vector_768"
34 },
35 "attempt_artifact": "data/pgvector_eval/music20/job2_mert_preflight_attempt.json"
36 },
37 {
38 "extraction_job_id": 3,
39 "model_name": "mert",
40 "model_version": "v1-95m",
41 "vector_table": "audio_embedding_vector_768",
42 "returncode": 0,
43 "job_status": "failed",
44 "failure_reason": "preflight_failed",
45 "preflight_blockers": [
46 "unreadable_audio_assets",
47 "model_runtime_unavailable"
48 ],
49 "missing_window_count": 20,
50 "runtime_missing_dependencies": [
51 "torch",
52 "torchaudio",
53 "transformers"
54 ],
55 "vector_table_report": {
56 "reason": null,
57 "resolved": true,
58 "expected_dim": 768,
59 "table_exists": true,
60 "allowed_vector_tables": [
61 "audio_embedding_vector_192",
62 "audio_embedding_vector_768"
63 ],
64 "requested_vector_table": "audio_embedding_vector_768"
65 },
66 "attempt_artifact": "data/pgvector_eval/music20/job3_mert_preflight_attempt.json"
67 },
68 {
69 "extraction_job_id": 4,
70 "model_name": "muq",
71 "model_version": "large-msd-iter",
72 "vector_table": "audio_embedding_vector_768",
73 "returncode": 0,
74 "job_status": "failed",
75 "failure_reason": "preflight_failed",
76 "preflight_blockers": [
77 "unreadable_audio_assets",
78 "model_runtime_unavailable"
79 ],
80 "missing_window_count": 20,
81 "runtime_missing_dependencies": [
82 "torch",
83 "torchaudio",
84 "transformers"
85 ],
86 "vector_table_report": {
87 "reason": null,
88 "resolved": true,
89 "expected_dim": 768,
90 "table_exists": true,
91 "allowed_vector_tables": [
92 "audio_embedding_vector_192",
93 "audio_embedding_vector_768"
94 ],
95 "requested_vector_table": "audio_embedding_vector_768"
96 },
97 "attempt_artifact": "data/pgvector_eval/music20/job4_muq_preflight_attempt.json"
98 },
99 {
100 "extraction_job_id": 5,
101 "model_name": "ecapa",
102 "model_version": "acr-baseline-v1",
103 "vector_table": "audio_embedding_vector_192",
104 "returncode": 0,
105 "job_status": "failed",
106 "failure_reason": "preflight_failed",
107 "preflight_blockers": [
108 "unreadable_audio_assets",
109 "model_runtime_unavailable"
110 ],
111 "missing_window_count": 20,
112 "runtime_missing_dependencies": [
113 "torch",
114 "torchaudio",
115 "speechbrain"
116 ],
117 "vector_table_report": {
118 "reason": null,
119 "resolved": true,
120 "expected_dim": 192,
121 "table_exists": true,
122 "allowed_vector_tables": [
123 "audio_embedding_vector_192",
124 "audio_embedding_vector_768"
125 ],
126 "requested_vector_table": "audio_embedding_vector_192"
127 },
128 "attempt_artifact": "data/pgvector_eval/music20/job5_ecapa_preflight_attempt.json"
129 }
130 ],
131 "summary": {
132 "failed_jobs": 4,
133 "models": [
134 "mert",
135 "mert",
136 "muq",
137 "ecapa"
138 ],
139 "unique_blockers": [
140 "model_runtime_unavailable",
141 "unreadable_audio_assets"
142 ]
143 }
144 }
...\ No newline at end of file ...\ No newline at end of file
1 #!/usr/bin/env /usr/local/miniconda3/bin/python
2 from __future__ import annotations
3
4 import argparse
5 import json
6 import subprocess
7 from pathlib import Path
8 from typing import Any
9
# Repository root: the directory one level above the folder containing this script.
ROOT = Path(__file__).resolve().parents[1]
# Interpreter used to launch every child script/worker.
PYTHON_BIN = '/usr/local/miniconda3/bin/python'
# Where the combined smoke report is written when --output is not given.
DEFAULT_OUTPUT = ROOT.joinpath(
    'data',
    'pgvector_eval',
    'music20',
    'phase1_worker_contract_smoke_report.json',
)
13
14
def run_cmd(cmd: list[str]) -> subprocess.CompletedProcess[str]:
    """Execute ``cmd`` with the repository root as working directory.

    Both stdout and stderr are captured and decoded as text. The exit status
    is not checked here; callers inspect ``returncode`` themselves.
    """
    completed = subprocess.run(
        cmd,
        cwd=ROOT,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return completed
17
18
def reset_jobs(dsn: str, schema: str) -> None:
    """Run the Phase-1 extraction-job bootstrap script against ``schema``.

    Args:
        dsn: Database connection string forwarded as ``--dsn``.
        schema: Schema name forwarded as ``--schema``.

    Raises:
        SystemExit: If the bootstrap script exits non-zero; the message is its
            stderr, or its stdout when stderr is empty.
    """
    bootstrap_cmd = [PYTHON_BIN, 'scripts/bootstrap_phase1_extraction_jobs_live.py']
    bootstrap_cmd += ['--dsn', dsn, '--schema', schema]
    result = run_cmd(bootstrap_cmd)
    if result.returncode == 0:
        return
    raise SystemExit(result.stderr or result.stdout)
28
29
def run_exact_lane(dsn: str, schema: str, job_id: int = 1) -> dict[str, Any]:
    """Run the chromaprint worker for one job and summarise its report.

    The worker is launched with ``--output`` pointing at a fixed artifact path
    under ``data/pgvector_eval/music20``; that JSON file is then read back and
    condensed into a small summary.

    Args:
        dsn: Database connection string forwarded as ``--dsn``.
        schema: Schema name forwarded as ``--schema``.
        job_id: Extraction job to run. Defaults to ``1``, the id that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A dict with ``job_id``, ``returncode``, ``job_status``,
        ``failure_reason``, ``missing_asset_count`` and the artifact path
        relative to the repository root. Status fields are ``None`` when the
        worker report carries neither a failed nor a completed status.

    Raises:
        SystemExit: If the worker process exits non-zero; the message is its
            stderr, or its stdout when stderr is empty.
    """
    out = ROOT / 'data' / 'pgvector_eval' / 'music20' / 'phase1_worker_contract_smoke_exact.json'
    proc = run_cmd([
        PYTHON_BIN,
        'workers/run_chromaprint_job.py',
        '--dsn', dsn,
        '--schema', schema,
        '--job-id', str(job_id),
        '--output', str(out),
    ])
    if proc.returncode != 0:
        raise SystemExit(proc.stderr or proc.stdout)
    payload = json.loads(out.read_text(encoding='utf-8'))
    # Prefer the failed snapshot; fall back to the completed one, then to an
    # empty dict so the lookups below yield None instead of raising.
    status = payload.get('status_after_failed') or payload.get('status_after_complete') or {}
    metadata = status.get('metadata_json') or {}
    return {
        'job_id': job_id,
        'returncode': proc.returncode,
        'job_status': status.get('job_status'),
        'failure_reason': metadata.get('failure_reason'),
        'missing_asset_count': metadata.get('missing_asset_count'),
        'artifact': str(out.relative_to(ROOT)),
    }
53
54
def run_semantic_matrix(dsn: str, schema: str) -> dict[str, Any]:
    """Run the embedding preflight matrix script and summarise its report.

    Args:
        dsn: Database connection string forwarded as ``--dsn``.
        schema: Schema name forwarded as ``--schema``.

    Returns:
        A dict with ``returncode``, ``semantic_job_count``, ``failed_jobs``,
        ``unique_blockers`` and the artifact path relative to the repository
        root.

    Raises:
        SystemExit: If the matrix script exits non-zero; the message is its
            stderr, or its stdout when stderr is empty.
    """
    report_path = ROOT.joinpath(
        'data',
        'pgvector_eval',
        'music20',
        'phase1_worker_contract_smoke_semantic_matrix.json',
    )
    matrix_cmd = [
        PYTHON_BIN,
        'scripts/run_phase1_embedding_preflight_matrix_live.py',
        '--dsn', dsn,
        '--schema', schema,
        '--output', str(report_path),
    ]
    result = run_cmd(matrix_cmd)
    if result.returncode != 0:
        raise SystemExit(result.stderr or result.stdout)

    report = json.loads(report_path.read_text(encoding='utf-8'))
    # A missing "summary" section degrades to None values rather than an error.
    summary = report.get('summary', {})
    overview: dict[str, Any] = {}
    overview['returncode'] = result.returncode
    overview['semantic_job_count'] = report.get('semantic_job_count')
    overview['failed_jobs'] = summary.get('failed_jobs')
    overview['unique_blockers'] = summary.get('unique_blockers')
    overview['artifact'] = str(report_path.relative_to(ROOT))
    return overview
74
75
def _redact_dsn(dsn: str) -> str:
    """Return ``dsn`` with any embedded password replaced by ``***``.

    Handles URI-style userinfo (``scheme://user:secret@host/db``) as well as
    ``password=...`` pairs in keyword/value or query-string form. A DSN with
    no password is returned unchanged.
    """
    import re
    from urllib.parse import urlsplit

    try:
        netloc = urlsplit(dsn).netloc
    except ValueError:
        # Unparseable input: emit a placeholder rather than risk echoing a secret.
        return '***'
    userinfo, at_sign, hostinfo = netloc.rpartition('@')
    user, colon, _secret = userinfo.partition(':')
    if at_sign and colon:
        # Swap only the first occurrence, i.e. the authority component itself.
        dsn = dsn.replace(netloc, f'{user}:***@{hostinfo}', 1)
    return re.sub(
        r"(?i)(password\s*=\s*)(?:'(?:[^'\\]|\\.)*'|[^&\s]+)",
        r'\1***',
        dsn,
    )


def _environment_blockers(exact: dict[str, Any], semantic: dict[str, Any]) -> list[str]:
    """Translate the blocker codes reported by the lanes into descriptions.

    Only blockers that were actually observed in this run are listed, so the
    report stops naming them once the environment is fixed.
    Assumes blocker codes are strings, as in the current worker reports.
    """
    observed = set(semantic.get('unique_blockers') or [])
    observed.add(exact.get('failure_reason'))
    known = (
        ('unreadable_audio_assets', 'missing /workspace/downloads mount'),
        ('model_runtime_unavailable', 'missing semantic model runtime dependencies'),
    )
    return [description for code, description in known if code in observed]


def main() -> None:
    """Run the exact lane and the semantic matrix, then write one summary report.

    Jobs are re-bootstrapped before each lane. The combined report is written
    to ``--output`` (parent directories are created) and echoed to stdout.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--dsn', required=True)
    ap.add_argument('--schema', default='acr_test')
    ap.add_argument('--output', default=str(DEFAULT_OUTPUT))
    args = ap.parse_args()

    reset_jobs(args.dsn, args.schema)
    exact = run_exact_lane(args.dsn, args.schema)
    reset_jobs(args.dsn, args.schema)
    semantic = run_semantic_matrix(args.dsn, args.schema)

    payload = {
        'schema': args.schema,
        # Derived from the DSN actually used, so the report names the real target.
        'dsn_redacted': _redact_dsn(args.dsn),
        'exact_lane': exact,
        'semantic_lane': semantic,
        'summary': {
            'exact_status': exact['job_status'],
            'semantic_failed_jobs': semantic['failed_jobs'],
            'shared_environment_blockers': _environment_blockers(exact, semantic),
        },
    }
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    # Trailing newline keeps the committed artifact friendly to diffs and tools.
    out.write_text(rendered + '\n', encoding='utf-8')
    print(rendered)


if __name__ == '__main__':
    main()
1 ## 2026-06-04 1 ## 2026-06-04
2 2
3 - 新增 `scripts/run_phase1_worker_contract_smoke_live.py` 与 `phase1_worker_contract_smoke_report.json`,把 exact lane 非 dry-run 验证与 semantic preflight matrix 合成一条 live smoke 命令;当前总览结果为 exact=`failed/unreadable_audio_assets`、semantic=`4/4 failed`,说明阻塞点已经收敛到环境挂载与模型 runtime,而不是 worker contract 本身。
3 - 新增 `scripts/validate_audio_embedding_asset_upsert_live.py``audio_embedding_asset_upsert_live_report.json`,在隔离 schema `acr_asset_upsert_test` 上真实验证 `uq_audio_embedding_feature_asset`:重复普通 insert 会触发 `UniqueViolation`,而 `ON CONFLICT ... DO UPDATE` 会复用同一 `embedding_id`,最终 `audio_embedding/audio_embedding_vector_192` 行数都保持为 `1` 4 - 新增 `scripts/validate_audio_embedding_asset_upsert_live.py``audio_embedding_asset_upsert_live_report.json`,在隔离 schema `acr_asset_upsert_test` 上真实验证 `uq_audio_embedding_feature_asset`:重复普通 insert 会触发 `UniqueViolation`,而 `ON CONFLICT ... DO UPDATE` 会复用同一 `embedding_id`,最终 `audio_embedding/audio_embedding_vector_192` 行数都保持为 `1`
4 - 新增 `scripts/run_phase1_embedding_preflight_matrix_live.py``phase1_embedding_preflight_matrix_report.json`,对 `mert / muq / ecapa` 四条 semantic jobs 做了统一 live preflight 矩阵验证;结果表明 4 条 job 全都稳定落到 `preflight_failed`,且 blocker 已收敛为 `/workspace/downloads` 未挂载与语义模型 runtime 缺失,而不是单条 job 的偶发异常。 5 - 新增 `scripts/run_phase1_embedding_preflight_matrix_live.py``phase1_embedding_preflight_matrix_report.json`,对 `mert / muq / ecapa` 四条 semantic jobs 做了统一 live preflight 矩阵验证;结果表明 4 条 job 全都稳定落到 `preflight_failed`,且 blocker 已收敛为 `/workspace/downloads` 未挂载与语义模型 runtime 缺失,而不是单条 job 的偶发异常。
5 - 更新 `run_embedding_job.py`,把 semantic lane 从“只有 dry-run”推进到“真实 scope 读取 + vector table 校验 + runtime 依赖校验 + 缺音频校验 + PostgreSQL failed 落账”的 preflight write contract;当前 live `mert` job 会把 `unreadable_audio_assets``model_runtime_unavailable` 同时写入 `feature_extraction_job.metadata_json`,不再只停留在纸面设计。 6 - 更新 `run_embedding_job.py`,把 semantic lane 从“只有 dry-run”推进到“真实 scope 读取 + vector table 校验 + runtime 依赖校验 + 缺音频校验 + PostgreSQL failed 落账”的 preflight write contract;当前 live `mert` job 会把 `unreadable_audio_assets``model_runtime_unavailable` 同时写入 `feature_extraction_job.metadata_json`,不再只停留在纸面设计。
......
...@@ -811,3 +811,37 @@ cd /workspace/acr-engine ...@@ -811,3 +811,37 @@ cd /workspace/acr-engine
811 811
812 - asset-level 唯一键不是“纸面存在”,而是已经在 live PostgreSQL 上真实生效 812 - asset-level 唯一键不是“纸面存在”,而是已经在 live PostgreSQL 上真实生效
813 - 后续如果补 asset-level semantic writer,可以直接沿用同一个 `ON CONFLICT (feature_set_id, asset_id) ...` 合同 813 - 后续如果补 asset-level semantic writer,可以直接沿用同一个 `ON CONFLICT (feature_set_id, asset_id) ...` 合同
814
815
816 ## 新增:Phase-1 worker contract smoke 总览
817
818 为了让下次启动不用分别手工跑 exact worker 与 semantic matrix,本轮新增:
819
820 - `acr-engine/scripts/run_phase1_worker_contract_smoke_live.py`
821 - `acr-engine/data/pgvector_eval/music20/phase1_worker_contract_smoke_report.json`
822
823 它会:
824
825 1. reset `feature_extraction_job`
826 2. 跑一次 exact lane 非 dry-run
827 3. 再 reset jobs
828 4. 跑完整 semantic preflight matrix
829 5. 输出一个总览 JSON
830
831 ### 当前 smoke 总览结果
832
833 | lane | 结果 |
834 |---|---|
835 | exact | `failed` |
836 | exact failure reason | `unreadable_audio_assets` |
837 | exact missing assets | `20` |
838 | semantic jobs | `4` |
839 | semantic failed jobs | `4` |
840 | semantic blockers | `model_runtime_unavailable`, `unreadable_audio_assets` |
841
842 这说明:
843
844 - 当前 PostgreSQL worker contract 本身已经是**稳定的**
845 - 当前阻塞已经非常明确,主要不是 orchestration,而是环境:
846 - `/workspace/downloads` 未挂载
847 - semantic model runtime 未安装
......
...@@ -193,6 +193,7 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql ...@@ -193,6 +193,7 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql
193 - `audio_embedding` 已补上 window / asset 双路唯一键,后续真实 encoder 只需替换 inference adapter 即可复用同一 upsert 合同 193 - `audio_embedding` 已补上 window / asset 双路唯一键,后续真实 encoder 只需替换 inference adapter 即可复用同一 upsert 合同
194 - `scripts/run_phase1_embedding_preflight_matrix_live.py` 已跑通,4 条 semantic jobs(mert/muq/ecapa)在 `acr_test` 上都被稳定标记为 `preflight_failed`;当前共性 blocker 已收敛为 `/workspace/downloads` 缺失 + 语义模型 runtime 缺失 194 - `scripts/run_phase1_embedding_preflight_matrix_live.py` 已跑通,4 条 semantic jobs(mert/muq/ecapa)在 `acr_test` 上都被稳定标记为 `preflight_failed`;当前共性 blocker 已收敛为 `/workspace/downloads` 缺失 + 语义模型 runtime 缺失
195 - `scripts/validate_audio_embedding_asset_upsert_live.py` 已在隔离 schema `acr_asset_upsert_test` 上验证 `uq_audio_embedding_feature_asset`:重复 insert 会被唯一键拒绝,upsert 会复用同一 `embedding_id`,说明 asset-level 幂等键也已有真实证据 195 - `scripts/validate_audio_embedding_asset_upsert_live.py` 已在隔离 schema `acr_asset_upsert_test` 上验证 `uq_audio_embedding_feature_asset`:重复 insert 会被唯一键拒绝,upsert 会复用同一 `embedding_id`,说明 asset-level 幂等键也已有真实证据
196 - `scripts/run_phase1_worker_contract_smoke_live.py` 已提供一条命令的全局 smoke:当前 exact lane = `failed/unreadable_audio_assets`,semantic lane = `4/4 failed`,共性 blocker 已固化为音频挂载缺失 + 语义模型 runtime 缺失
196 - `phase1_hot_reference_v1` 在 `acr_test` 里已经真实补齐 `20` 个 reference members,因此 worker dry-run 当前看到的 scope 已是 `20 recordings / 20 assets / 20 windows` 197 - `phase1_hot_reference_v1` 在 `acr_test` 里已经真实补齐 `20` 个 reference members,因此 worker dry-run 当前看到的 scope 已是 `20 recordings / 20 assets / 20 windows`
197 - worker contract 现在已有基础前置状态保护;重复执行同一 chromaprint dry-run job 会被 `expected_status=pending` 明确拒绝,证据见 `phase1_worker_double_claim_guard_report.json` 198 - worker contract 现在已有基础前置状态保护;重复执行同一 chromaprint dry-run job 会被 `expected_status=pending` 明确拒绝,证据见 `phase1_worker_double_claim_guard_report.json`
198 - exact lane 的 `run_chromaprint_job.py` 已具备非 dry-run 写入路径;当前在 `acr_test` 的 live 结果是因为 `/workspace/downloads/...` 缺失而明确 `failed`,不是继续假装 `completed` 199 - exact lane 的 `run_chromaprint_job.py` 已具备非 dry-run 写入路径;当前在 `acr_test` 的 live 结果是因为 `/workspace/downloads/...` 缺失而明确 `failed`,不是继续假装 `completed`
......