Commit 58c29eaa 58c29eaaa0e3c5de99f7b62d77c74a41b694c927 by cnb.bofCdSsphPA

Turn Phase-1 host prerequisites into a live audit artifact

Constraint: Worker-contract validation is now stable enough that the remaining uncertainty is host readiness, so the next blocker had to be made explicit instead of inferred from repeated failed runs.
Rejected: Keep prerequisite knowledge only in prose | It would drift and force future sessions to rediscover the same missing mounts and packages.
Confidence: high
Scope-risk: narrow
Directive: Run the prerequisite audit before retrying live extraction so host blockers are measured once and reused across lanes.
Tested: /usr/local/miniconda3/bin/python -m py_compile scripts/run_phase1_prereq_audit_live.py; git diff --check; /usr/local/miniconda3/bin/python scripts/run_phase1_prereq_audit_live.py --dsn 'postgres://d2:d2pass@127.0.0.1:5432/d2' --schema acr_test --output data/pgvector_eval/music20/phase1_prereq_audit_report.json
Not-tested: This audit does not install dependencies or mount assets; it only reports readiness.
1 parent 43d2f93a
1 {
2 "schema": "acr_test",
3 "dsn_redacted": "postgres://d2:***@127.0.0.1:5432/d2",
4 "downloads_root": "/workspace/downloads",
5 "downloads_root_exists": false,
6 "package_checks": {
7 "numpy": {
8 "package": "numpy",
9 "available": true
10 },
11 "speechbrain": {
12 "package": "speechbrain",
13 "available": false,
14 "error_type": "ModuleNotFoundError",
15 "error": "No module named 'speechbrain'"
16 },
17 "torch": {
18 "package": "torch",
19 "available": false,
20 "error_type": "ModuleNotFoundError",
21 "error": "No module named 'torch'"
22 },
23 "torchaudio": {
24 "package": "torchaudio",
25 "available": false,
26 "error_type": "ModuleNotFoundError",
27 "error": "No module named 'torchaudio'"
28 },
29 "transformers": {
30 "package": "transformers",
31 "available": false,
32 "error_type": "ModuleNotFoundError",
33 "error": "No module named 'transformers'"
34 }
35 },
36 "jobs": [
37 {
38 "extraction_job_id": 1,
39 "model_name": "chromaprint",
40 "model_version": "v1",
41 "embedding_dim": null,
42 "target_scope": "reference_set:phase1_hot_reference_v1",
43 "required_packages": [
44 "numpy"
45 ],
46 "missing_packages": [],
47 "downloads_root_exists": false,
48 "ready_for_live_worker": false
49 },
50 {
51 "extraction_job_id": 2,
52 "model_name": "mert",
53 "model_version": "v1-95m",
54 "embedding_dim": 768,
55 "target_scope": "reference_set:phase1_hot_reference_v1",
56 "required_packages": [
57 "numpy",
58 "torch",
59 "torchaudio",
60 "transformers"
61 ],
62 "missing_packages": [
63 "torch",
64 "torchaudio",
65 "transformers"
66 ],
67 "downloads_root_exists": false,
68 "ready_for_live_worker": false
69 },
70 {
71 "extraction_job_id": 3,
72 "model_name": "mert",
73 "model_version": "v1-95m",
74 "embedding_dim": 768,
75 "target_scope": "reference_set:phase1_hot_reference_v1",
76 "required_packages": [
77 "numpy",
78 "torch",
79 "torchaudio",
80 "transformers"
81 ],
82 "missing_packages": [
83 "torch",
84 "torchaudio",
85 "transformers"
86 ],
87 "downloads_root_exists": false,
88 "ready_for_live_worker": false
89 },
90 {
91 "extraction_job_id": 4,
92 "model_name": "muq",
93 "model_version": "large-msd-iter",
94 "embedding_dim": 768,
95 "target_scope": "reference_set:phase1_hot_reference_v1",
96 "required_packages": [
97 "numpy",
98 "torch",
99 "torchaudio",
100 "transformers"
101 ],
102 "missing_packages": [
103 "torch",
104 "torchaudio",
105 "transformers"
106 ],
107 "downloads_root_exists": false,
108 "ready_for_live_worker": false
109 },
110 {
111 "extraction_job_id": 5,
112 "model_name": "ecapa",
113 "model_version": "acr-baseline-v1",
114 "embedding_dim": 192,
115 "target_scope": "reference_set:phase1_hot_reference_v1",
116 "required_packages": [
117 "numpy",
118 "torch",
119 "torchaudio",
120 "speechbrain"
121 ],
122 "missing_packages": [
123 "torch",
124 "torchaudio",
125 "speechbrain"
126 ],
127 "downloads_root_exists": false,
128 "ready_for_live_worker": false
129 }
130 ],
131 "summary": {
132 "total_jobs": 5,
133 "ready_jobs": 0,
134 "blocked_jobs": 5,
135 "missing_packages_union": [
136 "speechbrain",
137 "torch",
138 "torchaudio",
139 "transformers"
140 ]
141 }
142 }
...\ No newline at end of file ...\ No newline at end of file
1 #!/usr/bin/env /usr/local/miniconda3/bin/python
2 from __future__ import annotations
3
4 import argparse
5 import importlib
6 import json
7 from pathlib import Path
8 import sys
9 from typing import Any
10
11 import psycopg
12
13 ROOT = Path(__file__).resolve().parents[1]
14 if str(ROOT) not in sys.path:
15 sys.path.insert(0, str(ROOT))
16
17 from workers._job_common import validate_schema
# Where the audit report is written when --output is not supplied.
DEFAULT_OUTPUT = ROOT.joinpath(
    'data', 'pgvector_eval', 'music20', 'phase1_prereq_audit_report.json'
)

# Importable packages that each model's job is audited against, keyed by the
# model name stored in the model registry. Models that are not listed here
# are audited against numpy only (see the lookups in main()).
MODEL_REQUIREMENTS = dict(
    mert=['numpy', 'torch', 'torchaudio', 'transformers'],
    muq=['numpy', 'torch', 'torchaudio', 'transformers'],
    ecapa=['numpy', 'torch', 'torchaudio', 'speechbrain'],
    chromaprint=['numpy'],
)
25
26
def check_import(name: str) -> dict[str, Any]:
    """Probe whether the module *name* can be imported by this interpreter.

    Args:
        name: Importable module name, e.g. ``'torch'``.

    Returns:
        ``{'package': name, 'available': True}`` when the import succeeds.
        Otherwise ``{'package': name, 'available': False, 'error_type': ...,
        'error': ...}`` where ``error_type`` is the exception class name and
        ``error`` is the first line of its message (``''`` when the exception
        carries no message).
    """
    try:
        importlib.import_module(name)
    except Exception as exc:  # noqa: BLE001
        # A broken install can raise anything at import time, not just
        # ImportError, so every failure is recorded as "not available"
        # instead of aborting the audit.
        # An exception raised without a message stringifies to '' and
        # ''.splitlines() is empty, so fall back to '' rather than indexing.
        first_line = next(iter(str(exc).splitlines()), '')
        return {
            'package': name,
            'available': False,
            'error_type': type(exc).__name__,
            'error': first_line,
        }
    return {'package': name, 'available': True}
33
34
def load_jobs(conn: psycopg.Connection) -> list[dict[str, Any]]:
    """Fetch every feature-extraction job together with its model metadata.

    Args:
        conn: Open connection on which the query is executed. Table names are
            unqualified, so they resolve through the connection's current
            ``search_path``.

    Returns:
        One dict per job, ordered by ``extraction_job_id``, with the keys
        ``extraction_job_id``, ``model_name``, ``model_version``,
        ``embedding_dim`` (``None`` when the column is NULL) and
        ``target_scope``.
    """
    cursor = conn.execute(
        """
        SELECT fej.extraction_job_id, mr.model_name, mr.model_version, fs.embedding_dim, fej.target_scope
        FROM feature_extraction_job fej
        JOIN feature_set_registry fs ON fs.feature_set_id = fej.feature_set_id
        JOIN model_registry mr ON mr.model_id = fs.model_id
        ORDER BY fej.extraction_job_id;
        """
    )

    jobs: list[dict[str, Any]] = []
    for job_id, model_name, model_version, embedding_dim, target_scope in cursor.fetchall():
        # Keep a NULL dimension as None; int(None) would raise.
        dim = None if embedding_dim is None else int(embedding_dim)
        jobs.append(
            {
                'extraction_job_id': int(job_id),
                'model_name': model_name,
                'model_version': model_version,
                'embedding_dim': dim,
                'target_scope': target_scope,
            }
        )
    return jobs
55
56
def _redact_dsn(dsn: str) -> str:
    """Return *dsn* with any embedded password replaced by ``***``.

    Handles both connection-string shapes: the URI form
    (``scheme://user:password@host/db``) and ``password=...`` assignments as
    they appear in keyword/value strings or URI query parameters. A DSN that
    carries no password is returned unchanged.
    """
    import re  # local import keeps this block free of new file-level deps

    # URI userinfo: keep scheme and user, mask what sits between ':' and '@'.
    masked = re.sub(
        r'^([A-Za-z][A-Za-z0-9+.\-]*://[^:@/?#]*):[^@/?#]*@',
        r'\1:***@',
        dsn,
    )
    # key=value secrets; the value is either single-quoted or a bare token.
    # Deliberately unanchored so that e.g. "sslpassword=" is masked as well.
    return re.sub(
        r"(?i)(password\s*=\s*)('(?:[^'\\]|\\.)*'|[^\s&]+)",
        r'\1***',
        masked,
    )


def main() -> None:
    """Audit host prerequisites for every extraction job and emit a report.

    Loads the jobs from the database, probes each required package once,
    records whether the downloads root exists, and writes the resulting JSON
    both to ``--output`` and to stdout. Nothing is installed or mounted; the
    run only reports readiness.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--dsn', required=True)
    ap.add_argument('--schema', default='acr_test')
    ap.add_argument('--downloads-root', default='/workspace/downloads')
    ap.add_argument('--output', default=str(DEFAULT_OUTPUT))
    args = ap.parse_args()

    schema = validate_schema(args.schema)
    downloads_root = Path(args.downloads_root)
    downloads_exists = downloads_root.exists()

    with psycopg.connect(args.dsn, autocommit=True) as conn:
        # NOTE(review): schema is interpolated into the statement text, so
        # this relies on validate_schema() only returning safe identifiers --
        # confirm against workers._job_common.
        conn.execute(f'SET search_path TO {schema}, public;')
        jobs = load_jobs(conn)

    # Probe each distinct package once, however many jobs need it. Models
    # missing from MODEL_REQUIREMENTS are audited against numpy only.
    package_names = sorted({pkg for job in jobs for pkg in MODEL_REQUIREMENTS.get(job['model_name'], ['numpy'])})
    package_checks = {item['package']: item for item in (check_import(name) for name in package_names)}

    job_reports = []
    for job in jobs:
        required = MODEL_REQUIREMENTS.get(job['model_name'], ['numpy'])
        missing = [name for name in required if not package_checks[name]['available']]
        job_reports.append(
            {
                **job,
                'required_packages': required,
                'missing_packages': missing,
                'downloads_root_exists': downloads_exists,
                # A job is ready only when the audio root exists AND every
                # required package imported successfully.
                'ready_for_live_worker': downloads_exists and not missing,
            }
        )

    payload = {
        'schema': schema,
        # Derived from the DSN actually used, so the report identifies the
        # database that was audited without leaking its password.
        'dsn_redacted': _redact_dsn(args.dsn),
        'downloads_root': str(downloads_root),
        'downloads_root_exists': downloads_exists,
        'package_checks': package_checks,
        'jobs': job_reports,
        'summary': {
            'total_jobs': len(job_reports),
            'ready_jobs': sum(1 for job in job_reports if job['ready_for_live_worker']),
            'blocked_jobs': sum(1 for job in job_reports if not job['ready_for_live_worker']),
            'missing_packages_union': sorted({pkg for job in job_reports for pkg in job['missing_packages']}),
        },
    }

    # Serialise once so the file and stdout are guaranteed to be identical.
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(rendered, encoding='utf-8')
    print(rendered)
108
109
110 if __name__ == '__main__':
111 main()
1 ## 2026-06-04 1 ## 2026-06-04
2 2
3 - 新增 `scripts/run_phase1_prereq_audit_live.py` 与 `phase1_prereq_audit_report.json`,把 `/workspace/downloads` 挂载状态、`torch/torchaudio/transformers/speechbrain` 依赖状态与 5 条 Phase-1 jobs 的 readiness 汇总到一份 live 审计报告;当前结果为 `ready_jobs=0`、`blocked_jobs=5`。
3 - 新增 `scripts/run_embedding_vector_table_negative_matrix_live.py``embedding_vector_table_negative_matrix_report.json`,在 live PostgreSQL 上补齐 semantic preflight 的三类向量表负例:维度不匹配、未 allowlist、schema 缺表;三类 case 都会稳定落到 `preflight_failed`,且 `vector_table_report.reason` 与预期一致。 4 - 新增 `scripts/run_embedding_vector_table_negative_matrix_live.py``embedding_vector_table_negative_matrix_report.json`,在 live PostgreSQL 上补齐 semantic preflight 的三类向量表负例:维度不匹配、未 allowlist、schema 缺表;三类 case 都会稳定落到 `preflight_failed`,且 `vector_table_report.reason` 与预期一致。
4 - 新增 `scripts/run_phase1_worker_contract_smoke_live.py``phase1_worker_contract_smoke_report.json`,把 exact lane 非 dry-run 验证与 semantic preflight matrix 合成一条 live smoke 命令;当前总览结果为 exact=`failed/unreadable_audio_assets`、semantic=`4/4 failed`,说明阻塞点已经收敛到环境挂载与模型 runtime,而不是 worker contract 本身。 5 - 新增 `scripts/run_phase1_worker_contract_smoke_live.py``phase1_worker_contract_smoke_report.json`,把 exact lane 非 dry-run 验证与 semantic preflight matrix 合成一条 live smoke 命令;当前总览结果为 exact=`failed/unreadable_audio_assets`、semantic=`4/4 failed`,说明阻塞点已经收敛到环境挂载与模型 runtime,而不是 worker contract 本身。
5 - 新增 `scripts/validate_audio_embedding_asset_upsert_live.py``audio_embedding_asset_upsert_live_report.json`,在隔离 schema `acr_asset_upsert_test` 上真实验证 `uq_audio_embedding_feature_asset`:重复普通 insert 会触发 `UniqueViolation`,而 `ON CONFLICT ... DO UPDATE` 会复用同一 `embedding_id`,最终 `audio_embedding/audio_embedding_vector_192` 行数都保持为 `1` 6 - 新增 `scripts/validate_audio_embedding_asset_upsert_live.py``audio_embedding_asset_upsert_live_report.json`,在隔离 schema `acr_asset_upsert_test` 上真实验证 `uq_audio_embedding_feature_asset`:重复普通 insert 会触发 `UniqueViolation`,而 `ON CONFLICT ... DO UPDATE` 会复用同一 `embedding_id`,最终 `audio_embedding/audio_embedding_vector_192` 行数都保持为 `1`
......
...@@ -872,3 +872,29 @@ cd /workspace/acr-engine ...@@ -872,3 +872,29 @@ cd /workspace/acr-engine
872 872
873 - 当前 semantic preflight 已经能够把“运行环境问题”和“配置错误问题”分层暴露 873 - 当前 semantic preflight 已经能够把“运行环境问题”和“配置错误问题”分层暴露
874 - 后续只要看 `vector_table_report.reason`,就能快速区分是 DDL/配置错误,还是模型 runtime/音频挂载错误 874 - 后续只要看 `vector_table_report.reason`,就能快速区分是 DDL/配置错误,还是模型 runtime/音频挂载错误
875
876
877 ## 新增:Phase-1 prerequisites audit
878
879 为了避免每次都靠肉眼猜“到底是音频挂载缺失,还是模型 runtime 缺失”,本轮新增:
880
881 - `acr-engine/scripts/run_phase1_prereq_audit_live.py`
882 - `acr-engine/data/pgvector_eval/music20/phase1_prereq_audit_report.json`
883
884 ### 当前审计结果
885
886 | 指标 | 结果 |
887 |---|---|
888 | `downloads_root_exists` | `false` |
889 | `total_jobs` | `5` |
890 | `ready_jobs` | `0` |
891 | `blocked_jobs` | `5` |
892 | 缺失依赖并集 | `speechbrain`, `torch`, `torchaudio`, `transformers` |
893
894 按 job 看:
895
896 - `chromaprint`:依赖层面可跑,但被 `/workspace/downloads` 缺失阻塞
897 - `mert / muq`:同时被 `/workspace/downloads` 缺失与 `torch/torchaudio/transformers` 缺失阻塞
898 - `ecapa`:同时被 `/workspace/downloads` 缺失与 `torch/torchaudio/speechbrain` 缺失阻塞
899
900 这使得“当前为什么跑不通”已经可以通过单份 JSON 报告回答,而不必重新手工试跑。
......
...@@ -195,6 +195,7 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql ...@@ -195,6 +195,7 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql
195 - `scripts/validate_audio_embedding_asset_upsert_live.py` 已在隔离 schema `acr_asset_upsert_test` 上验证 `uq_audio_embedding_feature_asset`:重复 insert 会被唯一键拒绝,upsert 会复用同一 `embedding_id`,说明 asset-level 幂等键也已有真实证据 195 - `scripts/validate_audio_embedding_asset_upsert_live.py` 已在隔离 schema `acr_asset_upsert_test` 上验证 `uq_audio_embedding_feature_asset`:重复 insert 会被唯一键拒绝,upsert 会复用同一 `embedding_id`,说明 asset-level 幂等键也已有真实证据
196 - `scripts/run_phase1_worker_contract_smoke_live.py` 已提供一条命令的全局 smoke:当前 exact lane = `failed/unreadable_audio_assets`,semantic lane = `4/4 failed`,共性 blocker 已固化为音频挂载缺失 + 语义模型 runtime 缺失 196 - `scripts/run_phase1_worker_contract_smoke_live.py` 已提供一条命令的全局 smoke:当前 exact lane = `failed/unreadable_audio_assets`,semantic lane = `4/4 failed`,共性 blocker 已固化为音频挂载缺失 + 语义模型 runtime 缺失
197 - `scripts/run_embedding_vector_table_negative_matrix_live.py` 已在 live PostgreSQL 上补齐 semantic vector-table 负例矩阵:`vector_table_dim_mismatch``vector_table_not_allowlisted``vector_table_missing_in_schema` 三类错误都能被稳定写入 `vector_table_report.reason` 197 - `scripts/run_embedding_vector_table_negative_matrix_live.py` 已在 live PostgreSQL 上补齐 semantic vector-table 负例矩阵:`vector_table_dim_mismatch``vector_table_not_allowlisted``vector_table_missing_in_schema` 三类错误都能被稳定写入 `vector_table_report.reason`
198 - `scripts/run_phase1_prereq_audit_live.py` 已给出当前 host 的先决条件审计:`downloads_root_exists=false`、`ready_jobs=0/5`,并把 `torch/torchaudio/transformers/speechbrain` 的缺失状态按 job 落成 JSON 报告
198 - `phase1_hot_reference_v1``acr_test` 里已经真实补齐 `20` 个 reference members,因此 worker dry-run 当前看到的 scope 已是 `20 recordings / 20 assets / 20 windows` 199 - `phase1_hot_reference_v1``acr_test` 里已经真实补齐 `20` 个 reference members,因此 worker dry-run 当前看到的 scope 已是 `20 recordings / 20 assets / 20 windows`
199 - worker contract 现在已有基础前置状态保护;重复执行同一 chromaprint dry-run job 会被 `expected_status=pending` 明确拒绝,证据见 `phase1_worker_double_claim_guard_report.json` 200 - worker contract 现在已有基础前置状态保护;重复执行同一 chromaprint dry-run job 会被 `expected_status=pending` 明确拒绝,证据见 `phase1_worker_double_claim_guard_report.json`
200 - exact lane 的 `run_chromaprint_job.py` 已具备非 dry-run 写入路径;当前在 `acr_test` 的 live 结果是因为 `/workspace/downloads/...` 缺失而明确 `failed`,不是继续假装 `completed` 201 - exact lane 的 `run_chromaprint_job.py` 已具备非 dry-run 写入路径;当前在 `acr_test` 的 live 结果是因为 `/workspace/downloads/...` 缺失而明确 `failed`,不是继续假装 `completed`
......