Make the Phase-1 planner carry live validation entrypoints
Constraint: The repo now has multiple verified smoke and audit scripts, so leaving them outside the planner would force future sessions to rediscover the right validation commands by reading docs.
Rejected: Document the commands only in markdown | That would drift from the executable plan artifact and slow restart execution.
Confidence: high
Scope-risk: narrow
Directive: Treat validation_commands in the planner as the first-stop entrypoints before running individual extraction jobs on a new host.
Tested: /usr/local/miniconda3/bin/python -m py_compile scripts/plan_phase1_extraction_jobs_live.py; git diff --check; /usr/local/miniconda3/bin/python scripts/plan_phase1_extraction_jobs_live.py --dsn 'postgres://d2:d2pass@127.0.0.1:5432/d2' --schema acr_test --job-status pending --output data/pgvector_eval/music20/phase1_extraction_plan_report.json
Not-tested: The planner still emits commands for an environment-blocked host and does not prove successful extraction by itself.
Showing 6 changed files with 66 additions and 0 deletions
| ... | @@ -467,6 +467,12 @@ | ... | @@ -467,6 +467,12 @@ |
| 467 | } | 467 | } |
| 468 | ] | 468 | ] |
| 469 | }, | 469 | }, |
| 470 | "validation_commands": { | ||
| 471 | "prereq_audit": "cd /workspace/acr-engine && PG_DSN=\"${PG_DSN:?set PG_DSN}\" /usr/local/miniconda3/bin/python scripts/run_phase1_prereq_audit_live.py --dsn \"$PG_DSN\" --schema acr_test --output data/pgvector_eval/music20/phase1_prereq_audit_report.json", | ||
| 472 | "worker_contract_smoke": "cd /workspace/acr-engine && PG_DSN=\"${PG_DSN:?set PG_DSN}\" /usr/local/miniconda3/bin/python scripts/run_phase1_worker_contract_smoke_live.py --dsn \"$PG_DSN\" --schema acr_test --output data/pgvector_eval/music20/phase1_worker_contract_smoke_report.json", | ||
| 473 | "semantic_vector_negative_matrix": "cd /workspace/acr-engine && PG_DSN=\"${PG_DSN:?set PG_DSN}\" /usr/local/miniconda3/bin/python scripts/run_embedding_vector_table_negative_matrix_live.py --dsn \"$PG_DSN\" --output data/pgvector_eval/music20/embedding_vector_table_negative_matrix_report.json", | ||
| 474 | "asset_level_upsert_validation": "cd /workspace/acr-engine && PG_DSN=\"${PG_DSN:?set PG_DSN}\" /usr/local/miniconda3/bin/python scripts/validate_audio_embedding_asset_upsert_live.py --dsn \"$PG_DSN\" --schema acr_asset_upsert_test --output data/pgvector_eval/music20/audio_embedding_asset_upsert_live_report.json" | ||
| 475 | }, | ||
| 470 | "execution_order_summary": [ | 476 | "execution_order_summary": [ |
| 471 | { | 477 | { |
| 472 | "order": 1, | 478 | "order": 1, | ... | ... |
| ... | @@ -60,6 +60,29 @@ def build_command_suggestions(job: dict[str, Any], schema: str) -> list[str]: | ... | @@ -60,6 +60,29 @@ def build_command_suggestions(job: dict[str, Any], schema: str) -> list[str]: |
| 60 | return commands | 60 | return commands |
| 61 | 61 | ||
| 62 | 62 | ||
| 63 | def build_validation_commands(schema: str) -> dict[str, str]: | ||
| 64 | command_prefix = 'cd /workspace/acr-engine && ' | ||
| 65 | base = command_prefix + 'PG_DSN="${PG_DSN:?set PG_DSN}" ' | ||
| 66 | return { | ||
| 67 | 'prereq_audit': ( | ||
| 68 | base | ||
| 69 | + f"{PYTHON_BIN} scripts/run_phase1_prereq_audit_live.py --dsn \"$PG_DSN\" --schema {schema} --output data/pgvector_eval/music20/phase1_prereq_audit_report.json" | ||
| 70 | ), | ||
| 71 | 'worker_contract_smoke': ( | ||
| 72 | base | ||
| 73 | + f"{PYTHON_BIN} scripts/run_phase1_worker_contract_smoke_live.py --dsn \"$PG_DSN\" --schema {schema} --output data/pgvector_eval/music20/phase1_worker_contract_smoke_report.json" | ||
| 74 | ), | ||
| 75 | 'semantic_vector_negative_matrix': ( | ||
| 76 | base | ||
| 77 | + f"{PYTHON_BIN} scripts/run_embedding_vector_table_negative_matrix_live.py --dsn \"$PG_DSN\" --output data/pgvector_eval/music20/embedding_vector_table_negative_matrix_report.json" | ||
| 78 | ), | ||
| 79 | 'asset_level_upsert_validation': ( | ||
| 80 | base | ||
| 81 | + f"{PYTHON_BIN} scripts/validate_audio_embedding_asset_upsert_live.py --dsn \"$PG_DSN\" --schema acr_asset_upsert_test --output data/pgvector_eval/music20/audio_embedding_asset_upsert_live_report.json" | ||
| 82 | ), | ||
| 83 | } | ||
| 84 | |||
| 85 | |||
| 63 | def main() -> None: | 86 | def main() -> None: |
| 64 | ap = argparse.ArgumentParser() | 87 | ap = argparse.ArgumentParser() |
| 65 | ap.add_argument('--dsn', required=True) | 88 | ap.add_argument('--dsn', required=True) |
| ... | @@ -166,6 +189,7 @@ def main() -> None: | ... | @@ -166,6 +189,7 @@ def main() -> None: |
| 166 | }, | 189 | }, |
| 167 | 'ordered_jobs': jobs, | 190 | 'ordered_jobs': jobs, |
| 168 | 'by_lane': by_lane, | 191 | 'by_lane': by_lane, |
| 192 | 'validation_commands': build_validation_commands(schema), | ||
| 169 | 'execution_order_summary': [ | 193 | 'execution_order_summary': [ |
| 170 | { | 194 | { |
| 171 | 'order': idx + 1, | 195 | 'order': idx + 1, | ... | ... |
| 1 | ## 2026-06-04 | 1 | ## 2026-06-04 |
| 2 | 2 | ||
| 3 | - 更新 `scripts/plan_phase1_extraction_jobs_live.py` 与 `phase1_extraction_plan_report.json`,除了 per-job `command_suggestions` 之外,又补充了 `validation_commands`:`prereq_audit`、`worker_contract_smoke`、`semantic_vector_negative_matrix`、`asset_level_upsert_validation`,使 planner 本身也成为下次 session 的执行入口。 | ||
| 3 | - 新增 `scripts/run_phase1_prereq_audit_live.py` 与 `phase1_prereq_audit_report.json`,把 `/workspace/downloads` 挂载状态、`torch/torchaudio/transformers/speechbrain` 依赖状态与 5 条 Phase-1 jobs 的 readiness 汇总到一份 live 审计报告;当前结果为 `ready_jobs=0`、`blocked_jobs=5`。 | 4 | - 新增 `scripts/run_phase1_prereq_audit_live.py` 与 `phase1_prereq_audit_report.json`,把 `/workspace/downloads` 挂载状态、`torch/torchaudio/transformers/speechbrain` 依赖状态与 5 条 Phase-1 jobs 的 readiness 汇总到一份 live 审计报告;当前结果为 `ready_jobs=0`、`blocked_jobs=5`。 |
| 4 | - 新增 `scripts/run_embedding_vector_table_negative_matrix_live.py` 与 `embedding_vector_table_negative_matrix_report.json`,在 live PostgreSQL 上补齐 semantic preflight 的三类向量表负例:维度不匹配、未 allowlist、schema 缺表;三类 case 都会稳定落到 `preflight_failed`,且 `vector_table_report.reason` 与预期一致。 | 5 | - 新增 `scripts/run_embedding_vector_table_negative_matrix_live.py` 与 `embedding_vector_table_negative_matrix_report.json`,在 live PostgreSQL 上补齐 semantic preflight 的三类向量表负例:维度不匹配、未 allowlist、schema 缺表;三类 case 都会稳定落到 `preflight_failed`,且 `vector_table_report.reason` 与预期一致。 |
| 5 | - 新增 `scripts/run_phase1_worker_contract_smoke_live.py` 与 `phase1_worker_contract_smoke_report.json`,把 exact lane 非 dry-run 验证与 semantic preflight matrix 合成一条 live smoke 命令;当前总览结果为 exact=`failed/unreadable_audio_assets`、semantic=`4/4 failed`,说明阻塞点已经收敛到环境挂载与模型 runtime,而不是 worker contract 本身。 | 6 | - 新增 `scripts/run_phase1_worker_contract_smoke_live.py` 与 `phase1_worker_contract_smoke_report.json`,把 exact lane 非 dry-run 验证与 semantic preflight matrix 合成一条 live smoke 命令;当前总览结果为 exact=`failed/unreadable_audio_assets`、semantic=`4/4 failed`,说明阻塞点已经收敛到环境挂载与模型 runtime,而不是 worker contract 本身。 | ... | ... |
| ... | @@ -450,3 +450,21 @@ cd /workspace/acr-engine && PG_DSN="${PG_DSN:?set PG_DSN}" EXTRACTION_JOB_ID=2 F | ... | @@ -450,3 +450,21 @@ cd /workspace/acr-engine && PG_DSN="${PG_DSN:?set PG_DSN}" EXTRACTION_JOB_ID=2 F |
| 450 | ``` | 450 | ``` |
| 451 | 451 | ||
| 452 | 这意味着下个 session 不需要先手工拼环境变量和 job 绑定关系,而可以直接从 planner 报告里复制命令模板。 | 452 | 这意味着下个 session 不需要先手工拼环境变量和 job 绑定关系,而可以直接从 planner 报告里复制命令模板。 |
| 453 | |||
| 454 | ### 10.4 planner 现在也会附带 validation commands | ||
| 455 | |||
| 456 | 除了 per-job command suggestion,当前 planner 还会输出一组全局验证入口: | ||
| 457 | |||
| 458 | - `prereq_audit` | ||
| 459 | - `worker_contract_smoke` | ||
| 460 | - `semantic_vector_negative_matrix` | ||
| 461 | - `asset_level_upsert_validation` | ||
| 462 | |||
| 463 | 也就是: | ||
| 464 | |||
| 465 | 1. 先审计 host 前置条件 | ||
| 466 | 2. 再跑 exact+semantic 的 contract smoke | ||
| 467 | 3. 再检查 semantic vector-table 负例是否稳定 | ||
| 468 | 4. 再验证 asset-level upsert contract | ||
| 469 | |||
| 470 | 这让 planner 从“只会排任务”升级成“同时给出执行前检查入口”的交付物。 | ... | ... |
| ... | @@ -140,6 +140,10 @@ Phase-1 的交付目标不是“证明某个新模型绝对最优”,而是: | ... | @@ -140,6 +140,10 @@ Phase-1 的交付目标不是“证明某个新模型绝对最优”,而是: |
| 140 | - [ ] 做 query encode | 140 | - [ ] 做 query encode |
| 141 | - [ ] 返回 `retrieval_candidate` | 141 | - [ ] 返回 `retrieval_candidate` |
| 142 | - [ ] 聚合到 `recording / work / canonical_song` | 142 | - [ ] 聚合到 `recording / work / canonical_song` |
| 143 | - [ ] 跑 `prereq_audit` | ||
| 144 | - [ ] 跑 `worker_contract_smoke` | ||
| 145 | - [ ] 跑 `semantic_vector_negative_matrix` | ||
| 146 | - [ ] 跑 `asset_level_upsert_validation` | ||
| 143 | 147 | ||
| 144 | ### 第一版聚合建议 | 148 | ### 第一版聚合建议 |
| 145 | - max score | 149 | - max score |
| ... | @@ -224,3 +228,15 @@ flowchart TD | ... | @@ -224,3 +228,15 @@ flowchart TD |
| 224 | 4. encoder-only 抽特征 | 228 | 4. encoder-only 抽特征 |
| 225 | 5. 双路召回与聚合 | 229 | 5. 双路召回与聚合 |
| 226 | 6. benchmark 与门禁 | 230 | 6. benchmark 与门禁 |
| 231 | |||
| 232 | |||
| 233 | ## 6.1 当前 planner 已提供的 validation entrypoints | ||
| 234 | |||
| 235 | `acr-engine/scripts/plan_phase1_extraction_jobs_live.py` 现在除了 job 级 `command_suggestions`,还会在 `phase1_extraction_plan_report.json` 里附带: | ||
| 236 | |||
| 237 | - `validation_commands.prereq_audit` | ||
| 238 | - `validation_commands.worker_contract_smoke` | ||
| 239 | - `validation_commands.semantic_vector_negative_matrix` | ||
| 240 | - `validation_commands.asset_level_upsert_validation` | ||
| 241 | |||
| 242 | 这意味着下次启动时可以先跑“全局验证入口”,再决定是否执行具体 job,而不必手工拼测试命令。 | ... | ... |
| ... | @@ -196,6 +196,7 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql | ... | @@ -196,6 +196,7 @@ sed -n '1,320p' acr-engine/sql/acr_pg_schema_v2.sql |
| 196 | - `scripts/run_phase1_worker_contract_smoke_live.py` 已提供一条命令的全局 smoke:当前 exact lane = `failed/unreadable_audio_assets`,semantic lane = `4/4 failed`,共性 blocker 已固化为音频挂载缺失 + 语义模型 runtime 缺失 | 196 | - `scripts/run_phase1_worker_contract_smoke_live.py` 已提供一条命令的全局 smoke:当前 exact lane = `failed/unreadable_audio_assets`,semantic lane = `4/4 failed`,共性 blocker 已固化为音频挂载缺失 + 语义模型 runtime 缺失 |
| 197 | - `scripts/run_embedding_vector_table_negative_matrix_live.py` 已在 live PostgreSQL 上补齐 semantic vector-table 负例矩阵:`vector_table_dim_mismatch`、`vector_table_not_allowlisted`、`vector_table_missing_in_schema` 三类错误都能被稳定写入 `vector_table_report.reason` | 197 | - `scripts/run_embedding_vector_table_negative_matrix_live.py` 已在 live PostgreSQL 上补齐 semantic vector-table 负例矩阵:`vector_table_dim_mismatch`、`vector_table_not_allowlisted`、`vector_table_missing_in_schema` 三类错误都能被稳定写入 `vector_table_report.reason` |
| 198 | - `scripts/run_phase1_prereq_audit_live.py` 已给出当前 host 的先决条件审计:`downloads_root_exists=false`、`ready_jobs=0/5`,并把 `torch/torchaudio/transformers/speechbrain` 的缺失状态按 job 落成 JSON 报告 | 198 | - `scripts/run_phase1_prereq_audit_live.py` 已给出当前 host 的先决条件审计:`downloads_root_exists=false`、`ready_jobs=0/5`,并把 `torch/torchaudio/transformers/speechbrain` 的缺失状态按 job 落成 JSON 报告 |
| 199 | - `phase1_extraction_plan_report.json` 现已附带 `validation_commands`,下次 session 可以直接从 planner 复制 `prereq_audit / worker_contract_smoke / semantic_vector_negative_matrix / asset_level_upsert_validation` 四类命令 | ||
| 199 | - `phase1_hot_reference_v1` 在 `acr_test` 里已经真实补齐 `20` 个 reference members,因此 worker dry-run 当前看到的 scope 已是 `20 recordings / 20 assets / 20 windows` | 200 | - `phase1_hot_reference_v1` 在 `acr_test` 里已经真实补齐 `20` 个 reference members,因此 worker dry-run 当前看到的 scope 已是 `20 recordings / 20 assets / 20 windows` |
| 200 | - worker contract 现在已有基础前置状态保护;重复执行同一 chromaprint dry-run job 会被 `expected_status=pending` 明确拒绝,证据见 `phase1_worker_double_claim_guard_report.json` | 201 | - worker contract 现在已有基础前置状态保护;重复执行同一 chromaprint dry-run job 会被 `expected_status=pending` 明确拒绝,证据见 `phase1_worker_double_claim_guard_report.json` |
| 201 | - exact lane 的 `run_chromaprint_job.py` 已具备非 dry-run 写入路径;当前在 `acr_test` 的 live 结果是因为 `/workspace/downloads/...` 缺失而明确 `failed`,不是继续假装 `completed` | 202 | - exact lane 的 `run_chromaprint_job.py` 已具备非 dry-run 写入路径;当前在 `acr_test` 的 live 结果是因为 `/workspace/downloads/...` 缺失而明确 `failed`,不是继续假装 `completed` | ... | ... |
-
Please register or sign in to post a comment