Expose smoke device control before scaling real-data runs
Constraint: Real FMA smoke is already running on CPU, but future smoke runs must be able to target GPU without manually splitting the pipeline
Rejected: Pass through raw 'auto' everywhere | run_demo/evaluate embedder paths cannot consume torch.device('auto') safely
Confidence: high
Scope-risk: narrow
Directive: Keep smoke orchestration device handling normalized at the adapter boundary unless all downstream CLIs gain native auto-device support
Tested: smoke-local --help shows --device; resolve_device('auto') returns cpu on this host; smoke-local synthetic run prints Device: cpu; manual build-index and evaluate succeed on smoke artifacts with top1=1.0 topk=1.0
Not-tested: End-to-end smoke-local completion on the long-running real FMA job and a live CUDA host path
Showing 4 changed files with 60 additions and 10 deletions
| ... | @@ -8,6 +8,7 @@ from typing import Dict, List | ... | @@ -8,6 +8,7 @@ from typing import Dict, List |
| 8 | import argparse | 8 | import argparse |
| 9 | import json | 9 | import json |
| 10 | import subprocess | 10 | import subprocess |
| 11 | import torch | ||
| 11 | 12 | ||
| 12 | 13 | ||
| 13 | AUDIO_EXTS = (".wav", ".mp3", ".flac", ".ogg") | 14 | AUDIO_EXTS = (".wav", ".mp3", ".flac", ".ogg") |
| ... | @@ -15,6 +16,12 @@ MIN_SMOKE_AUDIO_FILES = 2 | ... | @@ -15,6 +16,12 @@ MIN_SMOKE_AUDIO_FILES = 2 |
| 15 | MIN_SMOKE_ELIGIBLE_QUERY_FILES = 2 | 16 | MIN_SMOKE_ELIGIBLE_QUERY_FILES = 2 |
| 16 | 17 | ||
| 17 | 18 | ||
| 19 | def resolve_device(device: str) -> str: | ||
| 20 | if device == "auto": | ||
| 21 | return "cuda" if torch.cuda.is_available() else "cpu" | ||
| 22 | return device | ||
| 23 | |||
| 24 | |||
| 18 | @dataclass | 25 | @dataclass |
| 19 | class DatasetRecord: | 26 | class DatasetRecord: |
| 20 | name: str | 27 | name: str |
| ... | @@ -306,6 +313,7 @@ def smoke_local_dataset( | ... | @@ -306,6 +313,7 @@ def smoke_local_dataset( |
| 306 | seed: int, | 313 | seed: int, |
| 307 | train_epochs: int, | 314 | train_epochs: int, |
| 308 | batch_size: int, | 315 | batch_size: int, |
| 316 | device: str, | ||
| 309 | ) -> Dict: | 317 | ) -> Dict: |
| 310 | readiness = assess_local_dataset_ready( | 318 | readiness = assess_local_dataset_ready( |
| 311 | dataset, | 319 | dataset, |
| ... | @@ -321,6 +329,7 @@ def smoke_local_dataset( | ... | @@ -321,6 +329,7 @@ def smoke_local_dataset( |
| 321 | }, indent=2, ensure_ascii=False)) | 329 | }, indent=2, ensure_ascii=False)) |
| 322 | 330 | ||
| 323 | adapter = ADAPTERS[dataset] | 331 | adapter = ADAPTERS[dataset] |
| 332 | resolved_device = resolve_device(device) | ||
| 324 | inspect_summary = readiness["inspect"] | 333 | inspect_summary = readiness["inspect"] |
| 325 | prepare_summary = adapter.prepare_local_audio( | 334 | prepare_summary = adapter.prepare_local_audio( |
| 326 | input_dir, | 335 | input_dir, |
| ... | @@ -342,7 +351,7 @@ def smoke_local_dataset( | ... | @@ -342,7 +351,7 @@ def smoke_local_dataset( |
| 342 | "train.py", | 351 | "train.py", |
| 343 | "--data", str(manifests_dir), | 352 | "--data", str(manifests_dir), |
| 344 | "--output", str(model_dir), | 353 | "--output", str(model_dir), |
| 345 | "--device", "cpu", | 354 | "--device", resolved_device, |
| 346 | "--epochs", str(train_epochs), | 355 | "--epochs", str(train_epochs), |
| 347 | "--batch-size", str(batch_size), | 356 | "--batch-size", str(batch_size), |
| 348 | ], check=True) | 357 | ], check=True) |
| ... | @@ -354,7 +363,7 @@ def smoke_local_dataset( | ... | @@ -354,7 +363,7 @@ def smoke_local_dataset( |
| 354 | "--data", str(manifests_dir), | 363 | "--data", str(manifests_dir), |
| 355 | "--model", str(model_dir / "best_model.pt"), | 364 | "--model", str(model_dir / "best_model.pt"), |
| 356 | "--output", str(index_dir), | 365 | "--output", str(index_dir), |
| 357 | "--device", "cpu", | 366 | "--device", resolved_device, |
| 358 | ], check=True) | 367 | ], check=True) |
| 359 | 368 | ||
| 360 | report_dir.mkdir(parents=True, exist_ok=True) | 369 | report_dir.mkdir(parents=True, exist_ok=True) |
| ... | @@ -366,7 +375,7 @@ def smoke_local_dataset( | ... | @@ -366,7 +375,7 @@ def smoke_local_dataset( |
| 366 | "--model", str(model_dir / "best_model.pt"), | 375 | "--model", str(model_dir / "best_model.pt"), |
| 367 | "--index-prefix", str(index_dir / "reference"), | 376 | "--index-prefix", str(index_dir / "reference"), |
| 368 | "--split", "test", | 377 | "--split", "test", |
| 369 | "--device", "cpu", | 378 | "--device", resolved_device, |
| 370 | "--fast-eval", | 379 | "--fast-eval", |
| 371 | "--output-json", str(eval_json), | 380 | "--output-json", str(eval_json), |
| 372 | ], check=True) | 381 | ], check=True) |
| ... | @@ -377,6 +386,8 @@ def smoke_local_dataset( | ... | @@ -377,6 +386,8 @@ def smoke_local_dataset( |
| 377 | "run": { | 386 | "run": { |
| 378 | "train_epochs": train_epochs, | 387 | "train_epochs": train_epochs, |
| 379 | "batch_size": batch_size, | 388 | "batch_size": batch_size, |
| 389 | "requested_device": device, | ||
| 390 | "resolved_device": resolved_device, | ||
| 380 | }, | 391 | }, |
| 381 | } | 392 | } |
| 382 | report_dir.mkdir(parents=True, exist_ok=True) | 393 | report_dir.mkdir(parents=True, exist_ok=True) |
| ... | @@ -398,6 +409,8 @@ def smoke_local_dataset( | ... | @@ -398,6 +409,8 @@ def smoke_local_dataset( |
| 398 | "inspect": inspect_summary, | 409 | "inspect": inspect_summary, |
| 399 | "prepare": prepare_summary, | 410 | "prepare": prepare_summary, |
| 400 | "validate": validate_summary, | 411 | "validate": validate_summary, |
| 412 | "requested_device": device, | ||
| 413 | "resolved_device": resolved_device, | ||
| 401 | "model_dir": str(model_dir), | 414 | "model_dir": str(model_dir), |
| 402 | "index_dir": str(index_dir), | 415 | "index_dir": str(index_dir), |
| 403 | "report_dir": str(report_dir), | 416 | "report_dir": str(report_dir), |
| ... | @@ -457,6 +470,7 @@ def main(): | ... | @@ -457,6 +470,7 @@ def main(): |
| 457 | p.add_argument("--seed", type=int, default=42) | 470 | p.add_argument("--seed", type=int, default=42) |
| 458 | p.add_argument("--train-epochs", type=int, default=1) | 471 | p.add_argument("--train-epochs", type=int, default=1) |
| 459 | p.add_argument("--batch-size", type=int, default=2) | 472 | p.add_argument("--batch-size", type=int, default=2) |
| 473 | p.add_argument("--device", default="cpu") | ||
| 460 | 474 | ||
| 461 | args = parser.parse_args() | 475 | args = parser.parse_args() |
| 462 | if args.cmd == "registry": | 476 | if args.cmd == "registry": |
| ... | @@ -508,6 +522,7 @@ def main(): | ... | @@ -508,6 +522,7 @@ def main(): |
| 508 | seed=args.seed, | 522 | seed=args.seed, |
| 509 | train_epochs=args.train_epochs, | 523 | train_epochs=args.train_epochs, |
| 510 | batch_size=args.batch_size, | 524 | batch_size=args.batch_size, |
| 525 | device=args.device, | ||
| 511 | ) | 526 | ) |
| 512 | print(json.dumps(summary, indent=2, ensure_ascii=False)) | 527 | print(json.dumps(summary, indent=2, ensure_ascii=False)) |
| 513 | 528 | ... | ... |
| ... | @@ -2,6 +2,35 @@ | ... | @@ -2,6 +2,35 @@ |
| 2 | 2 | ||
| 3 | ## 2026-06-02 | 3 | ## 2026-06-02 |
| 4 | 4 | ||
| 5 | ### Stage: 让 smoke-local 支持显式设备选择并验证 auto 设备解析 | ||
| 6 | |||
| 7 | 完成项: | ||
| 8 | - 修改 `acr-engine/src/data/external_adapters.py`,为 `smoke-local` 增加 `--device` | ||
| 9 | - 增加 `auto -> cpu/cuda` 的内部解析,避免把字符串 `auto` 直接传给 embedding / eval 侧 | ||
| 10 | - 将训练、建索引、评测三个子命令统一改为透传解析后的设备 | ||
| 11 | - 在 smoke 配置摘要中记录 `requested_device` 与 `resolved_device` | ||
| 12 | - 同步更新 [open-dataset-workflow.md](./open-dataset-workflow.md) 与 [training-data-and-pgvector-guide.md](./training-data-and-pgvector-guide.md) | ||
| 13 | |||
| 14 | 验证结果: | ||
| 15 | - CLI 验证: | ||
| 16 | - `/usr/local/miniconda3/bin/python src/data/external_adapters.py smoke-local --help` 已出现 `--device DEVICE` | ||
| 17 | - 最小链路验证: | ||
| 18 | - 使用 `data/synthetic_v2/songs` 运行 `smoke-local ... --device auto` | ||
| 19 | - 训练阶段输出 `Device: cpu`,说明 `auto` 已被正确解析 | ||
| 20 | - 随后手动验证后半段命令可正常运行: | ||
| 21 | - `run_demo.py build-index --device cpu` | ||
| 22 | - `evaluate.py --device cpu` | ||
| 23 | - `evaluate.py` 返回: | ||
| 24 | - `top1=1.0` | ||
| 25 | - `topk=1.0` | ||
| 26 | - 真实 FMA 状态复检: | ||
| 27 | - 真实 FMA smoke 主进程仍存活 | ||
| 28 | - 当前子进程停留在 `run_demo.py build-index ... --device cpu` | ||
| 29 | |||
| 30 | 结论: | ||
| 31 | - 现在 `smoke-local` 已具备 GPU/CPU/auto 设备入口,可直接用于后续真实数据 GPU smoke | ||
| 32 | - 同时也暴露出新的后续任务:真实 FMA smoke 的后半段索引/产物生成仍需继续观察与优化 | ||
| 33 | |||
| 5 | 34 | ||
| 6 | ### Stage: 补齐训练数据、重叠窗口、GPU 与 FMA 数据处理文档 | 35 | ### Stage: 补齐训练数据、重叠窗口、GPU 与 FMA 数据处理文档 |
| 7 | 36 | ... | ... |
| ... | @@ -85,6 +85,7 @@ flowchart LR | ... | @@ -85,6 +85,7 @@ flowchart LR |
| 85 | 85 | ||
| 86 | ```bash | 86 | ```bash |
| 87 | /usr/local/miniconda3/bin/python src/data/external_adapters.py smoke-local fma data/raw/fma_small_audio --output-root data/external_smoke --eval-ratio 0.2 --query-duration 8.0 --train-epochs 1 --batch-size 2 | 87 | /usr/local/miniconda3/bin/python src/data/external_adapters.py smoke-local fma data/raw/fma_small_audio --output-root data/external_smoke --eval-ratio 0.2 --query-duration 8.0 --train-epochs 1 --batch-size 2 |
| 88 | /usr/local/miniconda3/bin/python src/data/external_adapters.py smoke-local fma data/raw/fma_small_audio --output-root data/external_smoke --eval-ratio 0.2 --query-duration 8.0 --train-epochs 1 --batch-size 2 --device auto | ||
| 88 | ``` | 89 | ``` |
| 89 | 90 | ||
| 90 | 真实目录放置位置可参考: | 91 | 真实目录放置位置可参考: |
| ... | @@ -128,6 +129,8 @@ flowchart LR | ... | @@ -128,6 +129,8 @@ flowchart LR |
| 128 | - `release-checklist.md` | 129 | - `release-checklist.md` |
| 129 | - `smoke-local`: | 130 | - `smoke-local`: |
| 130 | - 会一次性返回 inspect / prepare / validate / report 路径摘要 | 131 | - 会一次性返回 inspect / prepare / validate / report 路径摘要 |
| 132 | - 现在支持 `--device cpu|cuda|auto` | ||
| 133 | - `auto` 会在 smoke 内部解析成实际设备,避免把字符串 `auto` 直接传给 embedding/eval 侧 | ||
| 131 | 134 | ||
| 132 | --- | 135 | --- |
| 133 | 136 | ... | ... |
| ... | @@ -9,7 +9,7 @@ | ... | @@ -9,7 +9,7 @@ |
| 9 | 9 | ||
| 10 | 1. **当前训练输入的最小单位是“带 `song_id` 的 query 样本 + reference 资产 + manifest”**,不是直接把 3 分钟 mp3 整批扔进模型。 | 10 | 1. **当前训练输入的最小单位是“带 `song_id` 的 query 样本 + reference 资产 + manifest”**,不是直接把 3 分钟 mp3 整批扔进模型。 |
| 11 | 2. **3 分钟 mp3 当前在训练端通常不是预切全量重叠窗口,而是运行时随机裁 5s;检索端才是重叠滑窗。** | 11 | 2. **3 分钟 mp3 当前在训练端通常不是预切全量重叠窗口,而是运行时随机裁 5s;检索端才是重叠滑窗。** |
| 12 | 3. **如果有 GPU,FMA 这类真实数据训练会明显加速,当前 `train.py` 已支持 `auto/cuda`,但 `smoke-local` 现在仍硬编码为 CPU。** | 12 | 3. **如果有 GPU,FMA 这类真实数据训练会明显加速;当前 `train.py` 支持 `auto/cuda`,`smoke-local` 也已支持 `--device cpu|cuda|auto`,其中 `auto` 会在 smoke 内部解析成实际设备。** |
| 13 | 4. **FMA、MTG-Jamendo、自有 BGM/录音都应先变成统一 manifest,再做训练、评测和 pgvector 入库。** | 13 | 4. **FMA、MTG-Jamendo、自有 BGM/录音都应先变成统一 manifest,再做训练、评测和 pgvector 入库。** |
| 14 | 5. **后续你们要扩自己的数据集时,最重要的不是文件后缀,而是 `song_id / type / offset / source_dataset / split` 这些结构化字段。** | 14 | 5. **后续你们要扩自己的数据集时,最重要的不是文件后缀,而是 `song_id / type / offset / source_dataset / split` 这些结构化字段。** |
| 15 | 15 | ||
| ... | @@ -242,18 +242,21 @@ cd /workspace/acr-engine | ... | @@ -242,18 +242,21 @@ cd /workspace/acr-engine |
| 242 | |---|---| | 242 | |---|---| |
| 243 | | `train.py` | 支持 `--device auto/cuda/cpu` | | 243 | | `train.py` | 支持 `--device auto/cuda/cpu` | |
| 244 | | CUDA mixed precision | 已支持 | | 244 | | CUDA mixed precision | 已支持 | |
| 245 | | `smoke-local` | **当前硬编码 `--device cpu`** | | 245 | | `smoke-local` | 现已支持 `--device cpu|cuda|auto` | |
| 246 | | `evaluate.py` | 当前 CLI 默认 `cpu` | | 246 | | `evaluate.py` | 当前 CLI 默认 `cpu` | |
| 247 | | `run_demo.py build-index` | 当前 smoke 里也走 `cpu` | | 247 | | `run_demo.py build-index` | smoke 里使用 `smoke-local` 解析后的设备(默认 `cpu`) | |
| 248 | 248 | ||
| 249 | ### 当前要注意的一点 | 249 | ### 当前要注意的一点 |
| 250 | 250 | ||
| 251 | `smoke-local` 虽然支持真实数据,但它现在为了稳妥把: | 251 | `smoke-local` 现在已经支持显式设备选择,但有一个实现细节必须明确: |
| 252 | - 训练 | 252 | - `train.py` 可以直接理解 `auto` |
| 253 | - 建索引 | 253 | - `run_demo.py / evaluate.py` 的 embedding 侧不能直接吃字符串 `auto` |
| 254 | - 评测 | ||
| 255 | 254 | ||
| 256 | 都固定到了 **CPU**。所以如果你想真正在本机上加速真实 FMA 训练,后续应该继续把 `smoke-local` 的 device 变成可配置项。 | 255 | 所以当前 `smoke-local` 的做法是: |
| 256 | - 对外允许传 `--device auto` | ||
| 257 | - 对内先解析成真实设备,再分发给训练 / 建索引 / 评测 | ||
| 258 | |||
| 259 | 这让真实数据 smoke 可以直接复用 GPU,而不需要手工拆成多段命令。 | ||
| 257 | 260 | ||
| 258 | --- | 261 | --- |
| 259 | 262 | ... | ... |
-
Please register or sign in to post a comment