Commit 730d9b90 730d9b908e8cb220e28b581ae65b4b185fd388d3 by cnb.bofCdSsphPA

Make long-running FMA archive progress legible at a glance

Constraint: Multi-session continuation gets brittle when large real-data downloads require manual byte math to estimate progress
Rejected: Leave inspect output as raw archive size only | Forces every future session to recalculate completion state by hand
Confidence: high
Scope-risk: narrow
Directive: Keep progress fields stable so handoff tooling and humans can rely on them during long archive transfers
Tested: /usr/local/miniconda3/bin/python -m py_compile acr-engine/scripts/prepare_fma_archive.py; /usr/local/miniconda3/bin/python acr-engine/scripts/prepare_fma_archive.py inspect
Not-tested: Completion of the full archive and downstream extraction remain pending
1 parent d1d7a512
...@@ -9,6 +9,7 @@ import subprocess ...@@ -9,6 +9,7 @@ import subprocess
9 from pathlib import Path 9 from pathlib import Path
10 10
11 FMA_SMALL_URL = "https://modelscope.cn/datasets/pengzhendong/fma/resolve/master/fma_small.zip" 11 FMA_SMALL_URL = "https://modelscope.cn/datasets/pengzhendong/fma/resolve/master/fma_small.zip"
12 FMA_SMALL_BYTES = 7679594875
12 ARCHIVE_PATH = Path("data/raw/fma_small.zip") 13 ARCHIVE_PATH = Path("data/raw/fma_small.zip")
13 EXTRACT_DIR = Path("data/raw/fma_small_audio") 14 EXTRACT_DIR = Path("data/raw/fma_small_audio")
14 15
...@@ -42,12 +43,17 @@ def inspect() -> dict: ...@@ -42,12 +43,17 @@ def inspect() -> dict:
42 num_audio = 0 43 num_audio = 0
43 if extract_exists: 44 if extract_exists:
44 num_audio = len([p for p in EXTRACT_DIR.rglob('*') if p.suffix.lower() in {'.mp3', '.wav', '.flac', '.ogg'}]) 45 num_audio = len([p for p in EXTRACT_DIR.rglob('*') if p.suffix.lower() in {'.mp3', '.wav', '.flac', '.ogg'}])
46 archive_size = ARCHIVE_PATH.stat().st_size if archive_exists else 0
47 progress_ratio = (archive_size / FMA_SMALL_BYTES) if archive_exists and FMA_SMALL_BYTES else 0.0
45 return { 48 return {
46 "action": "inspect", 49 "action": "inspect",
47 "archive_url": FMA_SMALL_URL, 50 "archive_url": FMA_SMALL_URL,
51 "archive_bytes_expected": FMA_SMALL_BYTES,
48 "archive_path": str(ARCHIVE_PATH.resolve()), 52 "archive_path": str(ARCHIVE_PATH.resolve()),
49 "archive_exists": archive_exists, 53 "archive_exists": archive_exists,
50 "archive_size": ARCHIVE_PATH.stat().st_size if archive_exists else 0, 54 "archive_size": archive_size,
55 "archive_progress_ratio": round(progress_ratio, 6),
56 "archive_progress_percent": round(progress_ratio * 100, 4),
51 "extract_dir": str(EXTRACT_DIR.resolve()), 57 "extract_dir": str(EXTRACT_DIR.resolve()),
52 "extract_exists": extract_exists, 58 "extract_exists": extract_exists,
53 "num_audio_files": num_audio, 59 "num_audio_files": num_audio,
......
...@@ -230,6 +230,27 @@ ...@@ -230,6 +230,27 @@
230 230
231 231
232 232
233
234 ### Stage: FMA 下载进度可视化
235
236 完成项:
237 - 增强 [acr-engine/scripts/prepare_fma_archive.py](../acr-engine/scripts/prepare_fma_archive.py) 的 `inspect` 输出
238 - 新增:
239 - `archive_bytes_expected`
240 - `archive_progress_ratio`
241 - `archive_progress_percent`
242
243 验证结果:
244 - `/usr/local/miniconda3/bin/python -m py_compile scripts/prepare_fma_archive.py` 成功
245 - `/usr/local/miniconda3/bin/python scripts/prepare_fma_archive.py inspect` 成功
246 - 当前结果:
247 - `archive_size=61550592`
248 - `archive_progress_percent=0.8015`
249
250 结论:
251 - 新 session 现在不需要手工换算大包下载进度
252 - 长时间 FMA 下载的交接成本进一步降低
253
233 ### Stage: FMA 源切换到 ModelScope 254 ### Stage: FMA 源切换到 ModelScope
234 255
235 完成项: 256 完成项:
......