Make long-running FMA archive progress legible at a glance
Constraint: Multi-session continuation gets brittle when large real-data downloads require manual byte math to estimate progress
Rejected: Leave inspect output as raw archive size only | Forces every future session to recalculate completion state by hand
Confidence: high
Scope-risk: narrow
Directive: Keep progress fields stable so handoff tooling and humans can rely on them during long archive transfers
Tested: /usr/local/miniconda3/bin/python -m py_compile acr-engine/scripts/prepare_fma_archive.py; /usr/local/miniconda3/bin/python acr-engine/scripts/prepare_fma_archive.py inspect
Not-tested: Completion of the full archive and downstream extraction remain pending
Showing 2 changed files with 28 additions and 1 deletion
| ... | @@ -9,6 +9,7 @@ import subprocess | ... | @@ -9,6 +9,7 @@ import subprocess |
| 9 | from pathlib import Path | 9 | from pathlib import Path |
| 10 | 10 | ||
| 11 | FMA_SMALL_URL = "https://modelscope.cn/datasets/pengzhendong/fma/resolve/master/fma_small.zip" | 11 | FMA_SMALL_URL = "https://modelscope.cn/datasets/pengzhendong/fma/resolve/master/fma_small.zip" |
| 12 | FMA_SMALL_BYTES = 7679594875 | ||
| 12 | ARCHIVE_PATH = Path("data/raw/fma_small.zip") | 13 | ARCHIVE_PATH = Path("data/raw/fma_small.zip") |
| 13 | EXTRACT_DIR = Path("data/raw/fma_small_audio") | 14 | EXTRACT_DIR = Path("data/raw/fma_small_audio") |
| 14 | 15 | ||
| ... | @@ -42,12 +43,17 @@ def inspect() -> dict: | ... | @@ -42,12 +43,17 @@ def inspect() -> dict: |
| 42 | num_audio = 0 | 43 | num_audio = 0 |
| 43 | if extract_exists: | 44 | if extract_exists: |
| 44 | num_audio = len([p for p in EXTRACT_DIR.rglob('*') if p.suffix.lower() in {'.mp3', '.wav', '.flac', '.ogg'}]) | 45 | num_audio = len([p for p in EXTRACT_DIR.rglob('*') if p.suffix.lower() in {'.mp3', '.wav', '.flac', '.ogg'}]) |
| 46 | archive_size = ARCHIVE_PATH.stat().st_size if archive_exists else 0 | ||
| 47 | progress_ratio = (archive_size / FMA_SMALL_BYTES) if archive_exists and FMA_SMALL_BYTES else 0.0 | ||
| 45 | return { | 48 | return { |
| 46 | "action": "inspect", | 49 | "action": "inspect", |
| 47 | "archive_url": FMA_SMALL_URL, | 50 | "archive_url": FMA_SMALL_URL, |
| 51 | "archive_bytes_expected": FMA_SMALL_BYTES, | ||
| 48 | "archive_path": str(ARCHIVE_PATH.resolve()), | 52 | "archive_path": str(ARCHIVE_PATH.resolve()), |
| 49 | "archive_exists": archive_exists, | 53 | "archive_exists": archive_exists, |
| 50 | "archive_size": ARCHIVE_PATH.stat().st_size if archive_exists else 0, | 54 | "archive_size": archive_size, |
| 55 | "archive_progress_ratio": round(progress_ratio, 6), | ||
| 56 | "archive_progress_percent": round(progress_ratio * 100, 4), | ||
| 51 | "extract_dir": str(EXTRACT_DIR.resolve()), | 57 | "extract_dir": str(EXTRACT_DIR.resolve()), |
| 52 | "extract_exists": extract_exists, | 58 | "extract_exists": extract_exists, |
| 53 | "num_audio_files": num_audio, | 59 | "num_audio_files": num_audio, | ... | ... |
| ... | @@ -230,6 +230,27 @@ | ... | @@ -230,6 +230,27 @@ |
| 230 | 230 | ||
| 231 | 231 | ||
| 232 | 232 | ||
| 233 | |||
| 234 | ### Stage: FMA 下载进度可视化 | ||
| 235 | |||
| 236 | 完成项: | ||
| 237 | - 增强 [acr-engine/scripts/prepare_fma_archive.py](../acr-engine/scripts/prepare_fma_archive.py) 的 `inspect` 输出 | ||
| 238 | - 新增: | ||
| 239 | - `archive_bytes_expected` | ||
| 240 | - `archive_progress_ratio` | ||
| 241 | - `archive_progress_percent` | ||
| 242 | |||
| 243 | 验证结果: | ||
| 244 | - `/usr/local/miniconda3/bin/python -m py_compile scripts/prepare_fma_archive.py` 成功 | ||
| 245 | - `/usr/local/miniconda3/bin/python scripts/prepare_fma_archive.py inspect` 成功 | ||
| 246 | - 当前结果: | ||
| 247 | - `archive_size=61550592` | ||
| 248 | - `archive_progress_percent=0.8015` | ||
| 249 | |||
| 250 | 结论: | ||
| 251 | - 新 session 现在不需要手工换算大包下载进度 | ||
| 252 | - 长时间 FMA 下载的交接成本进一步降低 | ||
| 253 | |||
| 233 | ### Stage: FMA 源切换到 ModelScope | 254 | ### Stage: FMA 源切换到 ModelScope |
| 234 | 255 | ||
| 235 | 完成项: | 256 | 完成项: | ... | ... |
-
Please register or sign in to post a comment