#!/usr/bin/env python3
# File: fma_postdownload_ready.py
"""Post-download automation for the real FMA small archive.
Runs extract + readiness checks once the archive is complete.
If the archive is incomplete, exits with a structured blocked result.
"""
from __future__ import annotations
import json
import subprocess
from pathlib import Path
# Interpreter used to launch the helper scripts as subprocesses.
PYTHON = "/usr/local/miniconda3/bin/python"

# Size in bytes the archive must reach before it is treated as complete.
# NOTE(review): presumably the published size of fma_small.zip - confirm.
EXPECTED_BYTES = 7_679_594_875

# Location of the downloaded archive, relative to the working directory.
ARCHIVE = Path("data", "raw", "fma_small.zip")

# Directory handed to the adapter checks as the extracted-audio location.
EXTRACT_DIR = Path("data", "raw", "fma_small_audio")
def run_json(cmd: list[str]) -> dict:
    """Run *cmd* and return its standard output parsed as JSON.

    Args:
        cmd: Program and arguments, passed to ``subprocess`` as a list
            (no shell is involved).

    Returns:
        The decoded JSON document printed by the command.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status.
        json.JSONDecodeError: The command's stdout was not valid JSON.
    """
    # Only stdout is captured; the child's stderr passes through to ours.
    out = subprocess.check_output(cmd, text=True)
    return json.loads(out)
def main() -> None:
    """Run extraction and readiness checks once the archive is complete.

    If the archive is missing or smaller than ``EXPECTED_BYTES``, print a
    JSON document with ``"status": "blocked"`` and return without running
    anything. Otherwise invoke the extract, ``check-local-ready`` and
    ``inspect-local`` helper commands in that order and print their JSON
    results under ``"status": "ok"``.

    Raises:
        subprocess.CalledProcessError: A helper command exited non-zero.
        json.JSONDecodeError: A helper command printed non-JSON output.
    """
    # Stat once rather than exists() followed by stat(): the archive may be
    # renamed or removed by a concurrent downloader between the two calls.
    try:
        archive_size = ARCHIVE.stat().st_size
    except (FileNotFoundError, NotADirectoryError):
        archive_exists = False
        archive_size = 0
    else:
        archive_exists = True

    if not archive_exists or archive_size < EXPECTED_BYTES:
        progress = (
            round((archive_size / EXPECTED_BYTES) * 100, 4)
            if archive_exists
            else 0.0
        )
        print(json.dumps({
            "status": "blocked",
            "reason": "archive_not_complete",
            "archive_exists": archive_exists,
            "archive_size": archive_size,
            "expected_bytes": EXPECTED_BYTES,
            "progress_percent": progress,
        }, indent=2, ensure_ascii=False))
        return

    adapter_script = "src/data/external_adapters.py"
    # Both adapter subcommands receive the same dataset arguments.
    adapter_args = [
        "fma",
        str(EXTRACT_DIR),
        "--eval-ratio", "0.2",
        "--query-duration", "8.0",
    ]

    extract = run_json([PYTHON, "scripts/prepare_fma_archive.py", "extract"])
    ready = run_json([PYTHON, adapter_script, "check-local-ready", *adapter_args])
    # Named ``inspection`` so the local does not shadow the stdlib ``inspect``.
    inspection = run_json([PYTHON, adapter_script, "inspect-local", *adapter_args])

    print(json.dumps({
        "status": "ok",
        "extract": extract,
        "ready": ready,
        "inspect": inspection,
    }, indent=2, ensure_ascii=False))
# Run the automation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()