# fma_postdownload_ready.py 1.74 KB
#!/usr/bin/env python3
"""Post-download automation for the real FMA small archive.

Runs extract + readiness checks once the archive is complete.
If the archive is incomplete, exits with a structured blocked result.
"""

from __future__ import annotations

import json
import subprocess
from pathlib import Path

# Interpreter used to launch the helper scripts as subprocesses.
PYTHON = "/usr/local/miniconda3/bin/python"

# Minimum size, in bytes, the archive must reach before it counts as complete.
EXPECTED_BYTES = 7_679_594_875

# Downloaded zip archive whose size is checked before any processing starts.
ARCHIVE = Path("data", "raw", "fma_small.zip")

# Directory passed to the readiness and inspection commands.
EXTRACT_DIR = Path("data", "raw", "fma_small_audio")


def run_json(cmd: list[str]) -> dict:
    """Run *cmd* and return its standard output decoded as JSON.

    Args:
        cmd: Program and arguments, passed to the process without a shell.

    Returns:
        Whatever ``json.loads`` produces from the command's stdout.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        json.JSONDecodeError: If the captured output is not valid JSON.
    """
    # Only stdout is captured; stderr stays attached to the parent process.
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    return json.loads(completed.stdout)


def main() -> None:
    """Extract and check the FMA archive, or report that it is not ready.

    Prints exactly one JSON document to stdout:

    * ``"status": "blocked"`` with size and progress details when the archive
      is missing or smaller than ``EXPECTED_BYTES``; no helper command is run
      in that case.
    * ``"status": "ok"`` with the parsed output of the extract, readiness and
      inspection commands otherwise.

    Raises:
        subprocess.CalledProcessError: If a helper command exits non-zero.
        json.JSONDecodeError: If a helper command prints invalid JSON.
    """
    # Stat the archive once instead of calling exists() and then stat(): if
    # the file disappears between two separate calls (e.g. a download still
    # in flight renames it), the script would crash rather than report
    # "blocked".
    try:
        archive_size = ARCHIVE.stat().st_size
    except (FileNotFoundError, NotADirectoryError):
        archive_exists = False
        archive_size = 0
    else:
        archive_exists = True

    if not archive_exists or archive_size < EXPECTED_BYTES:
        if archive_exists:
            progress_percent = round((archive_size / EXPECTED_BYTES) * 100, 4)
        else:
            progress_percent = 0.0
        print(json.dumps({
            "status": "blocked",
            "reason": "archive_not_complete",
            "archive_exists": archive_exists,
            "archive_size": archive_size,
            "expected_bytes": EXPECTED_BYTES,
            "progress_percent": progress_percent,
        }, indent=2, ensure_ascii=False))
        return

    # Both adapter sub-commands take the same dataset arguments; build them
    # once so the two invocations cannot drift apart.
    adapter_script = "src/data/external_adapters.py"
    adapter_args = [
        "fma",
        str(EXTRACT_DIR),
        "--eval-ratio", "0.2",
        "--query-duration", "8.0",
    ]

    # Order matters: extraction runs before the two checks on EXTRACT_DIR.
    extract = run_json([PYTHON, "scripts/prepare_fma_archive.py", "extract"])
    ready = run_json([PYTHON, adapter_script, "check-local-ready", *adapter_args])
    inspect = run_json([PYTHON, adapter_script, "inspect-local", *adapter_args])

    print(json.dumps({
        "status": "ok",
        "extract": extract,
        "ready": ready,
        "inspect": inspect,
    }, indent=2, ensure_ascii=False))


# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()