# status_snapshot.py 3.34 KB
#!/usr/bin/env python3
import argparse
import json
import subprocess
from pathlib import Path

# Directory the script is launched from; build_snapshot() resolves the
# data/ and reports/ paths against it.
root = Path.cwd()
# Parent of the launch directory; build_snapshot() resolves the docs/ paths
# against it.
workspace_root = root.parent
# Interpreter used to run src/data/external_adapters.py, both when invoking it
# and when rendering the suggested follow-up commands.
# NOTE(review): absolute, machine-specific path — confirm it exists on every
# host this script is expected to run on.
PYTHON = "/usr/local/miniconda3/bin/python"


def sh(cmd):
    """Run *cmd* through the shell and return its stdout, stripped.

    Args:
        cmd: Command line handed to the shell as a single string.

    Returns:
        The command's standard output with leading and trailing whitespace
        removed.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout.strip()


def check_dataset(dataset: str, input_dir: str):
    """Run the adapter's ``check-local-ready`` command and parse its output.

    Args:
        dataset: Dataset identifier passed to the adapter script.
        input_dir: Directory argument passed to the adapter script.

    Returns:
        Whatever ``json.loads`` produces from the adapter's standard output.

    Raises:
        subprocess.CalledProcessError: If the adapter exits with a non-zero
            status.
        json.JSONDecodeError: If the adapter's stdout is not valid JSON.
    """
    split_flags = ["--eval-ratio", "0.2", "--query-duration", "8.0"]
    argv = [
        PYTHON,
        "src/data/external_adapters.py",
        "check-local-ready",
        dataset,
        input_dir,
        *split_flags,
    ]
    # The script path is relative, so the child resolves it against the
    # working directory it inherits from this process.
    completed = subprocess.run(argv, stdout=subprocess.PIPE, text=True, check=True)
    return json.loads(completed.stdout)


def build_snapshot():
    """Assemble the status snapshot as a JSON-serializable dict.

    The result has these top-level keys, in order:

    * ``latest_commit`` - output of ``git log --oneline -n 1``.
    * ``docs`` - resolved paths of documentation files under
      ``workspace_root``.
    * ``drop_zones`` - resolved paths of the raw dataset directories under
      ``root``.
    * ``dataset_readiness`` - ``check_dataset`` result for each dataset.
    * ``verified_open_smoke_dirs`` - resolved paths of smoke-run directories
      under ``root``.
    * ``next_commands`` - ready-to-paste adapter command lines per dataset.

    Raises:
        subprocess.CalledProcessError: If either readiness check or the git
            command exits with a non-zero status.
    """
    fma_dir = "data/raw/fma_small_audio"
    jamendo_dir = "data/raw/mtg_jamendo_audio"
    fma_ready = check_dataset("fma", fma_dir)
    jamendo_ready = check_dataset("mtg_jamendo", jamendo_dir)

    # Build the suggested commands from the same directory variables that were
    # just checked, so the advertised commands cannot drift away from the
    # directories the readiness results describe.
    adapter = f"{PYTHON} src/data/external_adapters.py"
    split_flags = "--eval-ratio 0.2 --query-duration 8.0"
    smoke_flags = (
        f"--output-root data/external_smoke {split_flags}"
        " --train-epochs 1 --batch-size 2"
    )
    next_commands = {}
    # (key suffix, dataset name given to the adapter, input directory)
    for suffix, dataset, input_dir in (
        ("fma", "fma", fma_dir),
        ("jamendo", "mtg_jamendo", jamendo_dir),
    ):
        target = f"{dataset} {input_dir}"
        next_commands[f"check_{suffix}"] = (
            f"{adapter} check-local-ready {target} {split_flags}"
        )
        next_commands[f"inspect_{suffix}"] = (
            f"{adapter} inspect-local {target} {split_flags}"
        )
        next_commands[f"smoke_{suffix}"] = (
            f"{adapter} smoke-local {target} {smoke_flags}"
        )

    def resolved(base, relative):
        # Absolute, symlink-resolved string form of base/relative.
        return str((base / relative).resolve())

    return {
        "latest_commit": sh("git log --oneline -n 1"),
        "docs": {
            "readme": resolved(workspace_root, "docs/README.md"),
            "handoff": resolved(workspace_root, "docs/session-handoff.md"),
            "workflow": resolved(workspace_root, "docs/open-dataset-workflow.md"),
            "capability_map": resolved(workspace_root, "docs/current-capability-map.md"),
        },
        "drop_zones": {
            "fma": resolved(root, fma_dir),
            "mtg_jamendo": resolved(root, jamendo_dir),
        },
        "dataset_readiness": {
            "fma": fma_ready,
            "mtg_jamendo": jamendo_ready,
        },
        "verified_open_smoke_dirs": {
            "manifests": resolved(
                root, "data/external_ingested/synthetic_as_open_fixed/fma/manifests"
            ),
            "reports": resolved(root, "reports/open-smoke-fixed/fma"),
            "one_shot_reports": resolved(root, "data/external_smoke/fma_reports_smoke"),
        },
        "next_commands": next_commands,
    }


def main():
    """Build the status snapshot, print it as JSON, and optionally save it.

    Command line:
        --output PATH   Also write the JSON text to PATH; missing parent
                        directories are created first.

    The JSON text is printed to stdout whether or not ``--output`` is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", default=None)
    args = parser.parse_args()

    snapshot = build_snapshot()
    # ensure_ascii=False leaves non-ASCII characters unescaped in the text.
    text = json.dumps(snapshot, ensure_ascii=False, indent=2)
    if args.output:
        out = Path(args.output)
        out.parent.mkdir(parents=True, exist_ok=True)
        # Pin UTF-8 explicitly: with the locale's default encoding, the
        # unescaped non-ASCII characters could fail to encode or yield a file
        # that is not valid UTF-8 JSON.
        out.write_text(text, encoding="utf-8")
    print(text)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()