status_snapshot.py
3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
import argparse
import json
import subprocess
from pathlib import Path
# Directory the script was launched from; captured once at import time.
# The relative data/report paths in the snapshot are resolved against it.
root = Path.cwd()
# Parent of the launch directory; the docs/ paths in the snapshot are
# resolved against it.
workspace_root = root.parent
# Absolute path of the interpreter used to run the external adapter script
# and embedded in the suggested command lines.
# NOTE(review): hard-coded to a miniconda install location - confirm this is
# intended to be the same on every machine that runs the script.
PYTHON = "/usr/local/miniconda3/bin/python"
def sh(cmd):
    """Run ``cmd`` through the shell and return its stdout without surrounding whitespace.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
    """
    captured = subprocess.check_output(cmd, shell=True, text=True)
    return captured.strip()
def check_dataset(dataset: str, input_dir: str):
    """Run the adapter's ``check-local-ready`` command and return its decoded JSON output.

    The adapter script is invoked with the interpreter named by ``PYTHON``
    and a relative script path, so it is resolved against the current
    working directory of this process.

    Args:
        dataset: Dataset identifier passed to the adapter.
        input_dir: Directory passed to the adapter as the dataset location.

    Returns:
        Whatever JSON value the adapter printed on stdout, decoded.

    Raises:
        subprocess.CalledProcessError: if the adapter exits with a non-zero status.
        json.JSONDecodeError: if the adapter's stdout is not valid JSON.
    """
    split_options = ["--eval-ratio", "0.2", "--query-duration", "8.0"]
    argv = [
        PYTHON,
        "src/data/external_adapters.py",
        "check-local-ready",
        dataset,
        input_dir,
        *split_options,
    ]
    return json.loads(subprocess.check_output(argv, text=True))
def _commands_for(label, dataset, input_dir):
    """Return the check/inspect/smoke command lines for one dataset, keyed ``<action>_<label>``."""
    adapter = f"{PYTHON} src/data/external_adapters.py"
    split_flags = "--eval-ratio 0.2 --query-duration 8.0"
    target = f"{dataset} {input_dir}"
    return {
        f"check_{label}": f"{adapter} check-local-ready {target} {split_flags}",
        f"inspect_{label}": f"{adapter} inspect-local {target} {split_flags}",
        f"smoke_{label}": (
            f"{adapter} smoke-local {target} --output-root data/external_smoke "
            f"{split_flags} --train-epochs 1 --batch-size 2"
        ),
    }


def build_snapshot():
    """Assemble the status snapshot as a JSON-serialisable dict.

    The snapshot contains the latest git commit line, absolute paths of the
    docs and data/report directories, the adapter's readiness report for
    each dataset, and the suggested follow-up command lines.

    Returns:
        dict with the keys ``latest_commit``, ``docs``, ``drop_zones``,
        ``dataset_readiness``, ``verified_open_smoke_dirs`` and
        ``next_commands``.

    Raises:
        subprocess.CalledProcessError: if a readiness check or the git
            command exits with a non-zero status.
    """
    fma_dir = "data/raw/fma_small_audio"
    jamendo_dir = "data/raw/mtg_jamendo_audio"

    # Readiness checks run before anything else, so a failing adapter aborts
    # the snapshot before git is invoked.
    readiness = {
        "fma": check_dataset("fma", fma_dir),
        "mtg_jamendo": check_dataset("mtg_jamendo", jamendo_dir),
    }

    # The suggested commands are derived from the same directory variables
    # used for the drop zones and readiness checks, so they cannot drift apart.
    next_commands = {
        **_commands_for("fma", "fma", fma_dir),
        **_commands_for("jamendo", "mtg_jamendo", jamendo_dir),
    }

    return {
        "latest_commit": sh("git log --oneline -n 1"),
        "docs": {
            "readme": str((workspace_root / "docs/README.md").resolve()),
            "handoff": str((workspace_root / "docs/session-handoff.md").resolve()),
            "workflow": str((workspace_root / "docs/open-dataset-workflow.md").resolve()),
            "capability_map": str((workspace_root / "docs/current-capability-map.md").resolve()),
        },
        "drop_zones": {
            "fma": str((root / fma_dir).resolve()),
            "mtg_jamendo": str((root / jamendo_dir).resolve()),
        },
        "dataset_readiness": readiness,
        "verified_open_smoke_dirs": {
            "manifests": str((root / "data/external_ingested/synthetic_as_open_fixed/fma/manifests").resolve()),
            "reports": str((root / "reports/open-smoke-fixed/fma").resolve()),
            "one_shot_reports": str((root / "data/external_smoke/fma_reports_smoke").resolve()),
        },
        "next_commands": next_commands,
    }
def main():
    """Build the status snapshot, print it as JSON and optionally save it.

    Command-line options:
        --output PATH  Also write the JSON to PATH, creating missing parent
                       directories.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", default=None)
    args = parser.parse_args()

    snapshot = build_snapshot()
    text = json.dumps(snapshot, ensure_ascii=False, indent=2)

    if args.output:
        out = Path(args.output)
        out.parent.mkdir(parents=True, exist_ok=True)
        # ensure_ascii=False can leave non-ASCII characters in the text, so
        # pin UTF-8 rather than depending on the locale's default encoding.
        out.write_text(text, encoding="utf-8")
    print(text)
# Run the snapshot CLI only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()