download_open_dataset.py 1018 Bytes
#!/usr/bin/env python3
"""Helpers for optional open music dataset integration."""

import argparse
import json
from pathlib import Path

# Registry of the open music datasets this helper knows about.
# Each key is the dataset name accepted on the command line; each value
# records the upstream repository URL and a short note on how to use it.
DATASETS = {
    "fma_small": dict(
        url="https://github.com/mdeff/fma",
        notes="Use FMA small subset first; convert clips into catalog/query JSON for local experiments.",
    ),
    "mtg_jamendo": dict(
        url="https://github.com/MTG/mtg-jamendo-dataset",
        notes="Use upstream download scripts; sample a small subset into catalog/query structure.",
    ),
}


def main(argv=None):
    """Write the integration note for one known dataset to a JSON file.

    The chosen entry of ``DATASETS`` is serialized as a single-key JSON
    object (``{dataset_name: {...}}``) at the ``--output`` path. Missing
    parent directories of that path are created first.

    Args:
        argv: Command-line arguments to parse, without the program name.
            When ``None`` (the default) argparse falls back to
            ``sys.argv[1:]``, so calling ``main()`` behaves as before.

    Raises:
        SystemExit: Raised by argparse when the arguments are invalid or
            ``--help`` is requested.
        OSError: If the output directory or file cannot be created.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "dataset",
        choices=sorted(DATASETS),
        help="name of the dataset whose integration note should be written",
    )
    parser.add_argument(
        "--output",
        default="../docs/open-datasets.json",
        # A relative path is resolved against the current working directory,
        # not against the location of this script.
        help="destination JSON file (default: %(default)s)",
    )
    args = parser.parse_args(argv)

    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the written file independent of the locale.
    with out.open("w", encoding="utf-8") as f:
        json.dump({args.dataset: DATASETS[args.dataset]}, f, indent=2)
    print(f"Wrote dataset integration note to {out}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()