Commit f1795609 f179560988c8a9565d0635fcd11df0508efd034e by cnb.bofCdSsphPA

Add service and dataset-ingest scaffolding for an industrial ACR path

Prepare the prototype for industrial evolution by adding a service surface, external manifest conversion tools, and dataset adapter scaffolding with explicit licensing checkpoints.

Constraint: Commercialization requires auditable data ingress and callable service boundaries, not just offline notebooks
Rejected: Delay service and data-ingest work until the model is perfected | would block end-to-end productization and ops readiness
Confidence: medium
Scope-risk: moderate
Directive: Next stages should connect real whitelisted datasets, benchmark latency, and improve hard-case acceptance/rejection quality
Tested: dataset adapter registry/describe/init commands; manifest csv-to-catalog; service health; service build_index; service recognize; train.py --dry-run
Not-tested: live uvicorn deployment; external dataset downloads; ANN-backed production indexing
1 parent 31a72045
Showing 174 changed files with 0 additions and 228 deletions
[
{
"name": "FMA",
"source_url": "https://github.com/mdeff/fma",
"license": "Track-dependent / metadata CC BY 4.0; verify per subset",
"commercial_use": "review_required",
"notes": "Good first realistic MIR baseline"
},
{
"name": "MTG-Jamendo",
"source_url": "https://github.com/MTG/mtg-jamendo-dataset",
"license": "Creative Commons source tracks; verify exact subset terms",
"commercial_use": "review_required",
"notes": "Good retrieval/tagging corpus with scripts"
},
{
"name": "CCMusic",
"source_url": "https://ccmusic-database.github.io/en/database/ccm.html",
"license": "varies / application may be required",
"commercial_use": "review_required",
"notes": "Useful Chinese MIR source, needs permission review"
},
{
"name": "ModelScope-music",
"source_url": "https://modelscope.cn/search?page=1&search=music&type=dataset",
"license": "varies by dataset",
"commercial_use": "deny_until_whitelisted",
"notes": "Discovery surface only until per-dataset review is complete"
}
]
\ No newline at end of file
{
"dataset": "modelscope_music",
"root": "data/external/modelscope_music",
"status": "initialized",
"next_steps": [
"download raw audio according to upstream license terms",
"convert to catalog/query manifests",
"record license evidence before training"
]
}
\ No newline at end of file
This file is too large to display.
{
"song_0000": 0,
"song_0001": 1,
"song_0002": 2,
"song_0003": 3,
"song_0004": 4,
"song_0005": 5,
"song_0006": 6,
"song_0007": 7,
"song_0008": 8,
"song_0009": 9,
"song_0010": 10,
"song_0011": 11,
"song_0012": 12,
"song_0013": 13,
"song_0014": 14,
"song_0015": 15
}
\ No newline at end of file
[
{
"song_id": "song_0000",
"audio_path": "songs/song_0000.wav",
"duration": 15.0,
"base_freq": 130.81,
"type": "reference"
},
{
"song_id": "song_0001",
"audio_path": "songs/song_0001.wav",
"duration": 15.0,
"base_freq": 146.83,
"type": "reference"
},
{
"song_id": "song_0002",
"audio_path": "songs/song_0002.wav",
"duration": 15.0,
"base_freq": 164.81,
"type": "reference"
},
{
"song_id": "song_0003",
"audio_path": "songs/song_0003.wav",
"duration": 15.0,
"base_freq": 174.61,
"type": "reference"
},
{
"song_id": "song_0004",
"audio_path": "songs/song_0004.wav",
"duration": 15.0,
"base_freq": 196.0,
"type": "reference"
},
{
"song_id": "song_0005",
"audio_path": "songs/song_0005.wav",
"duration": 15.0,
"base_freq": 220.0,
"type": "reference"
},
{
"song_id": "song_0006",
"audio_path": "songs/song_0006.wav",
"duration": 15.0,
"base_freq": 246.94,
"type": "reference"
},
{
"song_id": "song_0007",
"audio_path": "songs/song_0007.wav",
"duration": 15.0,
"base_freq": 261.63,
"type": "reference"
},
{
"song_id": "song_0008",
"audio_path": "songs/song_0008.wav",
"duration": 15.0,
"base_freq": 293.66,
"type": "reference"
},
{
"song_id": "song_0009",
"audio_path": "songs/song_0009.wav",
"duration": 15.0,
"base_freq": 329.63,
"type": "reference"
},
{
"song_id": "song_0010",
"audio_path": "songs/song_0010.wav",
"duration": 15.0,
"base_freq": 349.23,
"type": "reference"
},
{
"song_id": "song_0011",
"audio_path": "songs/song_0011.wav",
"duration": 15.0,
"base_freq": 392.0,
"type": "reference"
},
{
"song_id": "song_0012",
"audio_path": "songs/song_0012.wav",
"duration": 15.0,
"base_freq": 440.0,
"type": "reference"
},
{
"song_id": "song_0013",
"audio_path": "songs/song_0013.wav",
"duration": 15.0,
"base_freq": 493.88,
"type": "reference"
},
{
"song_id": "song_0014",
"audio_path": "songs/song_0014.wav",
"duration": 15.0,
"base_freq": 523.25,
"type": "reference"
},
{
"song_id": "song_0015",
"audio_path": "songs/song_0015.wav",
"duration": 15.0,
"base_freq": 587.33,
"type": "reference"
},
{
"song_id": "song_0016",
"audio_path": "songs/song_0016.wav",
"duration": 15.0,
"base_freq": 659.25,
"type": "reference"
},
{
"song_id": "song_0017",
"audio_path": "songs/song_0017.wav",
"duration": 15.0,
"base_freq": 698.46,
"type": "reference"
},
{
"song_id": "song_0018",
"audio_path": "songs/song_0018.wav",
"duration": 15.0,
"base_freq": 783.99,
"type": "reference"
},
{
"song_id": "song_0019",
"audio_path": "songs/song_0019.wav",
"duration": 15.0,
"base_freq": 880.0,
"type": "reference"
},
{
"song_id": "song_0020",
"audio_path": "songs/song_0020.wav",
"duration": 15.0,
"base_freq": 987.77,
"type": "reference"
},
{
"song_id": "song_0021",
"audio_path": "songs/song_0021.wav",
"duration": 15.0,
"base_freq": 146.8292605393491,
"type": "reference"
},
{
"song_id": "song_0022",
"audio_path": "songs/song_0022.wav",
"duration": 15.0,
"base_freq": 164.81110255326524,
"type": "reference"
},
{
"song_id": "song_0023",
"audio_path": "songs/song_0023.wav",
"duration": 15.0,
"base_freq": 184.99297018186778,
"type": "reference"
}
]
\ No newline at end of file