Commit f1795609 f179560988c8a9565d0635fcd11df0508efd034e by cnb.bofCdSsphPA

Add service and dataset-ingest scaffolding for an industrial ACR path

Prepare the prototype to evolve toward industrial (production) use by adding a service surface, external manifest conversion tools, and dataset adapter scaffolding with explicit licensing checkpoints.

Constraint: Commercialization requires auditable data ingress and callable service boundaries, not just offline notebooks
Rejected: Delay service and data-ingest work until the model is perfected | would block end-to-end productization and ops readiness
Confidence: medium
Scope-risk: moderate
Directive: Next stages should connect real whitelisted datasets, benchmark latency, and improve hard-case acceptance/rejection quality
Tested: dataset adapter registry/describe/init commands; manifest csv-to-catalog; service health; service build_index; service recognize; train.py --dry-run
Not-tested: live uvicorn deployment; external dataset downloads; ANN-backed production indexing
1 parent 31a72045
Showing 174 changed files with 0 additions and 228 deletions
1 [
2 {
3 "name": "FMA",
4 "source_url": "https://github.com/mdeff/fma",
5 "license": "Track-dependent / metadata CC BY 4.0; verify per subset",
6 "commercial_use": "review_required",
7 "notes": "Good first realistic MIR baseline"
8 },
9 {
10 "name": "MTG-Jamendo",
11 "source_url": "https://github.com/MTG/mtg-jamendo-dataset",
12 "license": "Creative Commons source tracks; verify exact subset terms",
13 "commercial_use": "review_required",
14 "notes": "Good retrieval/tagging corpus with scripts"
15 },
16 {
17 "name": "CCMusic",
18 "source_url": "https://ccmusic-database.github.io/en/database/ccm.html",
19 "license": "varies / application may be required",
20 "commercial_use": "review_required",
21 "notes": "Useful Chinese MIR source, needs permission review"
22 },
23 {
24 "name": "ModelScope-music",
25 "source_url": "https://modelscope.cn/search?page=1&search=music&type=dataset",
26 "license": "varies by dataset",
27 "commercial_use": "deny_until_whitelisted",
28 "notes": "Discovery surface only until per-dataset review is complete"
29 }
30 ]
...\ No newline at end of file ...\ No newline at end of file
1 {
2 "dataset": "modelscope_music",
3 "root": "data/external/modelscope_music",
4 "status": "initialized",
5 "next_steps": [
6 "download raw audio according to upstream license terms",
7 "convert to catalog/query manifests",
8 "record license evidence before training"
9 ]
10 }
...\ No newline at end of file ...\ No newline at end of file
This file is too large to display.
1 {
2 "song_0000": 0,
3 "song_0001": 1,
4 "song_0002": 2,
5 "song_0003": 3,
6 "song_0004": 4,
7 "song_0005": 5,
8 "song_0006": 6,
9 "song_0007": 7,
10 "song_0008": 8,
11 "song_0009": 9,
12 "song_0010": 10,
13 "song_0011": 11,
14 "song_0012": 12,
15 "song_0013": 13,
16 "song_0014": 14,
17 "song_0015": 15
18 }
...\ No newline at end of file ...\ No newline at end of file
1 [
2 {
3 "song_id": "song_0000",
4 "audio_path": "songs/song_0000.wav",
5 "duration": 15.0,
6 "base_freq": 130.81,
7 "type": "reference"
8 },
9 {
10 "song_id": "song_0001",
11 "audio_path": "songs/song_0001.wav",
12 "duration": 15.0,
13 "base_freq": 146.83,
14 "type": "reference"
15 },
16 {
17 "song_id": "song_0002",
18 "audio_path": "songs/song_0002.wav",
19 "duration": 15.0,
20 "base_freq": 164.81,
21 "type": "reference"
22 },
23 {
24 "song_id": "song_0003",
25 "audio_path": "songs/song_0003.wav",
26 "duration": 15.0,
27 "base_freq": 174.61,
28 "type": "reference"
29 },
30 {
31 "song_id": "song_0004",
32 "audio_path": "songs/song_0004.wav",
33 "duration": 15.0,
34 "base_freq": 196.0,
35 "type": "reference"
36 },
37 {
38 "song_id": "song_0005",
39 "audio_path": "songs/song_0005.wav",
40 "duration": 15.0,
41 "base_freq": 220.0,
42 "type": "reference"
43 },
44 {
45 "song_id": "song_0006",
46 "audio_path": "songs/song_0006.wav",
47 "duration": 15.0,
48 "base_freq": 246.94,
49 "type": "reference"
50 },
51 {
52 "song_id": "song_0007",
53 "audio_path": "songs/song_0007.wav",
54 "duration": 15.0,
55 "base_freq": 261.63,
56 "type": "reference"
57 },
58 {
59 "song_id": "song_0008",
60 "audio_path": "songs/song_0008.wav",
61 "duration": 15.0,
62 "base_freq": 293.66,
63 "type": "reference"
64 },
65 {
66 "song_id": "song_0009",
67 "audio_path": "songs/song_0009.wav",
68 "duration": 15.0,
69 "base_freq": 329.63,
70 "type": "reference"
71 },
72 {
73 "song_id": "song_0010",
74 "audio_path": "songs/song_0010.wav",
75 "duration": 15.0,
76 "base_freq": 349.23,
77 "type": "reference"
78 },
79 {
80 "song_id": "song_0011",
81 "audio_path": "songs/song_0011.wav",
82 "duration": 15.0,
83 "base_freq": 392.0,
84 "type": "reference"
85 },
86 {
87 "song_id": "song_0012",
88 "audio_path": "songs/song_0012.wav",
89 "duration": 15.0,
90 "base_freq": 440.0,
91 "type": "reference"
92 },
93 {
94 "song_id": "song_0013",
95 "audio_path": "songs/song_0013.wav",
96 "duration": 15.0,
97 "base_freq": 493.88,
98 "type": "reference"
99 },
100 {
101 "song_id": "song_0014",
102 "audio_path": "songs/song_0014.wav",
103 "duration": 15.0,
104 "base_freq": 523.25,
105 "type": "reference"
106 },
107 {
108 "song_id": "song_0015",
109 "audio_path": "songs/song_0015.wav",
110 "duration": 15.0,
111 "base_freq": 587.33,
112 "type": "reference"
113 },
114 {
115 "song_id": "song_0016",
116 "audio_path": "songs/song_0016.wav",
117 "duration": 15.0,
118 "base_freq": 659.25,
119 "type": "reference"
120 },
121 {
122 "song_id": "song_0017",
123 "audio_path": "songs/song_0017.wav",
124 "duration": 15.0,
125 "base_freq": 698.46,
126 "type": "reference"
127 },
128 {
129 "song_id": "song_0018",
130 "audio_path": "songs/song_0018.wav",
131 "duration": 15.0,
132 "base_freq": 783.99,
133 "type": "reference"
134 },
135 {
136 "song_id": "song_0019",
137 "audio_path": "songs/song_0019.wav",
138 "duration": 15.0,
139 "base_freq": 880.0,
140 "type": "reference"
141 },
142 {
143 "song_id": "song_0020",
144 "audio_path": "songs/song_0020.wav",
145 "duration": 15.0,
146 "base_freq": 987.77,
147 "type": "reference"
148 },
149 {
150 "song_id": "song_0021",
151 "audio_path": "songs/song_0021.wav",
152 "duration": 15.0,
153 "base_freq": 146.8292605393491,
154 "type": "reference"
155 },
156 {
157 "song_id": "song_0022",
158 "audio_path": "songs/song_0022.wav",
159 "duration": 15.0,
160 "base_freq": 164.81110255326524,
161 "type": "reference"
162 },
163 {
164 "song_id": "song_0023",
165 "audio_path": "songs/song_0023.wav",
166 "duration": 15.0,
167 "base_freq": 184.99297018186778,
168 "type": "reference"
169 }
170 ]
...\ No newline at end of file ...\ No newline at end of file