Commit b766c74e b766c74e9ff1c9be3223d226d9ef4e0da9a7cb03 by cnb.bofCdSsphPA

Make open-dataset manifests trainable end to end

Constraint: Open dataset onboarding was incomplete until generated manifests could enter train.py without manual path fixes
Rejected: Keep manifests as ingestion-only artifacts | Fails the actual training handoff and leaves the workflow broken
Confidence: high
Scope-risk: moderate
Directive: Preserve the self-contained output layout (audio plus manifests) for all future external dataset imports
Tested: /usr/local/miniconda3/bin/python src/data/external_adapters.py prepare-local fma data/synthetic_v2/songs --output-root data/external_ingested/synthetic_as_open_fixed --eval-ratio 0.2 --query-duration 5.0; /usr/local/miniconda3/bin/python src/data/external_adapters.py validate-local fma data/external_ingested/synthetic_as_open_fixed/fma/manifests; /usr/local/miniconda3/bin/python train.py --data data/external_ingested/synthetic_as_open_fixed/fma/manifests --output data/models_open_smoke_fixed --device cpu --epochs 1 --batch-size 2 --dry-run; /usr/local/miniconda3/bin/python -m py_compile src/data/dataset.py train.py src/data/manifest_tools.py src/data/external_adapters.py
Not-tested: Full multi-epoch training and index/eval loop on a real downloaded FMA or MTG-Jamendo corpus
1 parent fa231444
Showing 32 changed files with 772 additions and 9 deletions
1 [
2 {
3 "song_id": "fma_00000",
4 "audio_path": "audio/fma_00000.wav",
5 "duration": 15.0,
6 "type": "reference",
7 "source_dataset": "fma"
8 },
9 {
10 "song_id": "fma_00001",
11 "audio_path": "audio/fma_00001.wav",
12 "duration": 15.0,
13 "type": "reference",
14 "source_dataset": "fma"
15 },
16 {
17 "song_id": "fma_00002",
18 "audio_path": "audio/fma_00002.wav",
19 "duration": 15.0,
20 "type": "reference",
21 "source_dataset": "fma"
22 },
23 {
24 "song_id": "fma_00003",
25 "audio_path": "audio/fma_00003.wav",
26 "duration": 15.0,
27 "type": "reference",
28 "source_dataset": "fma"
29 },
30 {
31 "song_id": "fma_00004",
32 "audio_path": "audio/fma_00004.wav",
33 "duration": 15.0,
34 "type": "reference",
35 "source_dataset": "fma"
36 },
37 {
38 "song_id": "fma_00005",
39 "audio_path": "audio/fma_00005.wav",
40 "duration": 15.0,
41 "type": "reference",
42 "source_dataset": "fma"
43 },
44 {
45 "song_id": "fma_00006",
46 "audio_path": "audio/fma_00006.wav",
47 "duration": 15.0,
48 "type": "reference",
49 "source_dataset": "fma"
50 },
51 {
52 "song_id": "fma_00007",
53 "audio_path": "audio/fma_00007.wav",
54 "duration": 15.0,
55 "type": "reference",
56 "source_dataset": "fma"
57 },
58 {
59 "song_id": "fma_00008",
60 "audio_path": "audio/fma_00008.wav",
61 "duration": 15.0,
62 "type": "reference",
63 "source_dataset": "fma"
64 },
65 {
66 "song_id": "fma_00009",
67 "audio_path": "audio/fma_00009.wav",
68 "duration": 15.0,
69 "type": "reference",
70 "source_dataset": "fma"
71 },
72 {
73 "song_id": "fma_00010",
74 "audio_path": "audio/fma_00010.wav",
75 "duration": 15.0,
76 "type": "reference",
77 "source_dataset": "fma"
78 },
79 {
80 "song_id": "fma_00011",
81 "audio_path": "audio/fma_00011.wav",
82 "duration": 15.0,
83 "type": "reference",
84 "source_dataset": "fma"
85 },
86 {
87 "song_id": "fma_00012",
88 "audio_path": "audio/fma_00012.wav",
89 "duration": 15.0,
90 "type": "reference",
91 "source_dataset": "fma"
92 },
93 {
94 "song_id": "fma_00013",
95 "audio_path": "audio/fma_00013.wav",
96 "duration": 15.0,
97 "type": "reference",
98 "source_dataset": "fma"
99 },
100 {
101 "song_id": "fma_00014",
102 "audio_path": "audio/fma_00014.wav",
103 "duration": 15.0,
104 "type": "reference",
105 "source_dataset": "fma"
106 },
107 {
108 "song_id": "fma_00015",
109 "audio_path": "audio/fma_00015.wav",
110 "duration": 15.0,
111 "type": "reference",
112 "source_dataset": "fma"
113 },
114 {
115 "song_id": "fma_00016",
116 "audio_path": "audio/fma_00016.wav",
117 "duration": 15.0,
118 "type": "reference",
119 "source_dataset": "fma"
120 },
121 {
122 "song_id": "fma_00017",
123 "audio_path": "audio/fma_00017.wav",
124 "duration": 15.0,
125 "type": "reference",
126 "source_dataset": "fma"
127 },
128 {
129 "song_id": "fma_00018",
130 "audio_path": "audio/fma_00018.wav",
131 "duration": 15.0,
132 "type": "reference",
133 "source_dataset": "fma"
134 },
135 {
136 "song_id": "fma_00019",
137 "audio_path": "audio/fma_00019.wav",
138 "duration": 15.0,
139 "type": "reference",
140 "source_dataset": "fma"
141 },
142 {
143 "song_id": "fma_00020",
144 "audio_path": "audio/fma_00020.wav",
145 "duration": 15.0,
146 "type": "reference",
147 "source_dataset": "fma"
148 },
149 {
150 "song_id": "fma_00021",
151 "audio_path": "audio/fma_00021.wav",
152 "duration": 15.0,
153 "type": "reference",
154 "source_dataset": "fma"
155 },
156 {
157 "song_id": "fma_00022",
158 "audio_path": "audio/fma_00022.wav",
159 "duration": 15.0,
160 "type": "reference",
161 "source_dataset": "fma"
162 },
163 {
164 "song_id": "fma_00023",
165 "audio_path": "audio/fma_00023.wav",
166 "duration": 15.0,
167 "type": "reference",
168 "source_dataset": "fma"
169 }
170 ]
...\ No newline at end of file ...\ No newline at end of file
1 [
2 {
3 "song_id": "fma_00000",
4 "audio_path": "audio/fma_00000.wav",
5 "duration": 5.0,
6 "type": "clean",
7 "offset": 6.394,
8 "segment_type": "external_query",
9 "source_dataset": "fma"
10 },
11 {
12 "song_id": "fma_00003",
13 "audio_path": "audio/fma_00003.wav",
14 "duration": 5.0,
15 "type": "clean",
16 "offset": 8.922,
17 "segment_type": "external_query",
18 "source_dataset": "fma"
19 },
20 {
21 "song_id": "fma_00004",
22 "audio_path": "audio/fma_00004.wav",
23 "duration": 5.0,
24 "type": "clean",
25 "offset": 4.219,
26 "segment_type": "external_query",
27 "source_dataset": "fma"
28 },
29 {
30 "song_id": "fma_00006",
31 "audio_path": "audio/fma_00006.wav",
32 "duration": 5.0,
33 "type": "clean",
34 "offset": 0.265,
35 "segment_type": "external_query",
36 "source_dataset": "fma"
37 },
38 {
39 "song_id": "fma_00009",
40 "audio_path": "audio/fma_00009.wav",
41 "duration": 5.0,
42 "type": "clean",
43 "offset": 8.094,
44 "segment_type": "external_query",
45 "source_dataset": "fma"
46 },
47 {
48 "song_id": "fma_00011",
49 "audio_path": "audio/fma_00011.wav",
50 "duration": 5.0,
51 "type": "clean",
52 "offset": 3.403,
53 "segment_type": "external_query",
54 "source_dataset": "fma"
55 },
56 {
57 "song_id": "fma_00013",
58 "audio_path": "audio/fma_00013.wav",
59 "duration": 5.0,
60 "type": "clean",
61 "offset": 0.927,
62 "segment_type": "external_query",
63 "source_dataset": "fma"
64 },
65 {
66 "song_id": "fma_00020",
67 "audio_path": "audio/fma_00020.wav",
68 "duration": 5.0,
69 "type": "clean",
70 "offset": 7.046,
71 "segment_type": "external_query",
72 "source_dataset": "fma"
73 },
74 {
75 "song_id": "fma_00000",
76 "audio_path": "audio/fma_00000.wav",
77 "duration": 15.0,
78 "type": "reference",
79 "source_dataset": "fma"
80 },
81 {
82 "song_id": "fma_00001",
83 "audio_path": "audio/fma_00001.wav",
84 "duration": 15.0,
85 "type": "reference",
86 "source_dataset": "fma"
87 },
88 {
89 "song_id": "fma_00002",
90 "audio_path": "audio/fma_00002.wav",
91 "duration": 15.0,
92 "type": "reference",
93 "source_dataset": "fma"
94 },
95 {
96 "song_id": "fma_00003",
97 "audio_path": "audio/fma_00003.wav",
98 "duration": 15.0,
99 "type": "reference",
100 "source_dataset": "fma"
101 },
102 {
103 "song_id": "fma_00004",
104 "audio_path": "audio/fma_00004.wav",
105 "duration": 15.0,
106 "type": "reference",
107 "source_dataset": "fma"
108 },
109 {
110 "song_id": "fma_00005",
111 "audio_path": "audio/fma_00005.wav",
112 "duration": 15.0,
113 "type": "reference",
114 "source_dataset": "fma"
115 },
116 {
117 "song_id": "fma_00006",
118 "audio_path": "audio/fma_00006.wav",
119 "duration": 15.0,
120 "type": "reference",
121 "source_dataset": "fma"
122 },
123 {
124 "song_id": "fma_00007",
125 "audio_path": "audio/fma_00007.wav",
126 "duration": 15.0,
127 "type": "reference",
128 "source_dataset": "fma"
129 },
130 {
131 "song_id": "fma_00008",
132 "audio_path": "audio/fma_00008.wav",
133 "duration": 15.0,
134 "type": "reference",
135 "source_dataset": "fma"
136 },
137 {
138 "song_id": "fma_00009",
139 "audio_path": "audio/fma_00009.wav",
140 "duration": 15.0,
141 "type": "reference",
142 "source_dataset": "fma"
143 },
144 {
145 "song_id": "fma_00010",
146 "audio_path": "audio/fma_00010.wav",
147 "duration": 15.0,
148 "type": "reference",
149 "source_dataset": "fma"
150 },
151 {
152 "song_id": "fma_00011",
153 "audio_path": "audio/fma_00011.wav",
154 "duration": 15.0,
155 "type": "reference",
156 "source_dataset": "fma"
157 },
158 {
159 "song_id": "fma_00012",
160 "audio_path": "audio/fma_00012.wav",
161 "duration": 15.0,
162 "type": "reference",
163 "source_dataset": "fma"
164 },
165 {
166 "song_id": "fma_00013",
167 "audio_path": "audio/fma_00013.wav",
168 "duration": 15.0,
169 "type": "reference",
170 "source_dataset": "fma"
171 },
172 {
173 "song_id": "fma_00014",
174 "audio_path": "audio/fma_00014.wav",
175 "duration": 15.0,
176 "type": "reference",
177 "source_dataset": "fma"
178 },
179 {
180 "song_id": "fma_00015",
181 "audio_path": "audio/fma_00015.wav",
182 "duration": 15.0,
183 "type": "reference",
184 "source_dataset": "fma"
185 },
186 {
187 "song_id": "fma_00016",
188 "audio_path": "audio/fma_00016.wav",
189 "duration": 15.0,
190 "type": "reference",
191 "source_dataset": "fma"
192 },
193 {
194 "song_id": "fma_00017",
195 "audio_path": "audio/fma_00017.wav",
196 "duration": 15.0,
197 "type": "reference",
198 "source_dataset": "fma"
199 },
200 {
201 "song_id": "fma_00018",
202 "audio_path": "audio/fma_00018.wav",
203 "duration": 15.0,
204 "type": "reference",
205 "source_dataset": "fma"
206 },
207 {
208 "song_id": "fma_00019",
209 "audio_path": "audio/fma_00019.wav",
210 "duration": 15.0,
211 "type": "reference",
212 "source_dataset": "fma"
213 },
214 {
215 "song_id": "fma_00020",
216 "audio_path": "audio/fma_00020.wav",
217 "duration": 15.0,
218 "type": "reference",
219 "source_dataset": "fma"
220 },
221 {
222 "song_id": "fma_00021",
223 "audio_path": "audio/fma_00021.wav",
224 "duration": 15.0,
225 "type": "reference",
226 "source_dataset": "fma"
227 },
228 {
229 "song_id": "fma_00022",
230 "audio_path": "audio/fma_00022.wav",
231 "duration": 15.0,
232 "type": "reference",
233 "source_dataset": "fma"
234 },
235 {
236 "song_id": "fma_00023",
237 "audio_path": "audio/fma_00023.wav",
238 "duration": 15.0,
239 "type": "reference",
240 "source_dataset": "fma"
241 }
242 ]
...\ No newline at end of file ...\ No newline at end of file
1 [
2 {
3 "song_id": "fma_00001",
4 "audio_path": "audio/fma_00001.wav",
5 "duration": 5.0,
6 "type": "clean",
7 "offset": 2.75,
8 "segment_type": "external_query",
9 "source_dataset": "fma"
10 },
11 {
12 "song_id": "fma_00002",
13 "audio_path": "audio/fma_00002.wav",
14 "duration": 5.0,
15 "type": "clean",
16 "offset": 7.365,
17 "segment_type": "external_query",
18 "source_dataset": "fma"
19 },
20 {
21 "song_id": "fma_00005",
22 "audio_path": "audio/fma_00005.wav",
23 "duration": 5.0,
24 "type": "clean",
25 "offset": 2.186,
26 "segment_type": "external_query",
27 "source_dataset": "fma"
28 },
29 {
30 "song_id": "fma_00007",
31 "audio_path": "audio/fma_00007.wav",
32 "duration": 5.0,
33 "type": "clean",
34 "offset": 6.499,
35 "segment_type": "external_query",
36 "source_dataset": "fma"
37 },
38 {
39 "song_id": "fma_00008",
40 "audio_path": "audio/fma_00008.wav",
41 "duration": 5.0,
42 "type": "clean",
43 "offset": 2.204,
44 "segment_type": "external_query",
45 "source_dataset": "fma"
46 },
47 {
48 "song_id": "fma_00010",
49 "audio_path": "audio/fma_00010.wav",
50 "duration": 5.0,
51 "type": "clean",
52 "offset": 8.058,
53 "segment_type": "external_query",
54 "source_dataset": "fma"
55 },
56 {
57 "song_id": "fma_00012",
58 "audio_path": "audio/fma_00012.wav",
59 "duration": 5.0,
60 "type": "clean",
61 "offset": 9.572,
62 "segment_type": "external_query",
63 "source_dataset": "fma"
64 },
65 {
66 "song_id": "fma_00014",
67 "audio_path": "audio/fma_00014.wav",
68 "duration": 5.0,
69 "type": "clean",
70 "offset": 8.475,
71 "segment_type": "external_query",
72 "source_dataset": "fma"
73 },
74 {
75 "song_id": "fma_00015",
76 "audio_path": "audio/fma_00015.wav",
77 "duration": 5.0,
78 "type": "clean",
79 "offset": 8.071,
80 "segment_type": "external_query",
81 "source_dataset": "fma"
82 },
83 {
84 "song_id": "fma_00016",
85 "audio_path": "audio/fma_00016.wav",
86 "duration": 5.0,
87 "type": "clean",
88 "offset": 5.362,
89 "segment_type": "external_query",
90 "source_dataset": "fma"
91 },
92 {
93 "song_id": "fma_00017",
94 "audio_path": "audio/fma_00017.wav",
95 "duration": 5.0,
96 "type": "clean",
97 "offset": 3.785,
98 "segment_type": "external_query",
99 "source_dataset": "fma"
100 },
101 {
102 "song_id": "fma_00018",
103 "audio_path": "audio/fma_00018.wav",
104 "duration": 5.0,
105 "type": "clean",
106 "offset": 8.294,
107 "segment_type": "external_query",
108 "source_dataset": "fma"
109 },
110 {
111 "song_id": "fma_00019",
112 "audio_path": "audio/fma_00019.wav",
113 "duration": 5.0,
114 "type": "clean",
115 "offset": 8.617,
116 "segment_type": "external_query",
117 "source_dataset": "fma"
118 },
119 {
120 "song_id": "fma_00021",
121 "audio_path": "audio/fma_00021.wav",
122 "duration": 5.0,
123 "type": "clean",
124 "offset": 2.279,
125 "segment_type": "external_query",
126 "source_dataset": "fma"
127 },
128 {
129 "song_id": "fma_00022",
130 "audio_path": "audio/fma_00022.wav",
131 "duration": 5.0,
132 "type": "clean",
133 "offset": 0.798,
134 "segment_type": "external_query",
135 "source_dataset": "fma"
136 },
137 {
138 "song_id": "fma_00023",
139 "audio_path": "audio/fma_00023.wav",
140 "duration": 5.0,
141 "type": "clean",
142 "offset": 1.01,
143 "segment_type": "external_query",
144 "source_dataset": "fma"
145 },
146 {
147 "song_id": "fma_00000",
148 "audio_path": "audio/fma_00000.wav",
149 "duration": 15.0,
150 "type": "reference",
151 "source_dataset": "fma"
152 },
153 {
154 "song_id": "fma_00001",
155 "audio_path": "audio/fma_00001.wav",
156 "duration": 15.0,
157 "type": "reference",
158 "source_dataset": "fma"
159 },
160 {
161 "song_id": "fma_00002",
162 "audio_path": "audio/fma_00002.wav",
163 "duration": 15.0,
164 "type": "reference",
165 "source_dataset": "fma"
166 },
167 {
168 "song_id": "fma_00003",
169 "audio_path": "audio/fma_00003.wav",
170 "duration": 15.0,
171 "type": "reference",
172 "source_dataset": "fma"
173 },
174 {
175 "song_id": "fma_00004",
176 "audio_path": "audio/fma_00004.wav",
177 "duration": 15.0,
178 "type": "reference",
179 "source_dataset": "fma"
180 },
181 {
182 "song_id": "fma_00005",
183 "audio_path": "audio/fma_00005.wav",
184 "duration": 15.0,
185 "type": "reference",
186 "source_dataset": "fma"
187 },
188 {
189 "song_id": "fma_00006",
190 "audio_path": "audio/fma_00006.wav",
191 "duration": 15.0,
192 "type": "reference",
193 "source_dataset": "fma"
194 },
195 {
196 "song_id": "fma_00007",
197 "audio_path": "audio/fma_00007.wav",
198 "duration": 15.0,
199 "type": "reference",
200 "source_dataset": "fma"
201 },
202 {
203 "song_id": "fma_00008",
204 "audio_path": "audio/fma_00008.wav",
205 "duration": 15.0,
206 "type": "reference",
207 "source_dataset": "fma"
208 },
209 {
210 "song_id": "fma_00009",
211 "audio_path": "audio/fma_00009.wav",
212 "duration": 15.0,
213 "type": "reference",
214 "source_dataset": "fma"
215 },
216 {
217 "song_id": "fma_00010",
218 "audio_path": "audio/fma_00010.wav",
219 "duration": 15.0,
220 "type": "reference",
221 "source_dataset": "fma"
222 },
223 {
224 "song_id": "fma_00011",
225 "audio_path": "audio/fma_00011.wav",
226 "duration": 15.0,
227 "type": "reference",
228 "source_dataset": "fma"
229 },
230 {
231 "song_id": "fma_00012",
232 "audio_path": "audio/fma_00012.wav",
233 "duration": 15.0,
234 "type": "reference",
235 "source_dataset": "fma"
236 },
237 {
238 "song_id": "fma_00013",
239 "audio_path": "audio/fma_00013.wav",
240 "duration": 15.0,
241 "type": "reference",
242 "source_dataset": "fma"
243 },
244 {
245 "song_id": "fma_00014",
246 "audio_path": "audio/fma_00014.wav",
247 "duration": 15.0,
248 "type": "reference",
249 "source_dataset": "fma"
250 },
251 {
252 "song_id": "fma_00015",
253 "audio_path": "audio/fma_00015.wav",
254 "duration": 15.0,
255 "type": "reference",
256 "source_dataset": "fma"
257 },
258 {
259 "song_id": "fma_00016",
260 "audio_path": "audio/fma_00016.wav",
261 "duration": 15.0,
262 "type": "reference",
263 "source_dataset": "fma"
264 },
265 {
266 "song_id": "fma_00017",
267 "audio_path": "audio/fma_00017.wav",
268 "duration": 15.0,
269 "type": "reference",
270 "source_dataset": "fma"
271 },
272 {
273 "song_id": "fma_00018",
274 "audio_path": "audio/fma_00018.wav",
275 "duration": 15.0,
276 "type": "reference",
277 "source_dataset": "fma"
278 },
279 {
280 "song_id": "fma_00019",
281 "audio_path": "audio/fma_00019.wav",
282 "duration": 15.0,
283 "type": "reference",
284 "source_dataset": "fma"
285 },
286 {
287 "song_id": "fma_00020",
288 "audio_path": "audio/fma_00020.wav",
289 "duration": 15.0,
290 "type": "reference",
291 "source_dataset": "fma"
292 },
293 {
294 "song_id": "fma_00021",
295 "audio_path": "audio/fma_00021.wav",
296 "duration": 15.0,
297 "type": "reference",
298 "source_dataset": "fma"
299 },
300 {
301 "song_id": "fma_00022",
302 "audio_path": "audio/fma_00022.wav",
303 "duration": 15.0,
304 "type": "reference",
305 "source_dataset": "fma"
306 },
307 {
308 "song_id": "fma_00023",
309 "audio_path": "audio/fma_00023.wav",
310 "duration": 15.0,
311 "type": "reference",
312 "source_dataset": "fma"
313 }
314 ]
...\ No newline at end of file ...\ No newline at end of file
1 []
...\ No newline at end of file ...\ No newline at end of file
...@@ -32,6 +32,7 @@ class ACRDataset(Dataset): ...@@ -32,6 +32,7 @@ class ACRDataset(Dataset):
32 self.augment = augment 32 self.augment = augment
33 self.n_crops = n_crops_per_song 33 self.n_crops = n_crops_per_song
34 self.data_dir = Path(data_dir) 34 self.data_dir = Path(data_dir)
35 self.asset_root = self.data_dir.parent if self.data_dir.name == "manifests" else self.data_dir
35 36
36 meta_path = self.data_dir / f"{split}.json" 37 meta_path = self.data_dir / f"{split}.json"
37 with open(meta_path) as f: 38 with open(meta_path) as f:
...@@ -41,7 +42,7 @@ class ACRDataset(Dataset): ...@@ -41,7 +42,7 @@ class ACRDataset(Dataset):
41 for item in self.metadata: 42 for item in self.metadata:
42 if references_only and item.get("type") != "reference": 43 if references_only and item.get("type") != "reference":
43 continue 44 continue
44 song_path = self.data_dir / item["audio_path"] 45 song_path = self.asset_root / item["audio_path"]
45 if song_path.exists(): 46 if song_path.exists():
46 self.samples.append(item) 47 self.samples.append(item)
47 48
...@@ -75,7 +76,7 @@ class ACRDataset(Dataset): ...@@ -75,7 +76,7 @@ class ACRDataset(Dataset):
75 max_offset = max(0, duration - 5.0) 76 max_offset = max(0, duration - 5.0)
76 offset = random.uniform(0, max_offset) if max_offset > 0 else 0 77 offset = random.uniform(0, max_offset) if max_offset > 0 else 0
77 78
78 audio_path = self.data_dir / sample["audio_path"] 79 audio_path = self.asset_root / sample["audio_path"]
79 y = self._load_segment(str(audio_path), offset, 5.0) 80 y = self._load_segment(str(audio_path), offset, 5.0)
80 81
81 if self.augment and sample.get("type") != "reference": 82 if self.augment and sample.get("type") != "reference":
...@@ -113,6 +114,7 @@ class ACRTestDataset(Dataset): ...@@ -113,6 +114,7 @@ class ACRTestDataset(Dataset):
113 self.n_fft = n_fft 114 self.n_fft = n_fft
114 self.hop_length = hop_length 115 self.hop_length = hop_length
115 self.data_dir = Path(data_dir) 116 self.data_dir = Path(data_dir)
117 self.asset_root = self.data_dir.parent if self.data_dir.name == "manifests" else self.data_dir
116 118
117 meta_path = self.data_dir / f"{split}.json" 119 meta_path = self.data_dir / f"{split}.json"
118 with open(meta_path) as f: 120 with open(meta_path) as f:
...@@ -120,7 +122,7 @@ class ACRTestDataset(Dataset): ...@@ -120,7 +122,7 @@ class ACRTestDataset(Dataset):
120 122
121 self.samples = [] 123 self.samples = []
122 for item in self.metadata: 124 for item in self.metadata:
123 p = self.data_dir / item["audio_path"] 125 p = self.asset_root / item["audio_path"]
124 if p.exists(): 126 if p.exists():
125 self.samples.append(item) 127 self.samples.append(item)
126 128
...@@ -132,7 +134,7 @@ class ACRTestDataset(Dataset): ...@@ -132,7 +134,7 @@ class ACRTestDataset(Dataset):
132 134
133 def __getitem__(self, idx): 135 def __getitem__(self, idx):
134 sample = self.samples[idx] 136 sample = self.samples[idx]
135 audio_path = self.data_dir / sample["audio_path"] 137 audio_path = self.asset_root / sample["audio_path"]
136 y, _ = librosa.load(str(audio_path), sr=self.sr, mono=True, offset=0, duration=min(sample["duration"], 5.0)) 138 y, _ = librosa.load(str(audio_path), sr=self.sr, mono=True, offset=0, duration=min(sample["duration"], 5.0))
137 seg_len = 5 * self.sr 139 seg_len = 5 * self.sr
138 if len(y) < seg_len: 140 if len(y) < seg_len:
...@@ -178,6 +180,7 @@ class SongPairDataset(Dataset): ...@@ -178,6 +180,7 @@ class SongPairDataset(Dataset):
178 self.segment_len = int(segment_dur * sr) 180 self.segment_len = int(segment_dur * sr)
179 self.augment = augment 181 self.augment = augment
180 self.data_dir = Path(data_dir) 182 self.data_dir = Path(data_dir)
183 self.asset_root = self.data_dir.parent if self.data_dir.name == "manifests" else self.data_dir
181 184
182 with open(self.data_dir / f"{split}.json") as f: 185 with open(self.data_dir / f"{split}.json") as f:
183 metadata = json.load(f) 186 metadata = json.load(f)
...@@ -186,7 +189,7 @@ class SongPairDataset(Dataset): ...@@ -186,7 +189,7 @@ class SongPairDataset(Dataset):
186 for item in metadata: 189 for item in metadata:
187 if item.get("type") == "reference": 190 if item.get("type") == "reference":
188 continue 191 continue
189 p = self.data_dir / item["audio_path"] 192 p = self.asset_root / item["audio_path"]
190 if p.exists(): 193 if p.exists():
191 self.by_song.setdefault(item["song_id"], []).append(item) 194 self.by_song.setdefault(item["song_id"], []).append(item)
192 195
...@@ -207,7 +210,7 @@ class SongPairDataset(Dataset): ...@@ -207,7 +210,7 @@ class SongPairDataset(Dataset):
207 return len(self.sample_song_ids) 210 return len(self.sample_song_ids)
208 211
209 def _load_clip(self, sample: Dict) -> np.ndarray: 212 def _load_clip(self, sample: Dict) -> np.ndarray:
210 path = self.data_dir / sample["audio_path"] 213 path = self.asset_root / sample["audio_path"]
211 y, _ = librosa.load(str(path), sr=self.sr, mono=True, duration=5.0) 214 y, _ = librosa.load(str(path), sr=self.sr, mono=True, duration=5.0)
212 if len(y) < self.segment_len: 215 if len(y) < self.segment_len:
213 y = np.pad(y, (0, self.segment_len - len(y))) 216 y = np.pad(y, (0, self.segment_len - len(y)))
......
...@@ -6,6 +6,7 @@ import argparse ...@@ -6,6 +6,7 @@ import argparse
6 import csv 6 import csv
7 import json 7 import json
8 import random 8 import random
9 import shutil
9 from pathlib import Path 10 from pathlib import Path
10 from typing import List, Dict 11 from typing import List, Dict
11 import soundfile as sf 12 import soundfile as sf
...@@ -49,13 +50,19 @@ def build_train_eval_from_audio_dir( ...@@ -49,13 +50,19 @@ def build_train_eval_from_audio_dir(
49 output_dir.mkdir(parents=True, exist_ok=True) 50 output_dir.mkdir(parents=True, exist_ok=True)
50 manifests_dir = output_dir / "manifests" 51 manifests_dir = output_dir / "manifests"
51 manifests_dir.mkdir(parents=True, exist_ok=True) 52 manifests_dir.mkdir(parents=True, exist_ok=True)
53 audio_out_dir = output_dir / "audio"
54 audio_out_dir.mkdir(parents=True, exist_ok=True)
52 55
53 refs = [] 56 refs = []
54 train = [] 57 train = []
55 test = [] 58 test = []
56 59
57 for idx, path in enumerate(files): 60 for idx, path in enumerate(files):
58 rel = path.relative_to(output_dir.parent if output_dir.parent in path.parents else audio_dir.parent) 61 target_name = f"{source_dataset}_{idx:05d}{path.suffix.lower()}"
62 target_path = audio_out_dir / target_name
63 if not target_path.exists():
64 shutil.copy2(path, target_path)
65 rel = target_path.relative_to(output_dir)
59 song_id = f"{source_dataset}_{idx:05d}" 66 song_id = f"{source_dataset}_{idx:05d}"
60 try: 67 try:
61 info = sf.info(str(path)) 68 info = sf.info(str(path))
......
...@@ -50,6 +50,28 @@ ...@@ -50,6 +50,28 @@
50 - 现在开放数据接入路径已经浓缩成单页可执行工作流 50 - 现在开放数据接入路径已经浓缩成单页可执行工作流
51 - 后续接真实 FMA / MTG-Jamendo 本地目录时,上手成本更低 51 - 后续接真实 FMA / MTG-Jamendo 本地目录时,上手成本更低
52 52
53 ### Stage: 开放数据 manifests 直连训练
54
55 完成项:
56 - 修复 `src/data/manifest_tools.py` 生成的开放数据 manifests 路径自洽性
57 - 让开放数据音频复制到输出根下的 `audio/`
58 - 修复 `src/data/dataset.py``.../manifests` 目录布局的路径解析
59 - 打通 `prepare-local -> validate-local -> train.py --dry-run`
60
61 验证结果:
62 - `/usr/local/miniconda3/bin/python src/data/external_adapters.py prepare-local fma data/synthetic_v2/songs --output-root data/external_ingested/synthetic_as_open_fixed --eval-ratio 0.2 --query-duration 5.0` 成功
63 - `/usr/local/miniconda3/bin/python src/data/external_adapters.py validate-local fma data/external_ingested/synthetic_as_open_fixed/fma/manifests` 成功
64 - `/usr/local/miniconda3/bin/python train.py --data data/external_ingested/synthetic_as_open_fixed/fma/manifests --output data/models_open_smoke_fixed --device cpu --epochs 1 --batch-size 2 --dry-run` 成功
65 - 当前结果:
66 - `catalog=24`
67 - `train_queries=16`
68 - `test_queries=8`
69 - `Dry run passed!`
70
71 结论:
72 - 开放数据路径现在不仅能生成 manifests,还能真正进入训练
73 - 后续接入真实 FMA / MTG-Jamendo 时,可以直接走同一链路
74
53 ### Stage: confused 定向优化 v6(sample-level weighting) 75 ### Stage: confused 定向优化 v6(sample-level weighting)
54 76
55 完成项: 77 完成项:
......
...@@ -20,8 +20,8 @@ flowchart LR ...@@ -20,8 +20,8 @@ flowchart LR
20 A[Local Open Audio Dir] --> B[inspect-local / inspect-batch] 20 A[Local Open Audio Dir] --> B[inspect-local / inspect-batch]
21 B --> C[prepare-local] 21 B --> C[prepare-local]
22 C --> D[validate-local] 22 C --> D[validate-local]
23 D --> E[train.json] 23 D --> E[train.py]
24 D --> F[test.json] 24 D --> F[evaluate.py]
25 ``` 25 ```
26 26
27 --- 27 ---
...@@ -34,6 +34,7 @@ flowchart LR ...@@ -34,6 +34,7 @@ flowchart LR
34 | 批量比较 | [`src/data/external_adapters.py`](../acr-engine/src/data/external_adapters.py) `inspect-batch ...` | 比较多个候选目录 | 34 | 批量比较 | [`src/data/external_adapters.py`](../acr-engine/src/data/external_adapters.py) `inspect-batch ...` | 比较多个候选目录 |
35 | 生成清单 | [`src/data/external_adapters.py`](../acr-engine/src/data/external_adapters.py) `prepare-local ...` | 产出 train/test/catalog | 35 | 生成清单 | [`src/data/external_adapters.py`](../acr-engine/src/data/external_adapters.py) `prepare-local ...` | 产出 train/test/catalog |
36 | 训练前校验 | [`src/data/external_adapters.py`](../acr-engine/src/data/external_adapters.py) `validate-local ...` | 确认结构正确 | 36 | 训练前校验 | [`src/data/external_adapters.py`](../acr-engine/src/data/external_adapters.py) `validate-local ...` | 确认结构正确 |
37 | 训练 smoke | [`train.py`](../acr-engine/train.py) `--data ... --dry-run` | 验证 manifests 可直接进入训练 |
37 38
38 --- 39 ---
39 40
...@@ -45,6 +46,7 @@ flowchart LR ...@@ -45,6 +46,7 @@ flowchart LR
45 /usr/local/miniconda3/bin/python src/data/external_adapters.py inspect-local fma data/raw/fma_small_audio --eval-ratio 0.2 --query-duration 8.0 46 /usr/local/miniconda3/bin/python src/data/external_adapters.py inspect-local fma data/raw/fma_small_audio --eval-ratio 0.2 --query-duration 8.0
46 /usr/local/miniconda3/bin/python src/data/external_adapters.py prepare-local fma data/raw/fma_small_audio --output-root data/external_ingested --eval-ratio 0.2 --query-duration 8.0 47 /usr/local/miniconda3/bin/python src/data/external_adapters.py prepare-local fma data/raw/fma_small_audio --output-root data/external_ingested --eval-ratio 0.2 --query-duration 8.0
47 /usr/local/miniconda3/bin/python src/data/external_adapters.py validate-local fma data/external_ingested/fma/manifests 48 /usr/local/miniconda3/bin/python src/data/external_adapters.py validate-local fma data/external_ingested/fma/manifests
49 /usr/local/miniconda3/bin/python train.py --data data/external_ingested/fma/manifests --output data/models_fma_smoke --device cpu --epochs 1 --batch-size 2 --dry-run
48 ``` 50 ```
49 51
50 ### 3.2 多目录比较 52 ### 3.2 多目录比较
...@@ -78,6 +80,8 @@ flowchart LR ...@@ -78,6 +80,8 @@ flowchart LR
78 - `test_queries=8` 80 - `test_queries=8`
79 - `validate-local` 81 - `validate-local`
80 - `ok=true` 82 - `ok=true`
83 - `train.py --dry-run`
84 - `Dry run passed! Pipeline is working.`
81 85
82 --- 86 ---
83 87
......