build_songcentric_manifest_from_directory.py
4.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env /usr/local/miniconda3/bin/python
from __future__ import annotations
import argparse
import json
import math
import wave
from pathlib import Path
# Lower-case file suffixes (including the leading dot) that the directory scan
# treats as audio; files with any other suffix are skipped.
AUDIO_EXTS = {'.wav', '.mp3', '.flac', '.ogg', '.m4a'}
def detect_duration_ms(path: Path) -> int | None:
    """Return the duration of a WAV file in whole milliseconds, if known.

    Only files whose suffix is ``.wav`` (case-insensitive) are inspected.
    Every other suffix yields ``None``.

    Args:
        path: Location of the audio file to probe.

    Returns:
        The duration truncated to an integer number of milliseconds, or
        ``None`` when the file is not a WAV, cannot be opened or parsed, or
        reports a non-positive frame rate.
    """
    if path.suffix.lower() != '.wav':
        return None

    try:
        with wave.open(str(path), 'rb') as reader:
            frame_total = reader.getnframes()
            sample_rate = reader.getframerate()
    except Exception:
        # Best-effort probe: an unreadable or malformed file simply has no
        # known duration rather than aborting the caller.
        return None

    if sample_rate > 0:
        return int(frame_total * 1000 / sample_rate)
    return None
def infer_song_meta(file_path: Path, root: Path) -> tuple[str, str, str]:
    """Derive ``(song_key, title, artist)`` from a file's position under *root*.

    The path of *file_path* relative to *root* decides the result:

    * File directly in *root*: the file stem is the key, the stem with
      underscores replaced by spaces is the title, the artist is
      ``'unknown'``.
    * File one directory deep: the first directory name is the key and
      (underscores replaced by spaces) the title; the artist is ``'unknown'``.
    * File two or more directories deep: as above, and the second directory
      name (underscores replaced by spaces) is the artist.

    Args:
        file_path: Path of the audio file; must be located under *root*.
        root: Directory the relative layout is measured from.

    Returns:
        A ``(song_key, title, artist)`` tuple of strings.

    Raises:
        ValueError: If *file_path* is not relative to *root*.
    """
    parts = file_path.relative_to(root).parts

    if len(parts) < 2:
        # No enclosing directory to name the song, so use the file name.
        base = file_path.stem
        return base, base.replace('_', ' '), 'unknown'

    top_dir = parts[0]
    if len(parts) >= 3:
        artist_name = parts[1].replace('_', ' ')
    else:
        artist_name = 'unknown'
    return top_dir, top_dir.replace('_', ' '), artist_name
def build_windows(duration_ms: int | None, window_ms: int, stride_ms: int) -> list[dict]:
    """Split an audio duration into fixed-length, possibly overlapping windows.

    Args:
        duration_ms: Total length of the audio in milliseconds, or ``None``
            when the length is unknown.
        window_ms: Length of each window in milliseconds; must be positive.
        stride_ms: Distance between consecutive window starts in
            milliseconds; must be positive.

    Returns:
        A list of ``{'start_ms': int, 'end_ms': int}`` dicts in ascending
        start order:

        * unknown duration: a single window ``[0, window_ms]``;
        * duration no longer than one window: a single window
          ``[0, duration_ms]``;
        * otherwise: full windows every ``stride_ms``, plus one final window
          aligned to the end of the audio when the strided windows do not
          already reach it.

    Raises:
        ValueError: If ``window_ms`` or ``stride_ms`` is not positive. A
            non-positive stride would otherwise never advance and generate
            windows forever.
    """
    if window_ms <= 0:
        raise ValueError(f'window_ms must be positive, got {window_ms}')
    if stride_ms <= 0:
        raise ValueError(f'stride_ms must be positive, got {stride_ms}')

    if duration_ms is None:
        return [{'start_ms': 0, 'end_ms': window_ms}]
    if duration_ms <= window_ms:
        return [{'start_ms': 0, 'end_ms': duration_ms}]

    # From here on duration_ms > window_ms, so at least the window starting
    # at 0 fits and last_start is strictly positive.
    last_start = duration_ms - window_ms
    windows = [
        {'start_ms': start, 'end_ms': start + window_ms}
        for start in range(0, last_start + 1, stride_ms)
    ]

    # Cover the tail that the stride grid missed. Its start is strictly
    # greater than every strided start, so no duplicate can be produced.
    if windows[-1]['end_ms'] < duration_ms:
        windows.append({'start_ms': last_start, 'end_ms': duration_ms})
    return windows
def main(argv: list[str] | None = None) -> int:
    """Scan a directory tree for audio files and write a JSONL manifest.

    Every regular file under ``--input-root`` whose lower-cased suffix is in
    ``AUDIO_EXTS`` becomes one JSON object (one line) in ``--output`` holding
    its song metadata, asset description, analysis windows and set
    membership. A summary report is printed to stdout and, when
    ``--report-output`` is given, also written to that file.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` on success. Invalid arguments terminate the process through
        ``parser.error`` (exit status 2) instead of returning.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-root', required=True)
    parser.add_argument('--output', required=True)
    parser.add_argument('--window-ms', type=int, default=5000)
    parser.add_argument('--stride-ms', type=int, default=2500)
    parser.add_argument('--set-type', default='reference_set')
    parser.add_argument('--set-name', default='phase1_hot_reference_v1')
    parser.add_argument('--source-type', default='official')
    parser.add_argument('--report-output')
    args = parser.parse_args(argv)

    # Reject non-positive sizes up front: they cannot describe meaningful
    # windows, and a non-positive stride cannot advance the window cursor.
    if args.window_ms <= 0:
        parser.error('--window-ms must be a positive integer')
    if args.stride_ms <= 0:
        parser.error('--stride-ms must be a positive integer')

    root = Path(args.input_root).resolve()
    if not root.is_dir():
        # Scanning a missing directory yields nothing, which would silently
        # produce an empty manifest; fail loudly instead.
        parser.error(f'--input-root is not a directory: {root}')

    output = Path(args.output).resolve()
    output.parent.mkdir(parents=True, exist_ok=True)
    report_output = Path(args.report_output).resolve() if args.report_output else None
    if report_output:
        report_output.parent.mkdir(parents=True, exist_ok=True)

    rows = []
    file_count = 0
    window_count = 0
    song_keys = set()
    # Sorted traversal keeps the manifest order stable between runs.
    for path in sorted(root.rglob('*')):
        suffix = path.suffix.lower()
        if not path.is_file() or suffix not in AUDIO_EXTS:
            continue
        is_wav = suffix == '.wav'
        file_count += 1
        song_key, title, artist = infer_song_meta(path, root)
        song_keys.add(song_key)
        duration_ms = detect_duration_ms(path)
        windows = build_windows(duration_ms, args.window_ms, args.stride_ms)
        window_count += len(windows)
        rows.append(
            {
                'song': {
                    'biz_key': song_key,
                    'title': title,
                    'artist_name': artist,
                },
                'asset': {
                    'source_type': args.source_type,
                    'storage_uri': str(path),
                    'storage_scheme': 'file',
                    # Placeholder derived from the path, not a content hash.
                    'checksum': f'path:{path}',
                    'codec': suffix.lstrip('.'),
                    # NOTE(review): WAV files are reported as 16 kHz mono
                    # without reading the header; presumably the corpus is
                    # pre-normalised. Confirm with the data owner.
                    'sample_rate': 16000 if is_wav else None,
                    'channels': 1 if is_wav else None,
                    'duration_ms': duration_ms,
                },
                'windows': windows,
                'memberships': [
                    {
                        'set_type': args.set_type,
                        'set_name': args.set_name,
                        'member_type': 'asset',
                        'priority': 100,
                    }
                ],
            }
        )

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned to UTF-8 instead of depending on the platform locale.
    manifest_text = '\n'.join(json.dumps(row, ensure_ascii=False) for row in rows)
    output.write_text(manifest_text + ('\n' if rows else ''), encoding='utf-8')

    report = {
        'input_root': str(root),
        'output': str(output),
        'song_count': len(song_keys),
        'asset_count': file_count,
        'window_count': window_count,
        'window_ms': args.window_ms,
        'stride_ms': args.stride_ms,
        'set_name': args.set_name,
    }
    report_text = json.dumps(report, ensure_ascii=False, indent=2)
    if report_output:
        report_output.write_text(report_text, encoding='utf-8')
    print(report_text)
    return 0
if __name__ == '__main__':
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    raise SystemExit(main())