build_songcentric_manifest_from_directory.py 4.77 KB
#!/usr/bin/env /usr/local/miniconda3/bin/python
from __future__ import annotations

import argparse
import json
import math
import wave
from pathlib import Path

# File suffixes (lower-case, leading dot included) that the directory scan
# treats as audio assets; suffixes are lower-cased before being compared.
AUDIO_EXTS = {'.wav', '.mp3', '.flac', '.ogg', '.m4a'}


def detect_duration_ms(path: Path) -> int | None:
    """Return the length of a WAV file in whole milliseconds, if it can be read.

    Only paths whose suffix is ``.wav`` (compared case-insensitively) are
    inspected, using the standard-library ``wave`` module.

    Args:
        path: Location of the audio file to probe.

    Returns:
        ``frames * 1000 / frame_rate`` truncated to an ``int``, or ``None``
        when the suffix is not ``.wav``, the file cannot be opened or parsed,
        or the reported frame rate is not positive.
    """
    if path.suffix.lower() != '.wav':
        return None
    try:
        with wave.open(str(path), 'rb') as reader:
            frame_rate = reader.getframerate()
            frame_total = reader.getnframes()
            return int(frame_total * 1000 / frame_rate) if frame_rate > 0 else None
    except Exception:
        # Best-effort probe: any failure is reported as "duration unknown".
        return None


def infer_song_meta(file_path: Path, root: Path) -> tuple[str, str, str]:
    """Derive ``(song_key, title, artist)`` from a file's position under *root*.

    The path of *file_path* relative to *root* decides the result:

    * File directly in *root*: the key is the file stem, the title is the
      stem with underscores turned into spaces, the artist is ``'unknown'``.
    * File one directory deep: key and title come from that directory name,
      the artist is ``'unknown'``.
    * File two or more directories deep: key and title come from the first
      directory, the artist from the second directory (underscores replaced
      by spaces).

    Args:
        file_path: Path of the audio file; must be located under *root*.
        root: Directory the relative path is computed against.

    Returns:
        A ``(song_key, title, artist)`` tuple of strings.

    Raises:
        ValueError: Propagated from ``Path.relative_to`` when *file_path*
            is not inside *root*.
    """
    segments = file_path.relative_to(root).parts
    if len(segments) < 2:
        # No enclosing directory below root: fall back to the file name.
        name = file_path.stem
        return name, name.replace('_', ' '), 'unknown'

    top_dir = segments[0]
    if len(segments) >= 3:
        artist_name = segments[1].replace('_', ' ')
    else:
        artist_name = 'unknown'
    return top_dir, top_dir.replace('_', ' '), artist_name


def build_windows(duration_ms: int | None, window_ms: int, stride_ms: int) -> list[dict]:
    """Split an asset into fixed-length, possibly overlapping time windows.

    Args:
        duration_ms: Total length of the asset in milliseconds, or ``None``
            when the length is unknown.
        window_ms: Length of each window in milliseconds; must be positive.
        stride_ms: Offset between consecutive window starts in milliseconds;
            must be positive.

    Returns:
        A list of ``{'start_ms': int, 'end_ms': int}`` dicts ordered by start:

        * unknown duration: a single window ``[0, window_ms]``;
        * duration not longer than one window: a single window
          ``[0, duration_ms]``;
        * otherwise: every full window that fits when stepping by
          ``stride_ms``, plus one final full-length window aligned to the
          end of the asset when the stepped windows do not reach it.

    Raises:
        ValueError: If ``window_ms`` or ``stride_ms`` is not positive. A
            non-positive stride would otherwise never advance past the
            first window and loop forever.
    """
    if window_ms <= 0:
        raise ValueError(f'window_ms must be positive, got {window_ms}')
    if stride_ms <= 0:
        raise ValueError(f'stride_ms must be positive, got {stride_ms}')

    if duration_ms is None:
        return [{'start_ms': 0, 'end_ms': window_ms}]
    if duration_ms <= window_ms:
        return [{'start_ms': 0, 'end_ms': duration_ms}]

    # Latest start that still leaves room for a full window (> 0 here).
    last_start = duration_ms - window_ms
    windows = [
        {'start_ms': start, 'end_ms': start + window_ms}
        for start in range(0, last_start + 1, stride_ms)
    ]
    # Starts are strictly increasing, so the only window that can be missing
    # is the one flush with the end of the asset; no de-duplication needed.
    if windows[-1]['start_ms'] != last_start:
        windows.append({'start_ms': last_start, 'end_ms': duration_ms})
    return windows


def main() -> int:
    """Scan a directory tree for audio files and write a JSON Lines manifest.

    Command-line options:
        --input-root: directory searched recursively for audio files.
        --output: path of the manifest; one JSON object per line.
        --window-ms / --stride-ms: window length and step passed to
            ``build_windows``; both must be positive.
        --set-type / --set-name: values copied into each row's membership.
        --source-type: value copied into each row's asset record.
        --report-output: optional path that receives the summary report.

    Each manifest row describes one audio file: the song metadata inferred
    from its path, an asset record, its time windows, and one set membership.
    A summary report is printed to stdout and, when requested, also written
    to ``--report-output``. Both files are written as UTF-8.

    Returns:
        ``0`` on success. Invalid arguments terminate the process through
        ``ArgumentParser.error``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-root', required=True)
    parser.add_argument('--output', required=True)
    parser.add_argument('--window-ms', type=int, default=5000)
    parser.add_argument('--stride-ms', type=int, default=2500)
    parser.add_argument('--set-type', default='reference_set')
    parser.add_argument('--set-name', default='phase1_hot_reference_v1')
    parser.add_argument('--source-type', default='official')
    parser.add_argument('--report-output')
    args = parser.parse_args()

    # A non-positive stride would keep windowing from ever advancing, and a
    # non-positive window length produces meaningless spans; reject up front.
    if args.window_ms <= 0 or args.stride_ms <= 0:
        parser.error('--window-ms and --stride-ms must be positive integers')

    root = Path(args.input_root).resolve()
    # Without this check a mistyped root silently yields an empty manifest.
    if not root.is_dir():
        parser.error(f'--input-root is not a directory: {root}')

    output = Path(args.output).resolve()
    output.parent.mkdir(parents=True, exist_ok=True)
    report_output = Path(args.report_output).resolve() if args.report_output else None
    if report_output:
        report_output.parent.mkdir(parents=True, exist_ok=True)

    rows = []
    file_count = 0
    window_count = 0
    song_keys = set()

    # Sorted traversal keeps the manifest order stable between runs.
    for path in sorted(root.rglob('*')):
        suffix = path.suffix.lower()
        if not path.is_file() or suffix not in AUDIO_EXTS:
            continue
        file_count += 1
        song_key, title, artist = infer_song_meta(path, root)
        song_keys.add(song_key)
        duration_ms = detect_duration_ms(path)
        windows = build_windows(duration_ms, args.window_ms, args.stride_ms)
        window_count += len(windows)
        is_wav = suffix == '.wav'
        rows.append(
            {
                'song': {
                    'biz_key': song_key,
                    'title': title,
                    'artist_name': artist,
                },
                'asset': {
                    'source_type': args.source_type,
                    'storage_uri': str(path),
                    'storage_scheme': 'file',
                    # Derived from the path, not from the file contents.
                    'checksum': f'path:{path}',
                    'codec': suffix.lstrip('.'),
                    # NOTE(review): fixed values for every .wav rather than
                    # read from the file header -- confirm that inputs are
                    # guaranteed to be 16 kHz mono.
                    'sample_rate': 16000 if is_wav else None,
                    'channels': 1 if is_wav else None,
                    'duration_ms': duration_ms,
                },
                'windows': windows,
                'memberships': [
                    {
                        'set_type': args.set_type,
                        'set_name': args.set_name,
                        'member_type': 'asset',
                        'priority': 100,
                    }
                ],
            }
        )

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # pinned to UTF-8 instead of depending on the platform locale.
    output.write_text(
        '\n'.join(json.dumps(row, ensure_ascii=False) for row in rows) + ('\n' if rows else ''),
        encoding='utf-8',
    )

    report = {
        'input_root': str(root),
        'output': str(output),
        'song_count': len(song_keys),
        'asset_count': file_count,
        'window_count': window_count,
        'window_ms': args.window_ms,
        'stride_ms': args.stride_ms,
        'set_name': args.set_name,
    }
    report_text = json.dumps(report, ensure_ascii=False, indent=2)
    if report_output:
        report_output.write_text(report_text, encoding='utf-8')
    print(report_text)
    return 0


# Run the CLI only when executed as a script; the integer returned by main()
# is handed to SystemExit and so becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())