# run_songcentric_directory_pipeline_live.py 4.16 KB
#!/usr/bin/env /usr/local/miniconda3/bin/python
from __future__ import annotations

import argparse
import json
import shlex
import subprocess
from pathlib import Path

# Parent of the directory that contains this file (presumably the `acr-engine`
# project directory, given the `acr-engine/...` script paths used below).
# Step subprocesses run from ROOT.parent, and output paths are expressed
# relative to ROOT.parent.
ROOT = Path(__file__).resolve().parents[1]
# Interpreter used to launch every pipeline step script.
PYTHON = '/usr/local/miniconda3/bin/python'


def run_step(name: str, cmd: list[str]) -> dict:
    """Run one pipeline step as a subprocess and capture its outcome.

    The command runs with ``ROOT.parent`` as its working directory, with
    stdout and stderr captured as text. A non-zero exit status does not raise
    here; it is reported through the ``returncode`` entry so the caller can
    decide how to react.

    Args:
        name: Label identifying the step in the returned record.
        cmd: Argument vector passed to ``subprocess.run`` (no shell involved).

    Returns:
        A dict with the keys ``name``, ``command``, ``returncode``, ``stdout``
        and ``stderr``.
    """
    proc = subprocess.run(cmd, cwd=str(ROOT.parent), capture_output=True, text=True)
    return {
        'name': name,
        # Quote every argument so the recorded command stays unambiguous (and
        # copy-pasteable) when an argument contains spaces or shell
        # metacharacters; plain arguments are rendered exactly as before.
        'command': ' '.join(shlex.quote(part) for part in cmd),
        'returncode': proc.returncode,
        'stdout': proc.stdout,
        'stderr': proc.stderr,
    }


def load_json(path: Path) -> dict:
    """Parse the JSON document stored at *path* and return the decoded value.

    The file is read as raw bytes so that ``json.loads`` performs the encoding
    detection itself (UTF-8, UTF-16 or UTF-32, including a leading UTF-8 BOM)
    instead of depending on the process locale's default text encoding.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded JSON value (callers in this file expect an object/dict).

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    return json.loads(path.read_bytes())


def _child_path(path: Path) -> str:
    """Return *path* as a command-line argument for a pipeline step.

    Steps run with ``ROOT.parent`` as their working directory, so a path inside
    that directory is passed relative to it. A path outside it (for example an
    absolute ``--output-dir``) is passed as-is instead of letting
    ``Path.relative_to`` raise ``ValueError``.
    """
    try:
        return str(path.relative_to(ROOT.parent))
    except ValueError:
        return str(path)


def _redact_dsn(step: dict, dsn: str) -> dict:
    """Return a copy of *step* whose ``command`` text has *dsn* masked.

    The DSN may embed credentials, so it must not end up in the report file,
    on stdout, or in the failure message. Only the ``command`` entry is
    rewritten; captured child output is passed through untouched.
    """
    if not dsn:
        # str.replace('') would insert the placeholder between every character.
        return dict(step)
    return {**step, 'command': step['command'].replace(dsn, '<redacted>')}


def main() -> int:
    """Run the build -> enrich -> import pipeline and write a summary report.

    Command-line options:
        --dsn         Connection string forwarded to the import step (required).
        --schema      Schema name forwarded to the import step.
        --input-root  Input directory forwarded to the manifest build step.
        --output-dir  Directory (resolved against ``ROOT.parent``) that
                      receives the manifests and all report files.

    The three steps run in order and the pipeline stops at the first one that
    exits non-zero. On success the per-step reports are combined into
    ``songcentric_pipeline_runner_report.json`` and echoed to stdout.

    Returns:
        0 when every step succeeded.

    Raises:
        SystemExit: With a JSON description of the failed step (name, command,
            return code, captured stdout/stderr) as soon as a step fails.
        KeyError: If a step report lacks one of the fields copied into the
            summary.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dsn', required=True)
    parser.add_argument('--schema', default='acr_songcentric_test')
    parser.add_argument('--input-root', default='acr-engine/data/songcentric_builder_smoke')
    parser.add_argument('--output-dir', default='acr-engine/data/pgvector_eval/music20')
    args = parser.parse_args()

    out_dir = (ROOT.parent / args.output_dir).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    manifest = out_dir / 'songcentric_pipeline_manifest.jsonl'
    build_report = out_dir / 'songcentric_pipeline_build_report.json'
    enriched_manifest = out_dir / 'songcentric_pipeline_manifest_with_features.jsonl'
    enrich_report = out_dir / 'songcentric_pipeline_enrich_report.json'
    import_report = out_dir / 'songcentric_pipeline_import_report.json'

    # Each step consumes the files produced by the previous one, so the order
    # of this list is the execution order.
    pipeline = [
        ('build_manifest', [
            PYTHON, 'acr-engine/scripts/build_songcentric_manifest_from_directory.py',
            '--input-root', args.input_root,
            '--output', _child_path(manifest),
            '--report-output', _child_path(build_report),
        ]),
        ('enrich_features', [
            PYTHON, 'acr-engine/scripts/enrich_songcentric_manifest_with_local_features.py',
            '--input-manifest', _child_path(manifest),
            '--output-manifest', _child_path(enriched_manifest),
            '--report-output', _child_path(enrich_report),
        ]),
        ('import_manifest', [
            PYTHON, 'acr-engine/scripts/import_songcentric_manifest_live.py',
            '--dsn', args.dsn,
            '--schema', args.schema,
            '--manifest', _child_path(enriched_manifest),
            '--output', _child_path(import_report),
        ]),
    ]

    steps = []
    for step_name, cmd in pipeline:
        step = _redact_dsn(run_step(step_name, cmd), args.dsn)
        steps.append(step)
        if step['returncode'] != 0:
            # Fail fast: later steps depend on this step's output files.
            raise SystemExit(json.dumps({'failed_step': step}, ensure_ascii=False, indent=2))

    build = load_json(build_report)
    enrich = load_json(enrich_report)
    imp = load_json(import_report)

    enrich_fields = (
        'wav_windows_seen',
        'features_added',
        'matcher_fingerprint_count',
        'fallback_fingerprint_count',
        'semantic_runtime_available',
        'semantic_runtime_missing',
        'semantic_runtime_ready_count',
        'semantic_fallback_count',
    )
    summary = {
        'schema': args.schema,
        'input_root': args.input_root,
        # Captured stdout/stderr of successful steps is left out of the summary.
        'steps': [{key: step[key] for key in ('name', 'command', 'returncode')} for step in steps],
        'build_summary': build,
        'enrich_summary': {field: enrich[field] for field in enrich_fields},
        'import_counts': imp['counts'],
        'feature_lineage_sample': imp.get('feature_lineage_sample'),
    }
    rendered = json.dumps(summary, ensure_ascii=False, indent=2)
    report_path = out_dir / 'songcentric_pipeline_runner_report.json'
    # ensure_ascii=False can emit non-ASCII text, so pin the file encoding
    # rather than relying on the locale default.
    report_path.write_text(rendered, encoding='utf-8')
    print(rendered)
    return 0


if __name__ == '__main__':
    raise SystemExit(main())