run_songcentric_directory_pipeline_live.py
4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env /usr/local/miniconda3/bin/python
from __future__ import annotations
import argparse
import json
import subprocess
from pathlib import Path
# Directory one level above the directory that contains this script.
# NOTE(review): presumably the `acr-engine` project directory, with ROOT.parent
# being the repository root that the step script paths are relative to — confirm.
ROOT = Path(__file__).resolve().parents[1]
# Hard-coded absolute path of the Python interpreter used to launch each step.
PYTHON = '/usr/local/miniconda3/bin/python'
def run_step(name: str, cmd: list[str]) -> dict:
    """Execute one pipeline step as a subprocess and describe the outcome.

    The command runs with ``ROOT.parent`` as its working directory, and its
    stdout/stderr are captured as text rather than streamed to the console.
    A non-zero exit status is reported in the result, not raised.

    Args:
        name: Label stored in the result to identify the step.
        cmd: Argument vector to execute (no shell involved).

    Returns:
        A dict with the keys ``name``, ``command`` (the arguments joined by
        single spaces), ``returncode``, ``stdout`` and ``stderr``.
    """
    completed = subprocess.run(
        cmd,
        cwd=str(ROOT.parent),
        capture_output=True,
        text=True,
    )
    outcome = dict(
        name=name,
        command=' '.join(cmd),
        returncode=completed.returncode,
        stdout=completed.stdout,
        stderr=completed.stderr,
    )
    return outcome
def load_json(path: Path) -> dict:
    """Read the file at *path* and return its parsed JSON content.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale's preferred encoding (JSON interchange text is UTF-8).

    Args:
        path: Location of the JSON document to read.

    Returns:
        The decoded JSON value (the callers in this script expect an object).

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    return json.loads(path.read_text(encoding='utf-8'))
def _repo_arg(path: Path) -> str:
    """Render *path* as a command-line argument for a pipeline step.

    Steps are launched with ``ROOT.parent`` as their working directory, so a
    path inside that tree is passed relative to it. A path outside the tree
    (for example an absolute ``--output-dir``) is passed through unchanged
    instead of letting ``relative_to`` raise ``ValueError``.
    """
    try:
        return str(path.relative_to(ROOT.parent))
    except ValueError:
        return str(path)


def _run_or_exit(name: str, cmd: list[str]) -> dict:
    """Run one step via ``run_step`` and abort the pipeline if it failed.

    Raises:
        SystemExit: With a JSON dump of the failed step (including its
            captured stdout/stderr) when the return code is non-zero.
    """
    step = run_step(name, cmd)
    if step['returncode'] != 0:
        raise SystemExit(json.dumps({'failed_step': step}, ensure_ascii=False, indent=2))
    return step


def main() -> int:
    """Run the build -> enrich -> import pipeline and write a runner report.

    Command-line options:
        --dsn         Connection string forwarded to the import step (required).
        --schema      Schema name forwarded to the import step.
        --input-root  Input directory forwarded to the manifest build step.
        --output-dir  Directory (resolved against ``ROOT.parent``) that receives
                      the manifests and all reports.

    Each step is executed in order and the pipeline stops at the first step
    that exits non-zero. On success the per-step reports are read back and
    condensed into ``songcentric_pipeline_runner_report.json``, which is also
    printed to stdout.

    Returns:
        ``0`` when every step succeeded and the report was written.

    Raises:
        SystemExit: With a JSON description of the failed step.
        KeyError: If the enrich or import report lacks an expected key.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dsn', required=True)
    parser.add_argument('--schema', default='acr_songcentric_test')
    parser.add_argument('--input-root', default='acr-engine/data/songcentric_builder_smoke')
    parser.add_argument('--output-dir', default='acr-engine/data/pgvector_eval/music20')
    args = parser.parse_args()

    out_dir = (ROOT.parent / args.output_dir).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    manifest = out_dir / 'songcentric_pipeline_manifest.jsonl'
    build_report = out_dir / 'songcentric_pipeline_build_report.json'
    enriched_manifest = out_dir / 'songcentric_pipeline_manifest_with_features.jsonl'
    enrich_report = out_dir / 'songcentric_pipeline_enrich_report.json'
    import_report = out_dir / 'songcentric_pipeline_import_report.json'

    steps = []
    steps.append(_run_or_exit('build_manifest', [
        PYTHON, 'acr-engine/scripts/build_songcentric_manifest_from_directory.py',
        '--input-root', args.input_root,
        '--output', _repo_arg(manifest),
        '--report-output', _repo_arg(build_report),
    ]))
    steps.append(_run_or_exit('enrich_features', [
        PYTHON, 'acr-engine/scripts/enrich_songcentric_manifest_with_local_features.py',
        '--input-manifest', _repo_arg(manifest),
        '--output-manifest', _repo_arg(enriched_manifest),
        '--report-output', _repo_arg(enrich_report),
    ]))
    # NOTE(review): the recorded 'command' string of this step contains the DSN
    # verbatim, and it ends up in the runner report and on stdout. If the DSN
    # can carry credentials, consider redacting it before persisting.
    steps.append(_run_or_exit('import_manifest', [
        PYTHON, 'acr-engine/scripts/import_songcentric_manifest_live.py',
        '--dsn', args.dsn,
        '--schema', args.schema,
        '--manifest', _repo_arg(enriched_manifest),
        '--output', _repo_arg(import_report),
    ]))

    build = load_json(build_report)
    enrich = load_json(enrich_report)
    imp = load_json(import_report)

    # Only these fields are carried over; captured stdout/stderr stay out of
    # the summary to keep the report compact.
    step_fields = ('name', 'command', 'returncode')
    enrich_fields = (
        'wav_windows_seen',
        'features_added',
        'matcher_fingerprint_count',
        'fallback_fingerprint_count',
        'semantic_runtime_available',
        'semantic_runtime_missing',
        'semantic_runtime_ready_count',
        'semantic_fallback_count',
    )
    summary = {
        'schema': args.schema,
        'input_root': args.input_root,
        'steps': [{k: v for k, v in s.items() if k in step_fields} for s in steps],
        'build_summary': build,
        'enrich_summary': {key: enrich[key] for key in enrich_fields},
        'import_counts': imp['counts'],
        'feature_lineage_sample': imp.get('feature_lineage_sample'),
    }

    rendered = json.dumps(summary, ensure_ascii=False, indent=2)
    report_path = out_dir / 'songcentric_pipeline_runner_report.json'
    # ensure_ascii=False may emit non-ASCII characters, so the file encoding
    # must be pinned to UTF-8 rather than left to the locale default.
    report_path.write_text(rendered, encoding='utf-8')
    print(rendered)
    return 0
# Script entry point: run the pipeline and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
    exit_status = main()
    raise SystemExit(exit_status)