Prove the offline business-export chain with a runnable smoke over local audio
Constraint: Keep verification offline-only and avoid touching real databases or production assets
Rejected: Stop at manifest generation without execution evidence | A dry-run smoke gives the next session stronger handoff confidence
Confidence: high
Scope-risk: narrow
Directive: Stage local sample audio inside the smoke workspace so manifest paths remain self-contained and reproducible
Tested: Ran business_export_offline_smoke.py end-to-end; verified normalize/build summaries and train.py --dry-run success; rechecked adapter doc links
Not-tested: Did not run full training/evaluation on live business exports or connect to any database
Showing 1 changed file with 170 additions and 0 deletions
| 1 | #!/usr/bin/env python3 | ||
| 2 | from __future__ import annotations | ||
| 3 | |||
| 4 | import argparse | ||
| 5 | import csv | ||
| 6 | import json | ||
| 7 | import shutil | ||
| 8 | import subprocess | ||
| 9 | from pathlib import Path | ||
| 10 | |||
| 11 | PYTHON = "/usr/local/miniconda3/bin/python" | ||
| 12 | |||
| 13 | |||
def run(cmd: list[str], cwd: Path) -> str:
    """Execute ``cmd`` with ``cwd`` as working directory and return its stdout.

    Args:
        cmd: Argument vector of the command to execute (no shell involved).
        cwd: Directory the child process is started in.

    Returns:
        Everything the command wrote to standard output, decoded as text.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    completed = subprocess.run(
        cmd,
        cwd=str(cwd),
        stdout=subprocess.PIPE,  # stderr is left attached to the parent
        text=True,
        check=True,
    )
    return completed.stdout
| 16 | |||
| 17 | |||
def stage_audio(output_root: Path, src_rel: str, dst_name: str) -> str:
    """Copy one audio file into ``<output_root>/audio`` and return its relative path.

    Args:
        output_root: Workspace directory that receives the ``audio`` folder.
        src_rel: Source path, joined onto the directory one level above this
            script's own directory.
        dst_name: File name to use for the staged copy.

    Returns:
        The staged file's path relative to ``output_root``, as a string.

    An already existing destination file is kept untouched, so repeated runs
    against the same workspace do not copy again.
    """
    project_root = Path(__file__).resolve().parents[1]
    source = project_root.joinpath(src_rel).resolve()

    staging_dir = output_root.joinpath("audio")
    staging_dir.mkdir(parents=True, exist_ok=True)

    target = staging_dir.joinpath(dst_name)
    already_staged = target.exists()
    if not already_staged:
        shutil.copy2(source, target)

    return str(target.relative_to(output_root))
| 27 | |||
| 28 | |||
def _smoke_rows(output_root: Path) -> list[dict[str, object]]:
    """Stage the sample clips and return the five rows of the smoke export CSV."""
    clip_dir = "data/external_smoke/fma/audio"
    # One tuple per exported asset:
    # (song key, asset suffix, type, role, split, source clip, bucket,
    #  offset_sec, duration_sec, license)
    specs = [
        ("a", "ref", 11, "reference", "train", "fma_00000.mp3", "lossless_reference_core", 0, 29.976, "licensed"),
        ("a", "query", 7, "query", "test", "fma_00000.mp3", "short_video_hook", 4.0, 8.0, "licensed"),
        ("b", "ref", 11, "reference", "train", "fma_00001.mp3", "lossless_reference_core", 0, 30.002, "licensed"),
        ("b", "query", 8, "query", "train", "fma_00001.mp3", "short_video_hook", 6.0, 8.0, "licensed"),
        ("c", "excluded", 18, "excluded", "holdout", "fma_00002.mp3", "demo_variation_pool", 0, 29.976, "review_pending"),
    ]
    # Key order matters: the first row's keys become the CSV header.
    return [
        {
            "song_id": f"song_smoke_{song}",
            "asset_id": f"asset_smoke_{song}_{suffix}",
            "type": type_code,
            "role": role,
            "split": split,
            "audio_path": stage_audio(output_root, f"{clip_dir}/{clip}", f"song_smoke_{song}_{suffix}.mp3"),
            "source_dataset": "internal_catalog",
            "title": f"Smoke Song {song.upper()}",
            "artist": "Smoke Artist",
            "album_id": "smoke_album",
            "bucket": bucket,
            "offset_sec": offset_sec,
            "duration_sec": duration_sec,
            "sample_rate": 44100,
            "bitrate": 192,
            "license": license_name,
            "is_lossless": False,
        }
        for song, suffix, type_code, role, split, clip, bucket, offset_sec, duration_sec, license_name in specs
    ]


def main() -> None:
    """Run the offline smoke for the business-export adapter chain.

    Steps, all executed with the repository root as working directory:

    1. Stage sample audio under ``--output-root`` and write a five-row
       export CSV that references the staged copies by relative path.
    2. Run ``scripts/normalize_business_export.py`` on the CSV.
    3. Run ``scripts/build_business_project_manifests.py`` on the result.
    4. Run ``train.py --dry-run`` against the generated manifests.

    A JSON summary is printed to stdout. The stdout of steps 2 and 3 must be
    JSON, since it is parsed into the summary.

    Raises:
        subprocess.CalledProcessError: If any child command exits non-zero.
        SystemExit: With status 1 when the dry-run output does not contain
            the "Dry run passed!" marker, so callers can rely on the exit
            code instead of parsing the summary.
    """
    import sys  # local import: only needed for the interpreter fallback below

    parser = argparse.ArgumentParser(description="Run an offline smoke for the business-export adapter chain")
    parser.add_argument("--output-root", default="/tmp/business_export_offline_smoke")
    parser.add_argument("--device", default="cpu")
    args = parser.parse_args()

    repo = Path(__file__).resolve().parents[1]
    output_root = Path(args.output_root).resolve()
    output_root.mkdir(parents=True, exist_ok=True)

    sample_csv = output_root / "business_asset_export_real_smoke.csv"
    manifests_dir = output_root / "manifests"
    manifest_ready = output_root / "manifest_ready.jsonl"

    # Prefer the configured interpreter; fall back to the one running this
    # script on machines where that path does not exist.
    python = PYTHON if Path(PYTHON).is_file() else sys.executable

    rows = _smoke_rows(output_root)
    # Pin the encoding so the CSV does not depend on the machine's locale.
    with sample_csv.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0]))
        writer.writeheader()
        writer.writerows(rows)

    normalize_summary = json.loads(
        run(
            [python, "scripts/normalize_business_export.py", "--input", str(sample_csv), "--output", str(manifest_ready)],
            cwd=repo,
        )
    )
    build_summary = json.loads(
        run(
            [python, "scripts/build_business_project_manifests.py", "--input", str(manifest_ready), "--output-dir", str(manifests_dir)],
            cwd=repo,
        )
    )
    dryrun = run(
        [
            python,
            "train.py",
            "--data", str(manifests_dir),
            "--output", str(output_root / "models"),
            "--device", args.device,
            "--epochs", "1",
            "--batch-size", "2",
            "--dry-run",
        ],
        cwd=repo,
    )

    dry_run_passed = "Dry run passed!" in dryrun
    summary = {
        "manifest_ready": str(manifest_ready),
        "manifests_dir": str(manifests_dir),
        "normalize_summary": normalize_summary,
        "build_summary": build_summary,
        "dry_run_passed": dry_run_passed,
    }
    print(json.dumps(summary, ensure_ascii=False, indent=2))

    # Print the summary first, then fail loudly: a smoke that reports
    # "dry_run_passed": false must not exit with status 0.
    if not dry_run_passed:
        raise SystemExit(1)
| 167 | |||
| 168 | |||
# Script entry point: run the smoke only when executed directly, not on import.
if __name__ == "__main__":
    main()
-
Please register or sign in to post a comment