Commit 7eff944b 7eff944bedeed205a3bf5d9f2225d954ceb13a61 by cnb.bofCdSsphPA

Prove the offline business-export chain with a runnable smoke over local audio

Constraint: Keep verification offline-only and avoid touching real databases or production assets
Rejected: Stop at manifest generation without execution evidence | A dry-run smoke gives the next session stronger handoff confidence
Confidence: high
Scope-risk: narrow
Directive: Stage local sample audio inside the smoke workspace so manifest paths remain self-contained and reproducible
Tested: Ran business_export_offline_smoke.py end-to-end; verified normalize/build summaries and train.py --dry-run success; rechecked adapter doc links
Not-tested: Did not run full training/evaluation on live business exports or connect to any database
1 parent 3bdc0139
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import csv
import json
import shutil
import subprocess
import sys
from pathlib import Path
# Interpreter used to launch the pipeline scripts and train.py.
# The pinned conda interpreter is preferred when it exists; otherwise fall back
# to the interpreter running this script so the smoke does not die with
# FileNotFoundError on machines that lack that exact install path.
_PINNED_PYTHON = "/usr/local/miniconda3/bin/python"
PYTHON = _PINNED_PYTHON if Path(_PINNED_PYTHON).exists() else (sys.executable or _PINNED_PYTHON)
def run(cmd: list[str], cwd: Path) -> str:
    """Execute ``cmd`` inside ``cwd`` and return everything it wrote to stdout.

    Args:
        cmd: Argument vector of the command (no shell is involved).
        cwd: Directory the child process is started in.

    Returns:
        The captured standard output, decoded as text.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    completed = subprocess.run(
        cmd,
        cwd=str(cwd),
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout
def stage_audio(output_root: Path, src_rel: str, dst_name: str) -> str:
    """Copy one audio file into ``<output_root>/audio`` and return its relative path.

    The source is looked up relative to the repository root, taken to be the
    parent of the directory that contains this script. A destination file that
    already exists is left untouched, so repeated runs reuse the staged copy.

    Args:
        output_root: Smoke workspace that receives the ``audio`` directory.
        src_rel: Source audio path, relative to the repository root.
        dst_name: File name to give the staged copy.

    Returns:
        The staged file's path relative to ``output_root``, as a string.
    """
    source = Path(__file__).resolve().parents[1].joinpath(src_rel).resolve()
    staging_dir = output_root.joinpath("audio")
    staging_dir.mkdir(parents=True, exist_ok=True)
    target = staging_dir.joinpath(dst_name)
    if not target.exists():
        # copy2 also carries over file metadata such as timestamps.
        shutil.copy2(source, target)
    return str(target.relative_to(output_root))
def _build_smoke_rows(output_root: Path) -> list[dict[str, object]]:
    """Stage the sample audio and return the five synthetic export rows for the smoke CSV."""
    # Only these columns differ between rows:
    # (song letter, asset tag, type, role, split, bucket, offset_sec, duration_sec, license)
    specs = [
        ("a", "ref", 11, "reference", "train", "lossless_reference_core", 0, 29.976, "licensed"),
        ("a", "query", 7, "query", "test", "short_video_hook", 4.0, 8.0, "licensed"),
        ("b", "ref", 11, "reference", "train", "lossless_reference_core", 0, 30.002, "licensed"),
        ("b", "query", 8, "query", "train", "short_video_hook", 6.0, 8.0, "licensed"),
        ("c", "excluded", 18, "excluded", "holdout", "demo_variation_pool", 0, 29.976, "review_pending"),
    ]
    # Every asset of a song is staged from the same local sample clip.
    source_clip = {"a": "fma_00000.mp3", "b": "fma_00001.mp3", "c": "fma_00002.mp3"}

    rows: list[dict[str, object]] = []
    for letter, tag, asset_type, role, split, bucket, offset_sec, duration_sec, license_status in specs:
        rows.append(
            {
                "song_id": f"song_smoke_{letter}",
                "asset_id": f"asset_smoke_{letter}_{tag}",
                "type": asset_type,
                "role": role,
                "split": split,
                "audio_path": stage_audio(
                    output_root,
                    f"data/external_smoke/fma/audio/{source_clip[letter]}",
                    f"song_smoke_{letter}_{tag}.mp3",
                ),
                "source_dataset": "internal_catalog",
                "title": f"Smoke Song {letter.upper()}",
                "artist": "Smoke Artist",
                "album_id": "smoke_album",
                "bucket": bucket,
                "offset_sec": offset_sec,
                "duration_sec": duration_sec,
                "sample_rate": 44100,
                "bitrate": 192,
                "license": license_status,
                "is_lossless": False,
            }
        )
    return rows


def main() -> None:
    """Run the offline business-export smoke and print a JSON summary.

    Steps, all executed from the repository root (the parent of this script's
    directory):
      1. Stage local sample audio and write a five-row export CSV under
         ``--output-root``.
      2. Run ``scripts/normalize_business_export.py`` on that CSV.
      3. Run ``scripts/build_business_project_manifests.py`` on the result.
      4. Run ``train.py --dry-run`` against the generated manifests.

    The stdout of steps 2 and 3 is parsed as JSON and embedded in the summary.

    Raises:
        subprocess.CalledProcessError: If any launched command exits non-zero.
        json.JSONDecodeError: If step 2 or 3 does not print valid JSON.
        SystemExit: With status 1 when the dry-run output lacks the
            "Dry run passed!" marker, so callers and CI see the failure.
    """
    parser = argparse.ArgumentParser(description="Run an offline smoke for the business-export adapter chain")
    parser.add_argument("--output-root", default="/tmp/business_export_offline_smoke")
    parser.add_argument("--device", default="cpu")
    args = parser.parse_args()

    repo = Path(__file__).resolve().parents[1]
    output_root = Path(args.output_root).resolve()
    output_root.mkdir(parents=True, exist_ok=True)
    sample_csv = output_root / "business_asset_export_real_smoke.csv"
    manifests_dir = output_root / "manifests"
    manifest_ready = output_root / "manifest_ready.jsonl"

    rows = _build_smoke_rows(output_root)
    # Explicit encoding keeps the CSV independent of the machine's locale.
    with sample_csv.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)

    normalize_summary = json.loads(
        run(
            [PYTHON, "scripts/normalize_business_export.py", "--input", str(sample_csv), "--output", str(manifest_ready)],
            cwd=repo,
        )
    )
    build_summary = json.loads(
        run(
            [PYTHON, "scripts/build_business_project_manifests.py", "--input", str(manifest_ready), "--output-dir", str(manifests_dir)],
            cwd=repo,
        )
    )
    dryrun = run(
        [
            PYTHON,
            "train.py",
            "--data", str(manifests_dir),
            "--output", str(output_root / "models"),
            "--device", args.device,
            "--epochs", "1",
            "--batch-size", "2",
            "--dry-run",
        ],
        cwd=repo,
    )
    dry_run_passed = "Dry run passed!" in dryrun

    summary = {
        "manifest_ready": str(manifest_ready),
        "manifests_dir": str(manifests_dir),
        "normalize_summary": normalize_summary,
        "build_summary": build_summary,
        "dry_run_passed": dry_run_passed,
    }
    print(json.dumps(summary, ensure_ascii=False, indent=2))

    # A smoke that cannot fail proves nothing: report first, then exit non-zero.
    if not dry_run_passed:
        raise SystemExit(1)
# Run the smoke only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()