Prove the offline business-export chain with a runnable smoke over local audio

Constraint: Keep verification offline-only and avoid touching real databases or production assets
Rejected: Stop at manifest generation without execution evidence | A dry-run smoke gives the next session stronger handoff confidence
Confidence: high
Scope-risk: narrow
Directive: Stage local sample audio inside the smoke workspace so manifest paths remain self-contained and reproducible
Tested: Ran business_export_offline_smoke.py end-to-end; verified normalize/build summaries and train.py --dry-run success; rechecked adapter doc links
Not-tested: Did not run full training/evaluation on live business exports or connect to any database

Prove the offline business-export chain with a runnable smoke over local audio
Constraint: Keep verification offline-only and avoid touching real databases or production assets
Rejected: Stop at manifest generation without execution evidence | A dry-run smoke gives the next session stronger handoff confidence
Confidence: high
Scope-risk: narrow
Directive: Stage local sample audio inside the smoke workspace so manifest paths remain self-contained and reproducible
Tested: Ran business_export_offline_smoke.py end-to-end; verified normalize/build summaries and train.py --dry-run success; rechecked adapter doc links
Not-tested: Did not run full training/evaluation on live business exports or connect to any database
cnb.bofCdSsphPA
Commit 7eff944b ... 7eff944bedeed205a3bf5d9f2225d954ceb13a61 authored 2026-06-02 19:02:36 +0800 by cnb.bofCdSsphPA
Showing 1 changed file with 170 additions and 0 deletions
acr-engine/scripts/business_export_offline_smoke.py
--- a/acr-engine/scripts/business_export_offline_smoke.py 0 → 100755
View file @7eff944
+++ b/acr-engine/scripts/business_export_offline_smoke.py 0 → 100755
View file @7eff944
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import shutil
+import subprocess
+from pathlib import Path
+
+# Interpreter used to launch every child step of the smoke (see the run(...)
+# calls in main). NOTE(review): this is a hard-coded absolute path, so the
+# script presumably only works on hosts with this miniconda layout - confirm.
+PYTHON = "/usr/local/miniconda3/bin/python"
+
+
+def run(cmd: list[str], cwd: Path) -> str:
+    return subprocess.check_output(cmd, cwd=str(cwd), text=True)
+
+
+def stage_audio(output_root: Path, src_rel: str, dst_name: str) -> str:
+    repo = Path(__file__).resolve().parents[1]
+    src = (repo / src_rel).resolve()
+    audio_dir = output_root / "audio"
+    audio_dir.mkdir(parents=True, exist_ok=True)
+    dst = audio_dir / dst_name
+    if not dst.exists():
+        shutil.copy2(src, dst)
+    return str(dst.relative_to(output_root))
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Run an offline smoke for the business-export adapter chain")
+    parser.add_argument("--output-root", default="/tmp/business_export_offline_smoke")
+    parser.add_argument("--device", default="cpu")
+    args = parser.parse_args()
+
+    repo = Path(__file__).resolve().parents[1]
+    output_root = Path(args.output_root).resolve()
+    output_root.mkdir(parents=True, exist_ok=True)
+
+    sample_csv = output_root / "business_asset_export_real_smoke.csv"
+    manifests_dir = output_root / "manifests"
+    manifest_ready = output_root / "manifest_ready.jsonl"
+
+    rows = [
+        {
+            "song_id": "song_smoke_a",
+            "asset_id": "asset_smoke_a_ref",
+            "type": 11,
+            "role": "reference",
+            "split": "train",
+            "audio_path": stage_audio(output_root, "data/external_smoke/fma/audio/fma_00000.mp3", "song_smoke_a_ref.mp3"),
+            "source_dataset": "internal_catalog",
+            "title": "Smoke Song A",
+            "artist": "Smoke Artist",
+            "album_id": "smoke_album",
+            "bucket": "lossless_reference_core",
+            "offset_sec": 0,
+            "duration_sec": 29.976,
+            "sample_rate": 44100,
+            "bitrate": 192,
+            "license": "licensed",
+            "is_lossless": False,
+        },
+        {
+            "song_id": "song_smoke_a",
+            "asset_id": "asset_smoke_a_query",
+            "type": 7,
+            "role": "query",
+            "split": "test",
+            "audio_path": stage_audio(output_root, "data/external_smoke/fma/audio/fma_00000.mp3", "song_smoke_a_query.mp3"),
+            "source_dataset": "internal_catalog",
+            "title": "Smoke Song A",
+            "artist": "Smoke Artist",
+            "album_id": "smoke_album",
+            "bucket": "short_video_hook",
+            "offset_sec": 4.0,
+            "duration_sec": 8.0,
+            "sample_rate": 44100,
+            "bitrate": 192,
+            "license": "licensed",
+            "is_lossless": False,
+        },
+        {
+            "song_id": "song_smoke_b",
+            "asset_id": "asset_smoke_b_ref",
+            "type": 11,
+            "role": "reference",
+            "split": "train",
+            "audio_path": stage_audio(output_root, "data/external_smoke/fma/audio/fma_00001.mp3", "song_smoke_b_ref.mp3"),
+            "source_dataset": "internal_catalog",
+            "title": "Smoke Song B",
+            "artist": "Smoke Artist",
+            "album_id": "smoke_album",
+            "bucket": "lossless_reference_core",
+            "offset_sec": 0,
+            "duration_sec": 30.002,
+            "sample_rate": 44100,
+            "bitrate": 192,
+            "license": "licensed",
+            "is_lossless": False,
+        },
+        {
+            "song_id": "song_smoke_b",
+            "asset_id": "asset_smoke_b_query",
+            "type": 8,
+            "role": "query",
+            "split": "train",
+            "audio_path": stage_audio(output_root, "data/external_smoke/fma/audio/fma_00001.mp3", "song_smoke_b_query.mp3"),
+            "source_dataset": "internal_catalog",
+            "title": "Smoke Song B",
+            "artist": "Smoke Artist",
+            "album_id": "smoke_album",
+            "bucket": "short_video_hook",
+            "offset_sec": 6.0,
+            "duration_sec": 8.0,
+            "sample_rate": 44100,
+            "bitrate": 192,
+            "license": "licensed",
+            "is_lossless": False,
+        },
+        {
+            "song_id": "song_smoke_c",
+            "asset_id": "asset_smoke_c_excluded",
+            "type": 18,
+            "role": "excluded",
+            "split": "holdout",
+            "audio_path": stage_audio(output_root, "data/external_smoke/fma/audio/fma_00002.mp3", "song_smoke_c_excluded.mp3"),
+            "source_dataset": "internal_catalog",
+            "title": "Smoke Song C",
+            "artist": "Smoke Artist",
+            "album_id": "smoke_album",
+            "bucket": "demo_variation_pool",
+            "offset_sec": 0,
+            "duration_sec": 29.976,
+            "sample_rate": 44100,
+            "bitrate": 192,
+            "license": "review_pending",
+            "is_lossless": False,
+        },
+    ]
+
+    with sample_csv.open("w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
+        writer.writeheader()
+        writer.writerows(rows)
+
+    normalize_summary = json.loads(run([PYTHON, "scripts/normalize_business_export.py", "--input", str(sample_csv), "--output", str(manifest_ready)], cwd=repo))
+    build_summary = json.loads(run([PYTHON, "scripts/build_business_project_manifests.py", "--input", str(manifest_ready), "--output-dir", str(manifests_dir)], cwd=repo))
+    dryrun = run([
+        PYTHON,
+        "train.py",
+        "--data", str(manifests_dir),
+        "--output", str(output_root / "models"),
+        "--device", args.device,
+        "--epochs", "1",
+        "--batch-size", "2",
+        "--dry-run",
+    ], cwd=repo)
+
+    summary = {
+        "manifest_ready": str(manifest_ready),
+        "manifests_dir": str(manifests_dir),
+        "normalize_summary": normalize_summary,
+        "build_summary": build_summary,
+        "dry_run_passed": "Dry run passed!" in dryrun,
+    }
+    print(json.dumps(summary, ensure_ascii=False, indent=2))
+
+
+# Run the smoke only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()