generate_artifacts.py
5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
"""Generate benchmark report, model card, and release bundle artifacts."""
from __future__ import annotations
import argparse
import json
from datetime import datetime, timezone
from pathlib import Path
def utc_now():
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    moment = datetime.now(tz=timezone.utc)
    return moment.strftime('%Y-%m-%dT%H:%M:%SZ')
def load_json(path: str):
    """Read the file at *path* and return its decoded JSON content.

    The file is opened as UTF-8 explicitly so that parsing does not depend
    on the locale's default encoding.

    Args:
        path: Filesystem path of the JSON document to read.

    Returns:
        Whatever ``json.load`` produces for the document (dict, list, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, encoding='utf-8') as f:
        return json.load(f)
def write_text(path: Path, text: str):
    """Write *text* to *path* as UTF-8, creating parent directories first.

    The encoding is fixed to UTF-8 so that non-ASCII text is written the
    same way regardless of the locale's default encoding.

    Args:
        path: Destination file; missing parent directories are created.
        text: Content to write. An existing file is overwritten.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(text, encoding='utf-8')
def benchmark_md(model_version: str, data_version: str, report: dict) -> str:
    """Render the benchmark report as a Markdown string.

    Args:
        model_version: Label shown in the summary and the mermaid diagram.
        data_version: Label shown in the summary and the mermaid diagram.
        report: Evaluation result. The keys ``top1`` and ``topk`` feed the
            summary line; the optional ``by_type`` mapping (bucket name ->
            dict with ``top1``/``topk``) feeds the metrics table.

    Returns:
        The complete Markdown document, terminated by a newline.
    """
    # One table row per bucket; the MRR / FAR / Notes cells are left blank.
    bucket_lines = [
        f"| {bucket} | {stats.get('top1','')} | {stats.get('topk','')} | | | |"
        for bucket, stats in report.get('by_type', {}).items()
    ]
    if bucket_lines:
        rows_text = '\n'.join(bucket_lines)
    else:
        # Placeholder row so the table still renders without buckets.
        rows_text = '| n/a | | | | | |'

    overall_top1 = report.get('top1')
    overall_topk = report.get('topk')  # rendered under the "top5" label

    lines = [
        '# Benchmark Report',
        '',
        '## 一页结论',
        f'- 模型版本:{model_version}',
        f'- 数据版本:{data_version}',
        f'- 核心结论:top1={overall_top1} top5={overall_topk}',
        '- 是否通过上线门禁:TBD',
        '',
        '## 1. 评测范围图',
        '',
        '```mermaid',
        'flowchart LR',
        f' A[{model_version}] --> B[{data_version}]',
        ' A --> C[Scenario Buckets]',
        ' A --> D[Latency / Ops]',
        '```',
        '',
        '## 2. 指标表',
        '',
        '| Bucket | top1 | top5 | MRR | FAR | Notes |',
        '|---|---:|---:|---:|---:|---|',
        rows_text,
        '',
        '## 3. 文字分析',
        '- 最强项:clean/augmented buckets if present',
        '- 最弱项:see hard-case summary',
        '- 与上一版本对比:TBD',
        '',
        '## 4. 细节附录',
        '- 原始 JSON 报告:embedded source',
        '',
        '## Sources',
        '- docs/industrial-benchmark-spec.md',
        '',  # yields the trailing newline after the join
    ]
    return '\n'.join(lines)
def model_card_md(model_version: str, config: dict, benchmark_path: str) -> str:
    """Render the model card as a Markdown string.

    Args:
        model_version: Version label shown in the summary section.
        config: Configuration dict; its optional ``model`` sub-dict supplies
            ``embed_dim``, ``channels``, ``n_mels`` and ``use_band_split``.
            Missing values are rendered as ``None``.
        benchmark_path: Path text placed in the "benchmark report" table row.

    Returns:
        The complete Markdown document, terminated by a newline.
    """
    settings = config.get('model', {})
    # Each listed key becomes one "| key | value |" row, in this order.
    setting_rows = [
        f'| {key} | {settings.get(key)} |'
        for key in ('embed_dim', 'channels', 'n_mels', 'use_band_split')
    ]

    lines = [
        '# Model Card',
        '',
        '## 一页结论',
        '- 模型名称:ACR Hybrid Encoder',
        f'- 版本:{model_version}',
        '- 适用场景:music ACR prototype / retrieval',
        '- 不适用场景:未经白名单数据验证的生产商用全量上线',
        '',
        '## 1. 模型结构图',
        '',
        '```mermaid',
        'flowchart LR',
        ' A[Input Audio] --> B[128 Mel + BandSplit]',
        ' B --> C[Encoder]',
        ' C --> D[Embedding]',
        ' D --> E[Hybrid Retrieval]',
        '```',
        '',
        '## 2. 关键信息表',
        '',
        '| 项 | 内容 |',
        '|---|---|',
        *setting_rows,
        f'| benchmark report | {benchmark_path} |',
        '',
        '## 3. 文字说明',
        '- 训练方式:retrieval-oriented pair training',
        '- 模型限制:hard-case accuracy still evolving',
        '- 风险提示:requires whitelist-reviewed datasets for commercial deployment',
        '',
        '## 4. 细节附录',
        '- config embedded from source JSON',
        '',
        '## Sources',
        '- docs/dataset-spec.md',
        '- docs/benchmark-report-template.md',
        '',  # yields the trailing newline after the join
    ]
    return '\n'.join(lines)
def release_checklist_md(model_version: str, benchmark_path: str, model_card_path: str) -> str:
    """Render the release checklist as a Markdown string.

    The checklist statuses are fixed text in the template; only the model
    version and the two artifact paths are interpolated.

    Args:
        model_version: Label used as the root node of the mermaid gate diagram.
        benchmark_path: Path text listed in the appendix for the benchmark report.
        model_card_path: Path text listed in the appendix for the model card.

    Returns:
        The complete Markdown document, terminated by a newline.
    """
    # (item, status) pairs rendered as table rows, in this order.
    gate_items = (
        ('benchmark report 已生成', 'yes'),
        ('model card 已生成', 'yes'),
        ('license registry 已更新', 'pending'),
        ('service smoke test 通过', 'yes'),
        ('dataset whitelist 已确认', 'pending'),
        ('changelog 已更新', 'pending'),
    )
    gate_rows = [f'| {item} | {status} |' for item, status in gate_items]

    lines = [
        '# Release Checklist',
        '',
        '## 一页结论',
        '发布前必须同时满足:质量通过、合规通过、服务通过、文档齐全。',
        '',
        '## 1. 发布门禁图',
        '',
        '```mermaid',
        'flowchart TD',
        f' A[{model_version}] --> B[Benchmark Pass]',
        ' A --> C[License Review Pass]',
        ' A --> D[Service Smoke Pass]',
        ' A --> E[Docs Complete]',
        '```',
        '',
        '## 2. Checklist 表',
        '',
        '| 项目 | 状态 |',
        '|---|---|',
        *gate_rows,
        '',
        '## 3. 文字说明',
        '- 当前用于工程治理与预发布检查,不代表已满足商用法律门槛。',
        '',
        '## 4. 细节附录',
        f'- benchmark 报告路径:{benchmark_path}',
        f'- model card 路径:{model_card_path}',
        '',
        '## Sources',
        '- docs/dataset-sources-and-licensing.md',
        '- docs/industrial-benchmark-spec.md',
        '',  # yields the trailing newline after the join
    ]
    return '\n'.join(lines)
def main():
    """Command-line entry point: render all artifacts into the output directory.

    Reads the evaluation report (``--eval-json``) and the configuration
    (``--config-json``), writes ``benchmark-report.md``, ``model-card.md``
    and ``release-checklist.md`` under ``--output-dir``, then writes
    ``artifact-manifest.json`` there and prints the same manifest to stdout.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--eval-json', required=True)
    cli.add_argument('--config-json', required=True)
    cli.add_argument('--output-dir', required=True)
    cli.add_argument('--model-version', default='dev')
    cli.add_argument('--data-version', default='synthetic')
    opts = cli.parse_args()

    # The output directory is created before the input files are read.
    target_dir = Path(opts.output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    eval_report = load_json(opts.eval_json)
    config = load_json(opts.config_json)

    # Manifest key -> output file, in the order the files are listed.
    outputs = {
        'benchmark_report': target_dir / 'benchmark-report.md',
        'model_card': target_dir / 'model-card.md',
        'release_checklist': target_dir / 'release-checklist.md',
    }
    bench_ref = str(outputs['benchmark_report'])
    card_ref = str(outputs['model_card'])

    write_text(
        outputs['benchmark_report'],
        benchmark_md(opts.model_version, opts.data_version, eval_report),
    )
    write_text(
        outputs['model_card'],
        model_card_md(opts.model_version, config, bench_ref),
    )
    write_text(
        outputs['release_checklist'],
        release_checklist_md(opts.model_version, bench_ref, card_ref),
    )

    manifest = {
        'generated_at': utc_now(),
        'model_version': opts.model_version,
        'data_version': opts.data_version,
        'files': {label: str(location) for label, location in outputs.items()},
    }
    # Serialize once; the same text goes to the manifest file and to stdout.
    manifest_json = json.dumps(manifest, indent=2)
    (target_dir / 'artifact-manifest.json').write_text(manifest_json)
    print(manifest_json)
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()