Commit 83a3f89f 83a3f89f5fe7f8fe3c81f2a13e2e1a2604069838 by cnb.bofCdSsphPA

Recover stalled real-dataset transfer with a durable background resume path

Constraint: Long FMA archive downloads cannot rely on fragile foreground execution if Ralph-style work must continue across sessions
Rejected: Keep manually reissuing foreground download commands after stalls | Increases interruption risk and weakens resumability evidence
Confidence: high
Scope-risk: narrow
Directive: Prefer prepare_fma_archive.py bg-download for future large archive recovery so PID and log evidence remain standardized
Tested: /usr/local/miniconda3/bin/python acr-engine/scripts/prepare_fma_archive.py bg-download; /usr/local/miniconda3/bin/python acr-engine/scripts/prepare_fma_archive.py inspect; tail -n 40 /tmp/fma_modelscope_download.log
Not-tested: Full archive completion, extraction, and real-data smoke remain pending
1 parent 730d9b90
...@@ -37,6 +37,28 @@ def download(resume: bool = True) -> dict: ...@@ -37,6 +37,28 @@ def download(resume: bool = True) -> dict:
37 } 37 }
38 38
39 39
def bg_download(log_path: Path, resume: bool = True) -> dict:
    """Start a detached ``curl`` download of the FMA archive and report its PID.

    The transfer is launched through ``nohup ... &`` inside a ``bash -lc``
    shell; curl's stdout and stderr are appended to ``log_path``.

    Args:
        log_path: File that receives curl's combined stdout/stderr (appended).
            Its parent directory is created if missing.
        resume: When true, pass ``--continue-at -`` so curl resumes from the
            current size of the partially downloaded archive.

    Returns:
        A dict with the keys ``action``, ``returncode`` (of the launching
        shell, not of curl), ``pid`` (string echoed by the shell via ``$!``),
        ``log_path``, ``archive_path``, ``archive_exists``, ``archive_size``
        and ``stderr_tail`` (last 1200 characters of the shell's stderr).
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    import shlex

    ARCHIVE_PATH.parent.mkdir(parents=True, exist_ok=True)
    log_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = ["nohup", "curl", "-L"]
    if resume:
        cmd += ["--continue-at", "-"]
    cmd += ["--output", str(ARCHIVE_PATH), FMA_SMALL_URL]

    # Quote every token: the URL may contain shell metacharacters such as
    # '&' or '?', and either path may contain spaces. Unquoted, bash would
    # split or background the command in the wrong place.
    quoted_cmd = " ".join(shlex.quote(part) for part in cmd)
    quoted_log = shlex.quote(str(log_path))
    shell_cmd = f"{quoted_cmd} >> {quoted_log} 2>&1 & echo $!"

    proc = subprocess.run(["bash", "-lc", shell_cmd], text=True, capture_output=True)

    # A login shell (-l) may run startup files that print to stdout before
    # our `echo $!`; the PID is the last whitespace-separated token.
    stdout_tokens = proc.stdout.split()
    pid = stdout_tokens[-1] if stdout_tokens else ""

    archive_exists = ARCHIVE_PATH.exists()
    return {
        "action": "bg-download",
        "returncode": proc.returncode,
        "pid": pid,
        "log_path": str(log_path.resolve()),
        "archive_path": str(ARCHIVE_PATH.resolve()),
        "archive_exists": archive_exists,
        "archive_size": ARCHIVE_PATH.stat().st_size if archive_exists else 0,
        "stderr_tail": proc.stderr[-1200:],
    }
60
61
40 def inspect() -> dict: 62 def inspect() -> dict:
41 archive_exists = ARCHIVE_PATH.exists() 63 archive_exists = ARCHIVE_PATH.exists()
42 extract_exists = EXTRACT_DIR.exists() 64 extract_exists = EXTRACT_DIR.exists()
...@@ -95,6 +117,10 @@ def main(): ...@@ -95,6 +117,10 @@ def main():
95 p = sub.add_parser("download") 117 p = sub.add_parser("download")
96 p.add_argument("--no-resume", action="store_true") 118 p.add_argument("--no-resume", action="store_true")
97 119
120 p = sub.add_parser("bg-download")
121 p.add_argument("--no-resume", action="store_true")
122 p.add_argument("--log-path", default="/tmp/fma_modelscope_download.log")
123
98 sub.add_parser("inspect") 124 sub.add_parser("inspect")
99 125
100 p = sub.add_parser("extract") 126 p = sub.add_parser("extract")
...@@ -103,6 +129,8 @@ def main(): ...@@ -103,6 +129,8 @@ def main():
103 args = parser.parse_args() 129 args = parser.parse_args()
104 if args.cmd == "download": 130 if args.cmd == "download":
105 result = download(resume=not args.no_resume) 131 result = download(resume=not args.no_resume)
132 elif args.cmd == "bg-download":
133 result = bg_download(Path(args.log_path), resume=not args.no_resume)
106 elif args.cmd == "inspect": 134 elif args.cmd == "inspect":
107 result = inspect() 135 result = inspect()
108 elif args.cmd == "extract": 136 elif args.cmd == "extract":
......
...@@ -231,6 +231,31 @@ ...@@ -231,6 +231,31 @@
231 231
232 232
233 233
234
235 ### Stage: FMA 后台续传恢复
236
237 完成项:
238 - [acr-engine/scripts/prepare_fma_archive.py](../acr-engine/scripts/prepare_fma_archive.py) 新增 `bg-download`
239 - 使用 `nohup curl` + 日志文件的方式增强大文件后台续传稳定性
240 - 在发现下载停滞后,切换到新的后台恢复路径并重新托管 ModelScope 下载
241
242 验证结果:
243 - `/usr/local/miniconda3/bin/python scripts/prepare_fma_archive.py bg-download` 成功
244 - 当前返回:
245 - `returncode=0`
246 - `pid=47175`
247 - `log_path=/tmp/fma_modelscope_download.log`
248 - 重新 inspect 后结果:
249 - `archive_size` 从 `61550592` 增长到 `71835648`
250 - `archive_progress_percent=0.9354`
251 - 日志验证:
252 - `Resuming transfer from byte position 61550592`
253 - 当前吞吐已达到 MB/s 级别
254
255 结论:
256 - FMA 真实数据下载不再依赖脆弱的一次性前台命令
257 - 当前已恢复到可持续的后台续传状态,后续 session 更容易接力
258
234 ### Stage: FMA 下载进度可视化 259 ### Stage: FMA 下载进度可视化
235 260
236 完成项: 261 完成项:
......