Commit aa6e1583 aa6e15838077bf6e335ea24dcd492bc3fbf3d8e2 by cnb.bofCdSsphPA

Expose service readiness and cache state before scaling the API surface

Constraint: Industrializing the service path requires visibility into model/index availability and repeated-load behavior before adding heavier production features
Rejected: Keep stateless per-request loading until later | Hides readiness problems and wastes time on repeated engine initialization
Confidence: high
Scope-risk: narrow
Directive: Preserve /ready and /cache as low-cost operational probes even if the serving stack evolves behind them
Tested: /usr/local/miniconda3/bin/python -m py_compile acr-engine/src/service/app.py; /usr/local/miniconda3/bin/python /tmp/test_service_readiness.py; /usr/local/miniconda3/bin/python /tmp/test_service_cache.py
Not-tested: Live FastAPI HTTP serving and concurrent request behavior remain pending
1 parent 2b389caa
1 from __future__ import annotations
2
1 from pathlib import Path 3 from pathlib import Path
4 from threading import Lock
2 from typing import Optional 5 from typing import Optional
3 6
4 import numpy as np 7 import numpy as np
...@@ -27,8 +30,10 @@ class BuildIndexRequest(BaseModel): ...@@ -27,8 +30,10 @@ class BuildIndexRequest(BaseModel):
27 device: Optional[str] = None 30 device: Optional[str] = None
28 31
29 32
# FastAPI application object; the route decorators in this module register on it.
app = FastAPI(title="ACR Service", version="0.3.0")
# Service settings instance; exposed verbatim by the /config endpoint.
settings = ServiceSettings()
# Process-local cache of loaded engines. _load_engine builds each key as
# (resolved data_dir, resolved model_path, resolved index_prefix, device).
_engine_cache: dict[tuple[str, str, str, str], HybridEngine] = {}
# Guards every read and write of _engine_cache.
_cache_lock = Lock()
32 37
33 38
34 def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_device=None): 39 def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_device=None):
...@@ -40,7 +45,26 @@ def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_ ...@@ -40,7 +45,26 @@ def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_
40 } 45 }
41 46
42 47
def _readiness_snapshot(data_dir: str, model_path: str, index_prefix: str) -> dict:
    """Report which on-disk artifacts of a service configuration are present.

    Args:
        data_dir: Directory expected to hold the dataset manifests.
        model_path: Path of the model file.
        index_prefix: Prefix of the embedding index files; the chromaprint
            index is looked up next to it as ``chromaprint.pkl``.

    Returns:
        A dict with three keys: ``ready`` (True only when every entry in
        ``files`` exists), ``files`` (name -> ``{"path", "exists"}`` with the
        path resolved to an absolute string) and ``manifests`` (resolved
        paths of the manifest files found in ``data_dir``). Manifests are
        reported but do not influence ``ready``.
    """
    data_root = Path(data_dir)

    def _probe(target) -> dict:
        # One filesystem check per artifact, reported with its absolute path.
        location = Path(target)
        return {"path": str(location.resolve()), "exists": location.exists()}

    files = {
        "data_dir": _probe(data_root),
        "model": _probe(model_path),
        "chromaprint_index": _probe(Path(index_prefix).parent / "chromaprint.pkl"),
        "embedding_index": _probe(f"{index_prefix}_embs.npy"),
        "id_index": _probe(f"{index_prefix}_ids.npy"),
    }

    manifests = []
    for manifest_name in ("catalog.json", "train.json", "val.json", "test.json"):
        candidate = data_root / manifest_name
        if candidate.exists():
            manifests.append(str(candidate.resolve()))

    is_ready = all(entry["exists"] for entry in files.values())
    return {"ready": is_ready, "files": files, "manifests": manifests}
65
66
67 def _load_engine_uncached(data_dir: str, model_path: str, index_prefix: str, device: str) -> HybridEngine:
44 matcher = ChromaprintMatcher() 68 matcher = ChromaprintMatcher()
45 chroma_path = str(Path(index_prefix).parent / "chromaprint.pkl") 69 chroma_path = str(Path(index_prefix).parent / "chromaprint.pkl")
46 if not Path(chroma_path).exists(): 70 if not Path(chroma_path).exists():
...@@ -66,9 +90,46 @@ def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) ...@@ -66,9 +90,46 @@ def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str)
66 return engine 90 return engine
67 91
68 92
def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) -> tuple[HybridEngine, bool]:
    """Return the engine for a configuration, loading it at most once per key.

    Args:
        data_dir: Dataset directory passed through to the uncached loader.
        model_path: Model file passed through to the uncached loader.
        index_prefix: Index prefix passed through to the uncached loader.
        device: Device string; part of the cache key as given.

    Returns:
        ``(engine, cache_hit)``. ``cache_hit`` is True when the engine was
        already cached on entry, and False when this call had to run the
        uncached loader.
    """
    # Paths are resolved so that different spellings of the same location
    # share one cache entry.
    key = (
        str(Path(data_dir).resolve()),
        str(Path(model_path).resolve()),
        str(Path(index_prefix).resolve()),
        device,
    )
    with _cache_lock:
        cached = _engine_cache.get(key)
    if cached is not None:
        return cached, True

    # The load runs outside the lock so other lookups are not blocked.
    engine = _load_engine_uncached(data_dir, model_path, index_prefix, device)
    with _cache_lock:
        # Another caller may have stored an engine for this key while we were
        # loading. setdefault keeps whichever was stored first, so all callers
        # end up sharing one instance instead of the last writer replacing it.
        engine = _engine_cache.setdefault(key, engine)
    return engine, False
103
104
def _cache_stats() -> dict:
    """Return the number of cached engines and a copy of their cache keys.

    The keys are copied while holding the cache lock; the result dict is
    built after the lock is released.
    """
    with _cache_lock:
        snapshot = [*_engine_cache]
    return {"engine_cache_size": len(snapshot), "cache_keys": snapshot}
109
110
@app.get("/health")
def health():
    """Health probe: always reports ``status: ok`` plus the default readiness flag.

    Readiness is computed for the default configuration returned by
    ``_resolve()`` with no overrides.
    """
    resolved = _resolve()
    readiness = _readiness_snapshot(resolved["data_dir"], resolved["model_path"], resolved["index_prefix"])
    return {
        "status": "ok",
        "service": "acr",
        # Read the version from the FastAPI app so it is declared in one
        # place and cannot drift from the value the app was created with.
        "version": app.version,
        "ready": readiness["ready"],
    }
121
122
@app.get("/ready")
def ready():
    """Readiness probe: file-level readiness of the default config plus cache stats.

    The response merges the service identity, the readiness snapshot
    (``ready``, ``files``, ``manifests``) and the engine cache statistics
    (``engine_cache_size``, ``cache_keys``) into one flat dict.
    """
    resolved = _resolve()
    readiness = _readiness_snapshot(resolved["data_dir"], resolved["model_path"], resolved["index_prefix"])
    return {
        "service": "acr",
        # Read the version from the FastAPI app so it is declared in one
        # place and cannot drift from the value the app was created with.
        "version": app.version,
        **readiness,
        **_cache_stats(),
    }
72 133
73 134
74 @app.get("/config") 135 @app.get("/config")
...@@ -76,13 +137,23 @@ def config(): ...@@ -76,13 +137,23 @@ def config():
76 return settings.model_dump() 137 return settings.model_dump()
77 138
78 139
@app.get("/cache")
def cache_status():
    """Expose the engine cache statistics produced by ``_cache_stats``."""
    stats = _cache_stats()
    return stats
144
@app.post("/recognize")
def recognize(req: RecognizeRequest):
    """Run recognition for one query file using a cached or freshly loaded engine.

    Request overrides are merged with the defaults through ``_resolve``.
    Raises ``HTTPException`` with status 400 when ``req.query_path`` does not
    exist. The response wraps the engine result together with the resolved
    configuration and a flag telling whether the engine came from the cache.
    """
    resolved = _resolve(req.data_dir, req.model_path, req.index_prefix, req.device)

    # Reject a missing query before paying for an engine load.
    query_file = Path(req.query_path)
    if not query_file.exists():
        raise HTTPException(status_code=400, detail=f"Missing query file: {req.query_path}")

    engine, cache_hit = _load_engine(**resolved)
    return {
        "cache_hit": cache_hit,
        "resolved": resolved,
        "result": engine.recognize(req.query_path, top_n=req.top_n),
    }
86 157
87 158
88 @app.post("/index/build") 159 @app.post("/index/build")
...@@ -95,4 +166,9 @@ def build_index(req: BuildIndexRequest): ...@@ -95,4 +166,9 @@ def build_index(req: BuildIndexRequest):
95 out_dir.mkdir(parents=True, exist_ok=True) 166 out_dir.mkdir(parents=True, exist_ok=True)
96 build_chroma_index(data_dir, out_dir) 167 build_chroma_index(data_dir, out_dir)
97 _, ref_embs, ref_ids = build_embedding_index(data_dir, Path(resolved["model_path"]), out_dir / "reference", resolved["device"]) 168 _, ref_embs, ref_ids = build_embedding_index(data_dir, Path(resolved["model_path"]), out_dir / "reference", resolved["device"])
98 return {"status": "ok", "num_reference_windows": len(ref_ids), "embedding_dim": int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0} 169 return {
170 "status": "ok",
171 "num_reference_windows": len(ref_ids),
172 "embedding_dim": int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0,
173 "output_dir": str(out_dir.resolve()),
174 }
......
...@@ -227,6 +227,32 @@ ...@@ -227,6 +227,32 @@
227 227
228 228
229 229
230
231 ### Stage: 服务就绪探针与缓存可见性
232
233 完成项:
234 - 增强 [acr-engine/src/service/app.py](../acr-engine/src/service/app.py)
235 - 新增:
236 - `/ready`
237 - `/cache`
238 - 为服务默认模型/索引/manifest 增加 readiness 快照
239 - 为 `HybridEngine` 增加进程内缓存,避免同配置重复重载
240 - 更新 [docs/service-api.md](./service-api.md)
241
242 验证结果:
243 - `/usr/local/miniconda3/bin/python -m py_compile src/service/app.py` 成功
244 - 直接调用 `health()` / `ready()` / `cache_status()` 成功
245 - 直接调用 `_load_engine()` 两次成功
246 - 当前结果:
247 - 默认 service `ready=true`
248 - 首次加载 `cache_hit=false`
249 - 第二次加载 `cache_hit=true`
250 - `same_object=true`
251
252 结论:
253 - 服务层现在不再只是“能调接口”的骨架
254 - 已具备最基本的就绪探针与缓存可见性,更接近工业级内网服务原型
255
230 ### Stage: FMA 整包下载/解压脚手架 256 ### Stage: FMA 整包下载/解压脚手架
231 257
232 完成项: 258 完成项:
......
...@@ -7,9 +7,12 @@ ...@@ -7,9 +7,12 @@
7 - 当前服务是工业化骨架,不是最终生产网关 7 - 当前服务是工业化骨架,不是最终生产网关
8 - 已提供最小可调用能力: 8 - 已提供最小可调用能力:
9 1. health 9 1. health
10 2. config 10 2. ready
11 3. recognize 11 3. config
12 4. index build 12 4. cache
13 5. recognize
14 6. index build
15 - 已补充:服务就绪探针、基础缓存可见性、索引/模型存在性检查
13 - 下一阶段重点是:鉴权、异步任务、ANN 索引、监控、错误码规范化 16 - 下一阶段重点是:鉴权、异步任务、ANN 索引、监控、错误码规范化
14 17
15 --- 18 ---
...@@ -19,11 +22,13 @@ ...@@ -19,11 +22,13 @@
19 ```mermaid 22 ```mermaid
20 flowchart LR 23 flowchart LR
21 C[Client] --> H[/health] 24 C[Client] --> H[/health]
25 C --> H2[/ready]
22 C --> G[/config] 26 C --> G[/config]
27 C --> C2[/cache]
23 C --> R[/recognize] 28 C --> R[/recognize]
24 C --> I[/index/build] 29 C --> I[/index/build]
25 30
26 R --> E[Hybrid Engine] 31 R --> E[Hybrid Engine Cache]
27 I --> B[Index Builders] 32 I --> B[Index Builders]
28 ``` 33 ```
29 34
...@@ -35,6 +40,8 @@ flowchart LR ...@@ -35,6 +40,8 @@ flowchart LR
35 |---|---|---| 40 |---|---|---|
36 | `/health` | GET | 健康检查 | 41 | `/health` | GET | 健康检查 |
37 | `/config` | GET | 查看默认配置 | 42 | `/config` | GET | 查看默认配置 |
43 | `/ready` | GET | 查看模型/索引/manifest 是否就绪 |
44 | `/cache` | GET | 查看当前 engine cache 状态 |
38 | `/recognize` | POST | 输入 query,输出候选 | 45 | `/recognize` | POST | 输入 query,输出候选 |
39 | `/index/build` | POST | 触发离线索引构建 | 46 | `/index/build` | POST | 触发离线索引构建 |
40 47
...@@ -93,5 +100,22 @@ sequenceDiagram ...@@ -93,5 +100,22 @@ sequenceDiagram
93 ``` 100 ```
94 101
95 102
103 ### `/ready`
104 返回:
105 ```json
106 {
107 "service":"acr",
108 "version":"0.3.0",
109 "ready":true,
110 "files":{...},
111 "manifests":[...],
112 "engine_cache_size":0, "cache_keys":[]
113 }
114 ```
115
116 ### `/cache`
117 返回当前进程内 engine cache 统计:`engine_cache_size`(已缓存的 engine 数量)与 `cache_keys`(对应的缓存键列表)。
118
119
96 ## Sources 120 ## Sources
97 - See [references-and-sources.md](./references-and-sources.md) for the current source map. 121 - See [references-and-sources.md](./references-and-sources.md) for the current source map.
......