Expose service readiness and cache state before scaling the API surface
Constraint: Industrializing the service path requires visibility into model/index availability and repeated-load behavior before adding heavier production features
Rejected: Keep stateless per-request loading until later | Hides readiness problems and wastes time on repeated engine initialization
Confidence: high
Scope-risk: narrow
Directive: Preserve /ready and /cache as low-cost operational probes even if the serving stack evolves behind them
Tested: /usr/local/miniconda3/bin/python -m py_compile acr-engine/src/service/app.py; /usr/local/miniconda3/bin/python /tmp/test_service_readiness.py; /usr/local/miniconda3/bin/python /tmp/test_service_cache.py
Not-tested: Live FastAPI HTTP serving and concurrent request behavior remain pending
Showing 3 changed files with 136 additions and 10 deletions
| 1 | from __future__ import annotations | ||
| 2 | |||
| 1 | from pathlib import Path | 3 | from pathlib import Path |
| 4 | from threading import Lock | ||
| 2 | from typing import Optional | 5 | from typing import Optional |
| 3 | 6 | ||
| 4 | import numpy as np | 7 | import numpy as np |
| ... | @@ -27,8 +30,10 @@ class BuildIndexRequest(BaseModel): | ... | @@ -27,8 +30,10 @@ class BuildIndexRequest(BaseModel): |
| 27 | device: Optional[str] = None | 30 | device: Optional[str] = None |
| 28 | 31 | ||
| 29 | 32 | ||
| 30 | app = FastAPI(title="ACR Service", version="0.2.0") | 33 | app = FastAPI(title="ACR Service", version="0.3.0") |
| 31 | settings = ServiceSettings() | 34 | settings = ServiceSettings() |
| 35 | _engine_cache: dict[tuple[str, str, str, str], HybridEngine] = {} | ||
| 36 | _cache_lock = Lock() | ||
| 32 | 37 | ||
| 33 | 38 | ||
| 34 | def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_device=None): | 39 | def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_device=None): |
| ... | @@ -40,7 +45,26 @@ def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_ | ... | @@ -40,7 +45,26 @@ def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_ |
| 40 | } | 45 | } |
| 41 | 46 | ||
| 42 | 47 | ||
| 43 | def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) -> HybridEngine: | 48 | def _readiness_snapshot(data_dir: str, model_path: str, index_prefix: str) -> dict: |
| 49 | chroma_path = str(Path(index_prefix).parent / "chromaprint.pkl") | ||
| 50 | embs_path = f"{index_prefix}_embs.npy" | ||
| 51 | ids_path = f"{index_prefix}_ids.npy" | ||
| 52 | manifest_candidates = [str((Path(data_dir) / split).resolve()) for split in ["catalog.json", "train.json", "val.json", "test.json"] if (Path(data_dir) / split).exists()] | ||
| 53 | files = { | ||
| 54 | "data_dir": {"path": str(Path(data_dir).resolve()), "exists": Path(data_dir).exists()}, | ||
| 55 | "model": {"path": str(Path(model_path).resolve()), "exists": Path(model_path).exists()}, | ||
| 56 | "chromaprint_index": {"path": str(Path(chroma_path).resolve()), "exists": Path(chroma_path).exists()}, | ||
| 57 | "embedding_index": {"path": str(Path(embs_path).resolve()), "exists": Path(embs_path).exists()}, | ||
| 58 | "id_index": {"path": str(Path(ids_path).resolve()), "exists": Path(ids_path).exists()}, | ||
| 59 | } | ||
| 60 | return { | ||
| 61 | "ready": all(item["exists"] for item in files.values()), | ||
| 62 | "files": files, | ||
| 63 | "manifests": manifest_candidates, | ||
| 64 | } | ||
| 65 | |||
| 66 | |||
| 67 | def _load_engine_uncached(data_dir: str, model_path: str, index_prefix: str, device: str) -> HybridEngine: | ||
| 44 | matcher = ChromaprintMatcher() | 68 | matcher = ChromaprintMatcher() |
| 45 | chroma_path = str(Path(index_prefix).parent / "chromaprint.pkl") | 69 | chroma_path = str(Path(index_prefix).parent / "chromaprint.pkl") |
| 46 | if not Path(chroma_path).exists(): | 70 | if not Path(chroma_path).exists(): |
| ... | @@ -66,9 +90,46 @@ def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) | ... | @@ -66,9 +90,46 @@ def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) |
| 66 | return engine | 90 | return engine |
| 67 | 91 | ||
| 68 | 92 | ||
| 93 | def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) -> tuple[HybridEngine, bool]: | ||
| 94 | key = (str(Path(data_dir).resolve()), str(Path(model_path).resolve()), str(Path(index_prefix).resolve()), device) | ||
| 95 | with _cache_lock: | ||
| 96 | cached = _engine_cache.get(key) | ||
| 97 | if cached is not None: | ||
| 98 | return cached, True | ||
| 99 | engine = _load_engine_uncached(data_dir, model_path, index_prefix, device) | ||
| 100 | with _cache_lock: | ||
| 101 | _engine_cache[key] = engine | ||
| 102 | return engine, False | ||
| 103 | |||
| 104 | |||
| 105 | def _cache_stats() -> dict: | ||
| 106 | with _cache_lock: | ||
| 107 | keys = list(_engine_cache.keys()) | ||
| 108 | return {"engine_cache_size": len(keys), "cache_keys": keys} | ||
| 109 | |||
| 110 | |||
| 69 | @app.get("/health") | 111 | @app.get("/health") |
| 70 | def health(): | 112 | def health(): |
| 71 | return {"status": "ok", "service": "acr", "version": "0.2.0"} | 113 | resolved = _resolve() |
| 114 | readiness = _readiness_snapshot(resolved["data_dir"], resolved["model_path"], resolved["index_prefix"]) | ||
| 115 | return { | ||
| 116 | "status": "ok", | ||
| 117 | "service": "acr", | ||
| 118 | "version": "0.3.0", | ||
| 119 | "ready": readiness["ready"], | ||
| 120 | } | ||
| 121 | |||
| 122 | |||
| 123 | @app.get("/ready") | ||
| 124 | def ready(): | ||
| 125 | resolved = _resolve() | ||
| 126 | readiness = _readiness_snapshot(resolved["data_dir"], resolved["model_path"], resolved["index_prefix"]) | ||
| 127 | return { | ||
| 128 | "service": "acr", | ||
| 129 | "version": "0.3.0", | ||
| 130 | **readiness, | ||
| 131 | **_cache_stats(), | ||
| 132 | } | ||
| 72 | 133 | ||
| 73 | 134 | ||
| 74 | @app.get("/config") | 135 | @app.get("/config") |
| ... | @@ -76,13 +137,23 @@ def config(): | ... | @@ -76,13 +137,23 @@ def config(): |
| 76 | return settings.model_dump() | 137 | return settings.model_dump() |
| 77 | 138 | ||
| 78 | 139 | ||
| 140 | @app.get("/cache") | ||
| 141 | def cache_status(): | ||
| 142 | return _cache_stats() | ||
| 143 | |||
| 144 | |||
| 79 | @app.post("/recognize") | 145 | @app.post("/recognize") |
| 80 | def recognize(req: RecognizeRequest): | 146 | def recognize(req: RecognizeRequest): |
| 81 | resolved = _resolve(req.data_dir, req.model_path, req.index_prefix, req.device) | 147 | resolved = _resolve(req.data_dir, req.model_path, req.index_prefix, req.device) |
| 82 | if not Path(req.query_path).exists(): | 148 | if not Path(req.query_path).exists(): |
| 83 | raise HTTPException(status_code=400, detail=f"Missing query file: {req.query_path}") | 149 | raise HTTPException(status_code=400, detail=f"Missing query file: {req.query_path}") |
| 84 | engine = _load_engine(**resolved) | 150 | engine, cache_hit = _load_engine(**resolved) |
| 85 | return engine.recognize(req.query_path, top_n=req.top_n) | 151 | result = engine.recognize(req.query_path, top_n=req.top_n) |
| 152 | return { | ||
| 153 | "cache_hit": cache_hit, | ||
| 154 | "resolved": resolved, | ||
| 155 | "result": result, | ||
| 156 | } | ||
| 86 | 157 | ||
| 87 | 158 | ||
| 88 | @app.post("/index/build") | 159 | @app.post("/index/build") |
| ... | @@ -95,4 +166,9 @@ def build_index(req: BuildIndexRequest): | ... | @@ -95,4 +166,9 @@ def build_index(req: BuildIndexRequest): |
| 95 | out_dir.mkdir(parents=True, exist_ok=True) | 166 | out_dir.mkdir(parents=True, exist_ok=True) |
| 96 | build_chroma_index(data_dir, out_dir) | 167 | build_chroma_index(data_dir, out_dir) |
| 97 | _, ref_embs, ref_ids = build_embedding_index(data_dir, Path(resolved["model_path"]), out_dir / "reference", resolved["device"]) | 168 | _, ref_embs, ref_ids = build_embedding_index(data_dir, Path(resolved["model_path"]), out_dir / "reference", resolved["device"]) |
| 98 | return {"status": "ok", "num_reference_windows": len(ref_ids), "embedding_dim": int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0} | 169 | return { |
| 170 | "status": "ok", | ||
| 171 | "num_reference_windows": len(ref_ids), | ||
| 172 | "embedding_dim": int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0, | ||
| 173 | "output_dir": str(out_dir.resolve()), | ||
| 174 | } | ... | ... |
| ... | @@ -227,6 +227,32 @@ | ... | @@ -227,6 +227,32 @@ |
| 227 | 227 | ||
| 228 | 228 | ||
| 229 | 229 | ||
| 230 | |||
| 231 | ### Stage: 服务就绪探针与缓存可见性 | ||
| 232 | |||
| 233 | 完成项: | ||
| 234 | - 增强 [acr-engine/src/service/app.py](../acr-engine/src/service/app.py) | ||
| 235 | - 新增: | ||
| 236 | - `/ready` | ||
| 237 | - `/cache` | ||
| 238 | - 为服务默认模型/索引/manifest 增加 readiness 快照 | ||
| 239 | - 为 `HybridEngine` 增加进程内缓存,避免同配置重复重载 | ||
| 240 | - 更新 [docs/service-api.md](./service-api.md) | ||
| 241 | |||
| 242 | 验证结果: | ||
| 243 | - `/usr/local/miniconda3/bin/python -m py_compile src/service/app.py` 成功 | ||
| 244 | - 直接调用 `health()` / `ready()` / `cache_status()` 成功 | ||
| 245 | - 直接调用 `_load_engine()` 两次成功 | ||
| 246 | - 当前结果: | ||
| 247 | - 默认 service `ready=true` | ||
| 248 | - 首次加载 `cache_hit=false` | ||
| 249 | - 第二次加载 `cache_hit=true` | ||
| 250 | - `same_object=true` | ||
| 251 | |||
| 252 | 结论: | ||
| 253 | - 服务层现在不再只是“能调接口”的骨架 | ||
| 254 | - 已具备最基本的就绪探针与缓存可见性,更接近工业级内网服务原型 | ||
| 255 | |||
| 230 | ### Stage: FMA 整包下载/解压脚手架 | 256 | ### Stage: FMA 整包下载/解压脚手架 |
| 231 | 257 | ||
| 232 | 完成项: | 258 | 完成项: | ... | ... |
| ... | @@ -7,9 +7,12 @@ | ... | @@ -7,9 +7,12 @@ |
| 7 | - 当前服务是工业化骨架,不是最终生产网关 | 7 | - 当前服务是工业化骨架,不是最终生产网关 |
| 8 | - 已提供最小可调用能力: | 8 | - 已提供最小可调用能力: |
| 9 | 1. health | 9 | 1. health |
| 10 | 2. config | 10 | 2. ready |
| 11 | 3. recognize | 11 | 3. config |
| 12 | 4. index build | 12 | 4. cache |
| 13 | 5. recognize | ||
| 14 | 6. index build | ||
| 15 | - 已补充:服务就绪探针、基础缓存可见性、索引/模型存在性检查 | ||
| 13 | - 下一阶段重点是:鉴权、异步任务、ANN 索引、监控、错误码规范化 | 16 | - 下一阶段重点是:鉴权、异步任务、ANN 索引、监控、错误码规范化 |
| 14 | 17 | ||
| 15 | --- | 18 | --- |
| ... | @@ -19,11 +22,13 @@ | ... | @@ -19,11 +22,13 @@ |
| 19 | ```mermaid | 22 | ```mermaid |
| 20 | flowchart LR | 23 | flowchart LR |
| 21 | C[Client] --> H[/health] | 24 | C[Client] --> H[/health] |
| 25 | C --> H2[/ready] | ||
| 22 | C --> G[/config] | 26 | C --> G[/config] |
| 27 | C --> C2[/cache] | ||
| 23 | C --> R[/recognize] | 28 | C --> R[/recognize] |
| 24 | C --> I[/index/build] | 29 | C --> I[/index/build] |
| 25 | 30 | ||
| 26 | R --> E[Hybrid Engine] | 31 | R --> E[Hybrid Engine Cache] |
| 27 | I --> B[Index Builders] | 32 | I --> B[Index Builders] |
| 28 | ``` | 33 | ``` |
| 29 | 34 | ||
| ... | @@ -35,6 +40,8 @@ flowchart LR | ... | @@ -35,6 +40,8 @@ flowchart LR |
| 35 | |---|---|---| | 40 | |---|---|---| |
| 36 | | `/health` | GET | 健康检查 | | 41 | | `/health` | GET | 健康检查 | |
| 37 | | `/config` | GET | 查看默认配置 | | 42 | | `/config` | GET | 查看默认配置 | |
| 43 | | `/ready` | GET | 查看模型/索引/manifest 是否就绪 | | ||
| 44 | | `/cache` | GET | 查看当前 engine cache 状态 | | ||
| 38 | | `/recognize` | POST | 输入 query,输出候选 | | 45 | | `/recognize` | POST | 输入 query,输出候选 | |
| 39 | | `/index/build` | POST | 触发离线索引构建 | | 46 | | `/index/build` | POST | 触发离线索引构建 | |
| 40 | 47 | ||
| ... | @@ -93,5 +100,22 @@ sequenceDiagram | ... | @@ -93,5 +100,22 @@ sequenceDiagram |
| 93 | ``` | 100 | ``` |
| 94 | 101 | ||
| 95 | 102 | ||
| 103 | ### `/ready` | ||
| 104 | 返回: | ||
| 105 | ```json | ||
| 106 | { | ||
| 107 | "service":"acr", | ||
| 108 | "version":"0.3.0", | ||
| 109 | "ready":true, | ||
| 110 | "files":{...}, | ||
| 111 | "manifests":[...], | ||
| 112 | "engine_cache_size":0, "cache_keys":[] | ||
| 113 | } | ||
| 114 | ``` | ||
| 115 | |||
| 116 | ### `/cache` | ||
| 117 | 返回当前进程内 engine cache 统计:`engine_cache_size`(已缓存 engine 数量)与 `cache_keys`(缓存键列表)。 | ||
| 118 | |||
| 119 | |||
| 96 | ## Sources | 120 | ## Sources |
| 97 | - See [references-and-sources.md](./references-and-sources.md) for the current source map. | 121 | - See [references-and-sources.md](./references-and-sources.md) for the current source map. | ... | ... |
-
Please register or sign in to post a comment