Expose service readiness and cache state before scaling the API surface

Constraint: Industrializing the service path requires visibility into model/index availability and repeated-load behavior before adding heavier production features
Rejected: Keep stateless per-request loading until later | Hides readiness problems and wastes time on repeated engine initialization
Confidence: high
Scope-risk: narrow
Directive: Preserve /ready and /cache as low-cost operational probes even if the serving stack evolves behind them
Tested: /usr/local/miniconda3/bin/python -m py_compile acr-engine/src/service/app.py; /usr/local/miniconda3/bin/python /tmp/test_service_readiness.py; /usr/local/miniconda3/bin/python /tmp/test_service_cache.py
Not-tested: Live FastAPI HTTP serving and concurrent request behavior remain pending

Expose service readiness and cache state before scaling the API surface
Constraint: Industrializing the service path requires visibility into model/index availability and repeated-load behavior before adding heavier production features
Rejected: Keep stateless per-request loading until later | Hides readiness problems and wastes time on repeated engine initialization
Confidence: high
Scope-risk: narrow
Directive: Preserve /ready and /cache as low-cost operational probes even if the serving stack evolves behind them
Tested: /usr/local/miniconda3/bin/python -m py_compile acr-engine/src/service/app.py; /usr/local/miniconda3/bin/python /tmp/test_service_readiness.py; /usr/local/miniconda3/bin/python /tmp/test_service_cache.py
Not-tested: Live FastAPI HTTP serving and concurrent request behavior remain pending
cnb.bofCdSsphPA
Commit aa6e1583 ... aa6e15838077bf6e335ea24dcd492bc3fbf3d8e2 authored 2026-06-02 13:36:50 +0800 by cnb.bofCdSsphPA
Showing 3 changed files with 136 additions and 10 deletions
acr-engine/src/service/app.py
docs/CHANGELOG.md
docs/service-api.md
--- a/acr-engine/src/service/app.py
View file @aa6e158
+++ b/acr-engine/src/service/app.py
View file @aa6e158
+from __future__ import annotations
+
 from pathlib import Path
+from threading import Lock
 from typing import Optional

 import numpy as np
@@ -27,8 +30,10 @@ class BuildIndexRequest(BaseModel):
    device: Optional[str] = None


-app = FastAPI(title="ACR Service", version="0.2.0")
+app = FastAPI(title="ACR Service", version="0.3.0")
 settings = ServiceSettings()
+_engine_cache: dict[tuple[str, str, str, str], HybridEngine] = {}
+_cache_lock = Lock()


 def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_device=None):
@@ -40,7 +45,26 @@ def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_
    }


-def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) -> HybridEngine:
+def _readiness_snapshot(data_dir: str, model_path: str, index_prefix: str) -> dict:
+    chroma_path = str(Path(index_prefix).parent / "chromaprint.pkl")
+    embs_path = f"{index_prefix}_embs.npy"
+    ids_path = f"{index_prefix}_ids.npy"
+    manifest_candidates = [str((Path(data_dir) / split).resolve()) for split in ["catalog.json", "train.json", "val.json", "test.json"] if (Path(data_dir) / split).exists()]
+    files = {
+        "data_dir": {"path": str(Path(data_dir).resolve()), "exists": Path(data_dir).exists()},
+        "model": {"path": str(Path(model_path).resolve()), "exists": Path(model_path).exists()},
+        "chromaprint_index": {"path": str(Path(chroma_path).resolve()), "exists": Path(chroma_path).exists()},
+        "embedding_index": {"path": str(Path(embs_path).resolve()), "exists": Path(embs_path).exists()},
+        "id_index": {"path": str(Path(ids_path).resolve()), "exists": Path(ids_path).exists()},
+    }
+    return {
+        "ready": all(item["exists"] for item in files.values()),
+        "files": files,
+        "manifests": manifest_candidates,
+    }
+
+
+def _load_engine_uncached(data_dir: str, model_path: str, index_prefix: str, device: str) -> HybridEngine:
    matcher = ChromaprintMatcher()
    chroma_path = str(Path(index_prefix).parent / "chromaprint.pkl")
    if not Path(chroma_path).exists():
@@ -66,9 +90,46 @@ def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str)
    return engine


+def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) -> tuple[HybridEngine, bool]:
+    key = (str(Path(data_dir).resolve()), str(Path(model_path).resolve()), str(Path(index_prefix).resolve()), device)
+    with _cache_lock:
+        cached = _engine_cache.get(key)
+    if cached is not None:
+        return cached, True
+    engine = _load_engine_uncached(data_dir, model_path, index_prefix, device)
+    with _cache_lock:
+        _engine_cache[key] = engine
+    return engine, False
+
+
+def _cache_stats() -> dict:
+    with _cache_lock:
+        keys = list(_engine_cache.keys())
+    return {"engine_cache_size": len(keys), "cache_keys": keys}
+
+
 @app.get("/health")
 def health():
-    return {"status": "ok", "service": "acr", "version": "0.2.0"}
+    resolved = _resolve()
+    readiness = _readiness_snapshot(resolved["data_dir"], resolved["model_path"], resolved["index_prefix"])
+    return {
+        "status": "ok",
+        "service": "acr",
+        "version": "0.3.0",
+        "ready": readiness["ready"],
+    }
+
+
+@app.get("/ready")
+def ready():
+    resolved = _resolve()
+    readiness = _readiness_snapshot(resolved["data_dir"], resolved["model_path"], resolved["index_prefix"])
+    return {
+        "service": "acr",
+        "version": "0.3.0",
+        **readiness,
+        **_cache_stats(),
+    }


 @app.get("/config")
@@ -76,13 +137,23 @@ def config():
    return settings.model_dump()


+@app.get("/cache")
+def cache_status():
+    return _cache_stats()
+
+
 @app.post("/recognize")
 def recognize(req: RecognizeRequest):
    resolved = _resolve(req.data_dir, req.model_path, req.index_prefix, req.device)
    if not Path(req.query_path).exists():
        raise HTTPException(status_code=400, detail=f"Missing query file: {req.query_path}")
-    engine = _load_engine(**resolved)
-    return engine.recognize(req.query_path, top_n=req.top_n)
+    engine, cache_hit = _load_engine(**resolved)
+    result = engine.recognize(req.query_path, top_n=req.top_n)
+    return {
+        "cache_hit": cache_hit,
+        "resolved": resolved,
+        "result": result,
+    }


 @app.post("/index/build")
@@ -95,4 +166,9 @@ def build_index(req: BuildIndexRequest):
    out_dir.mkdir(parents=True, exist_ok=True)
    build_chroma_index(data_dir, out_dir)
    _, ref_embs, ref_ids = build_embedding_index(data_dir, Path(resolved["model_path"]), out_dir / "reference", resolved["device"])
-    return {"status": "ok", "num_reference_windows": len(ref_ids), "embedding_dim": int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0}
+    return {
+        "status": "ok",
+        "num_reference_windows": len(ref_ids),
+        "embedding_dim": int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0,
+        "output_dir": str(out_dir.resolve()),
+    }
--- a/docs/CHANGELOG.md
View file @aa6e158
+++ b/docs/CHANGELOG.md
View file @aa6e158
@@ -227,6 +227,32 @@



+
+### Stage: 服务就绪探针与缓存可见性
+
+完成项：
+- 增强 [acr-engine/src/service/app.py](../acr-engine/src/service/app.py)
+- 新增：
+  - `/ready`
+  - `/cache`
+- 为服务默认模型/索引/manifest 增加 readiness 快照
+- 为 `HybridEngine` 增加进程内缓存，避免同配置重复重载
+- 更新 [docs/service-api.md](./service-api.md)
+
+验证结果：
+- `/usr/local/miniconda3/bin/python -m py_compile src/service/app.py` 成功
+- 直接调用 `health()` / `ready()` / `cache_status()` 成功
+- 直接调用 `_load_engine()` 两次成功
+- 当前结果：
+  - 默认 service `ready=true`
+  - 首次加载 `cache_hit=false`
+  - 第二次加载 `cache_hit=true`
+  - `same_object=true`
+
+结论：
+- 服务层现在不再只是“能调接口”的骨架
+- 已具备最基本的就绪探针与缓存可见性，更接近工业级内网服务原型
+
 ### Stage: FMA 整包下载/解压脚手架

 完成项：
--- a/docs/service-api.md
View file @aa6e158
+++ b/docs/service-api.md
View file @aa6e158
@@ -7,9 +7,12 @@
 - 当前服务是工业化骨架，不是最终生产网关
 - 已提供最小可调用能力：
  1. health
-  2. config
-  3. recognize
-  4. index build
+  2. ready
+  3. config
+  4. cache
+  5. recognize
+  6. index build
+- 已补充：服务就绪探针、基础缓存可见性、索引/模型存在性检查
 - 下一阶段重点是：鉴权、异步任务、ANN 索引、监控、错误码规范化

 ---
@@ -19,11 +22,13 @@
 ```mermaid
 flowchart LR
    C[Client] --> H[/health]
+    C --> H2[/ready]
    C --> G[/config]
+    C --> C2[/cache]
    C --> R[/recognize]
    C --> I[/index/build]

-    R --> E[Hybrid Engine]
+    R --> E[Hybrid Engine Cache]
    I --> B[Index Builders]
 ```

@@ -35,6 +40,8 @@ flowchart LR
 |---|---|---|
 | `/health` | GET | 健康检查 |
 | `/config` | GET | 查看默认配置 |
+| `/ready` | GET | 查看数据目录/模型/索引文件是否就绪，并列出已存在的 manifest（manifest 不参与 `ready` 判定） |
+| `/cache` | GET | 查看当前 engine cache 状态 |
 | `/recognize` | POST | 输入 query，输出候选 |
 | `/index/build` | POST | 触发离线索引构建 |

@@ -93,5 +100,22 @@ sequenceDiagram
 ```


+### `/ready`
+返回：
+```json
+{
+  "service":"acr",
+  "version":"0.3.0",
+  "ready":true,
+  "files":{...},
+  "manifests":[...],
+  "engine_cache_size":0,
+  "cache_keys":[]
+}
+```
+
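+### `/cache`
+返回当前进程内 engine cache 统计：`engine_cache_size`（已缓存的 engine 数量）与 `cache_keys`（每个缓存项的键，依次为解析后的 data_dir、model_path、index_prefix 路径以及 device）。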
+
+
 ## Sources
 - See [references-and-sources.md](./references-and-sources.md) for the current source map.