Commit aa6e1583 aa6e15838077bf6e335ea24dcd492bc3fbf3d8e2 by cnb.bofCdSsphPA

Expose service readiness and cache state before scaling the API surface

Constraint: Industrializing the service path requires visibility into model/index availability and repeated-load behavior before adding heavier production features
Rejected: Keep stateless per-request loading until later | Hides readiness problems and wastes time on repeated engine initialization
Confidence: high
Scope-risk: narrow
Directive: Preserve /ready and /cache as low-cost operational probes even if the serving stack evolves behind them
Tested: /usr/local/miniconda3/bin/python -m py_compile acr-engine/src/service/app.py; /usr/local/miniconda3/bin/python /tmp/test_service_readiness.py; /usr/local/miniconda3/bin/python /tmp/test_service_cache.py
Not-tested: Live FastAPI HTTP serving and concurrent request behavior remain pending
1 parent 2b389caa
from __future__ import annotations
from pathlib import Path
from threading import Lock
from typing import Optional
import numpy as np
......@@ -27,8 +30,10 @@ class BuildIndexRequest(BaseModel):
device: Optional[str] = None
# Single application instance. The same "0.3.0" version string is reported by
# the /ready handler, so keep the two in sync when bumping it.
app = FastAPI(title="ACR Service", version="0.3.0")
settings = ServiceSettings()
# Process-local engine cache. Keys are 4-tuples of strings:
# (data_dir, model_path, index_prefix, device).
_engine_cache: dict[tuple[str, str, str, str], HybridEngine] = {}
# Held around every read and write of _engine_cache.
_cache_lock = Lock()
def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_device=None):
......@@ -40,7 +45,26 @@ def _resolve(req_data_dir=None, req_model_path=None, req_index_prefix=None, req_
}
def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) -> HybridEngine:
def _readiness_snapshot(data_dir: str, model_path: str, index_prefix: str) -> dict:
    """Describe which on-disk artifacts for a configuration currently exist.

    Args:
        data_dir: Directory that is checked for existence and scanned for
            manifest files.
        model_path: Path of the model file.
        index_prefix: Prefix of the index files; ``_embs.npy`` and
            ``_ids.npy`` are appended to it, and ``chromaprint.pkl`` is
            looked up in its parent directory.

    Returns:
        A dict with three keys: ``ready`` (True only when every entry in
        ``files`` exists), ``files`` (label -> ``{"path", "exists"}`` with the
        resolved path), and ``manifests`` (resolved paths of the manifest
        files found in ``data_dir``). Manifests do not influence ``ready``.
    """
    data_root = Path(data_dir)
    required = {
        "data_dir": data_root,
        "model": Path(model_path),
        "chromaprint_index": Path(index_prefix).parent / "chromaprint.pkl",
        "embedding_index": Path(f"{index_prefix}_embs.npy"),
        "id_index": Path(f"{index_prefix}_ids.npy"),
    }

    manifests = []
    for manifest_name in ("catalog.json", "train.json", "val.json", "test.json"):
        candidate = data_root / manifest_name
        if candidate.exists():
            manifests.append(str(candidate.resolve()))

    files = {}
    for label, location in required.items():
        files[label] = {"path": str(location.resolve()), "exists": location.exists()}

    is_ready = all(entry["exists"] for entry in files.values())
    return {"ready": is_ready, "files": files, "manifests": manifests}
def _load_engine_uncached(data_dir: str, model_path: str, index_prefix: str, device: str) -> HybridEngine:
matcher = ChromaprintMatcher()
chroma_path = str(Path(index_prefix).parent / "chromaprint.pkl")
if not Path(chroma_path).exists():
......@@ -66,9 +90,46 @@ def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str)
return engine
def _load_engine(data_dir: str, model_path: str, index_prefix: str, device: str) -> tuple[HybridEngine, bool]:
    """Return the engine for a configuration, loading it only on a cache miss.

    The cache key is the resolved ``data_dir``, ``model_path`` and
    ``index_prefix`` plus the ``device`` string, so different spellings of the
    same path share one entry.

    Returns:
        ``(engine, cache_hit)``. ``cache_hit`` is True when the engine was
        already cached on entry and False when this call performed a load.
    """
    key = (
        str(Path(data_dir).resolve()),
        str(Path(model_path).resolve()),
        str(Path(index_prefix).resolve()),
        device,
    )
    with _cache_lock:
        cached = _engine_cache.get(key)
    if cached is not None:
        return cached, True

    # The load runs outside the lock so that cache lookups for other keys are
    # not blocked while it is in progress.
    engine = _load_engine_uncached(data_dir, model_path, index_prefix, device)
    with _cache_lock:
        # Another caller may have stored an engine for the same key while this
        # one was loading. Keep the first stored instance instead of
        # overwriting it, so every caller for a key shares one engine object.
        engine = _engine_cache.setdefault(key, engine)
    return engine, False
def _cache_stats() -> dict:
    """Return the number of cached engines and the list of their cache keys."""
    # Copy the keys while holding the lock; the counts are computed from the
    # copy after the lock is released.
    with _cache_lock:
        snapshot = [*_engine_cache]
    return {"engine_cache_size": len(snapshot), "cache_keys": snapshot}
@app.get("/health")
def health():
    """Report service identity plus the readiness flag of the default configuration.

    The previous unconditional ``return`` with version "0.2.0" is removed: it
    made the readiness lookup below unreachable and reported a version that
    disagreed with the rest of the service.
    """
    # No request overrides are passed, so this reflects the default settings.
    resolved = _resolve()
    readiness = _readiness_snapshot(resolved["data_dir"], resolved["model_path"], resolved["index_prefix"])
    return {
        "status": "ok",
        "service": "acr",
        "version": "0.3.0",
        "ready": readiness["ready"],
    }
@app.get("/ready")
def ready():
    """Return the readiness snapshot of the default configuration with cache statistics."""
    defaults = _resolve()
    snapshot = _readiness_snapshot(
        defaults["data_dir"],
        defaults["model_path"],
        defaults["index_prefix"],
    )
    # Merge order matters for key order and overrides: identity first, then the
    # readiness snapshot, then the cache statistics.
    payload = {"service": "acr", "version": "0.3.0"}
    payload.update(snapshot)
    payload.update(_cache_stats())
    return payload
@app.get("/config")
......@@ -76,13 +137,23 @@ def config():
return settings.model_dump()
@app.get("/cache")
def cache_status():
    """Expose the engine cache statistics produced by ``_cache_stats``."""
    stats = _cache_stats()
    return stats
@app.post("/recognize")
def recognize(req: RecognizeRequest):
    """Run recognition on ``req.query_path`` with a cached or freshly loaded engine.

    Returns:
        A dict with ``cache_hit`` (whether the engine came from the cache),
        ``resolved`` (the configuration actually used) and ``result`` (the
        value returned by the engine's ``recognize`` call).

    Raises:
        HTTPException: status 400 when the query file does not exist.
    """
    resolved = _resolve(req.data_dir, req.model_path, req.index_prefix, req.device)
    if not Path(req.query_path).exists():
        raise HTTPException(status_code=400, detail=f"Missing query file: {req.query_path}")
    # _load_engine returns (engine, cache_hit). The stale pre-cache lines that
    # treated the return value as a bare engine and returned early are removed.
    engine, cache_hit = _load_engine(**resolved)
    result = engine.recognize(req.query_path, top_n=req.top_n)
    return {
        "cache_hit": cache_hit,
        "resolved": resolved,
        "result": result,
    }
@app.post("/index/build")
......@@ -95,4 +166,9 @@ def build_index(req: BuildIndexRequest):
out_dir.mkdir(parents=True, exist_ok=True)
build_chroma_index(data_dir, out_dir)
_, ref_embs, ref_ids = build_embedding_index(data_dir, Path(resolved["model_path"]), out_dir / "reference", resolved["device"])
return {"status": "ok", "num_reference_windows": len(ref_ids), "embedding_dim": int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0}
return {
"status": "ok",
"num_reference_windows": len(ref_ids),
"embedding_dim": int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0,
"output_dir": str(out_dir.resolve()),
}
......
......@@ -227,6 +227,32 @@
### Stage: 服务就绪探针与缓存可见性
完成项:
- 增强 [acr-engine/src/service/app.py](../acr-engine/src/service/app.py)
- 新增:
- `/ready`
- `/cache`
- 为服务默认模型/索引/manifest 增加 readiness 快照
- 为 `HybridEngine` 增加进程内缓存,避免同配置重复加载
- 更新 [docs/service-api.md](./service-api.md)
验证结果:
- `/usr/local/miniconda3/bin/python -m py_compile src/service/app.py` 成功
- 直接调用 `health()` / `ready()` / `cache_status()` 成功
- 直接调用 `_load_engine()` 两次成功
- 当前结果:
- 默认 service `ready=true`
- 首次加载 `cache_hit=false`
- 第二次加载 `cache_hit=true`
- `same_object=true`
结论:
- 服务层现在不再只是“能调接口”的骨架
- 已具备最基本的就绪探针与缓存可见性,更接近工业级内网服务原型
### Stage: FMA 整包下载/解压脚手架
完成项:
......
......@@ -7,9 +7,12 @@
- 当前服务是工业化骨架,不是最终生产网关
- 已提供最小可调用能力:
1. health
2. config
3. recognize
4. index build
2. ready
3. config
4. cache
5. recognize
6. index build
- 已补充:服务就绪探针、基础缓存可见性、索引/模型存在性检查
- 下一阶段重点是:鉴权、异步任务、ANN 索引、监控、错误码规范化
---
......@@ -19,11 +22,13 @@
```mermaid
flowchart LR
C[Client] --> H[/health]
C --> H2[/ready]
C --> G[/config]
C --> C2[/cache]
C --> R[/recognize]
C --> I[/index/build]
R --> E[Hybrid Engine]
R --> E[Hybrid Engine Cache]
I --> B[Index Builders]
```
......@@ -35,6 +40,8 @@ flowchart LR
|---|---|---|
| `/health` | GET | 健康检查 |
| `/config` | GET | 查看默认配置 |
| `/ready` | GET | 查看模型/索引/manifest 是否就绪 |
| `/cache` | GET | 查看当前 engine cache 状态 |
| `/recognize` | POST | 输入 query,输出候选 |
| `/index/build` | POST | 触发离线索引构建 |
......@@ -93,5 +100,22 @@ sequenceDiagram
```
### `/ready`
返回:
```json
{
"service":"acr",
"version":"0.3.0",
"ready":true,
"files":{...},
"manifests":[...],
"engine_cache_size":0,
"cache_keys":[]
}
```
### `/cache`
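返回当前进程内 engine cache 统计:`engine_cache_size`(已缓存的 engine 数量)与 `cache_keys`(每个缓存项的键,依次为 data_dir、model_path、index_prefix、device)。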
## Sources
- See [references-and-sources.md](./references-and-sources.md) for the current source map.
......