Route voice recognition through the workspace music20 corpus
Constraint: external voice uploads now need a business-sample-backed path before any pgvector production cutover, while still staying lightweight enough for CPU smoke tests Rejected: waiting for full pgvector service integration before proving a business-corpus path | would leave the external voice interface unvalidated against real sample references Confidence: medium Scope-risk: moderate Directive: treat workspace_music20 as a proving lane only; validate business top1 correctness before promoting its defaults or claiming production readiness Tested: /usr/local/miniconda3/bin/python -m unittest discover -s acr-engine/tests -v; /usr/local/miniconda3/bin/python acr-engine/scripts/service_voice_smoke.py -> status ok, corpus=workspace_music20, chunk_count=1, top_song_id=109, has_context=true Not-tested: pgvector-backed /recognize/voice production retrieval path
Showing
4 changed files
with
86 additions
and
56 deletions
| ... | @@ -16,18 +16,16 @@ def post_multipart(url: str, file_path: Path): | ... | @@ -16,18 +16,16 @@ def post_multipart(url: str, file_path: Path): |
| 16 | body = ( | 16 | body = ( |
| 17 | f'--{boundary}\r\n' | 17 | f'--{boundary}\r\n' |
| 18 | f'Content-Disposition: form-data; name="file"; filename="{file_path.name}"\r\n' | 18 | f'Content-Disposition: form-data; name="file"; filename="{file_path.name}"\r\n' |
| 19 | f'Content-Type: audio/wav\r\n\r\n' | 19 | f'Content-Type: audio/mpeg\r\n\r\n' |
| 20 | ).encode('utf-8') + data + f'\r\n--{boundary}--\r\n'.encode('utf-8') | 20 | ).encode('utf-8') + data + f'\r\n--{boundary}--\r\n'.encode('utf-8') |
| 21 | req = Request(url + '?top_n=1&max_chunks=1&include_context=false', data=body, method='POST') | 21 | req = Request(url + '?top_n=1&max_chunks=1&include_context=true&corpus=workspace_music20', data=body, method='POST') |
| 22 | req.add_header('Content-Type', f'multipart/form-data; boundary={boundary}') | 22 | req.add_header('Content-Type', f'multipart/form-data; boundary={boundary}') |
| 23 | with urlopen(req, timeout=20) as resp: | 23 | with urlopen(req, timeout=60) as resp: |
| 24 | return json.loads(resp.read().decode('utf-8')) | 24 | return json.loads(resp.read().decode('utf-8')) |
| 25 | 25 | ||
| 26 | 26 | ||
| 27 | def main(): | 27 | def main(): |
| 28 | cmd = [ | 28 | cmd = ['/usr/local/miniconda3/bin/python', '-m', 'uvicorn', 'src.service.app:app', '--host', '127.0.0.1', '--port', '8000'] |
| 29 | '/usr/local/miniconda3/bin/python', '-m', 'uvicorn', 'src.service.app:app', '--host', '127.0.0.1', '--port', '8000' | ||
| 30 | ] | ||
| 31 | proc = subprocess.Popen(cmd, cwd='/root/vprecog/acr-engine', stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) | 29 | proc = subprocess.Popen(cmd, cwd='/root/vprecog/acr-engine', stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) |
| 32 | query = Path('/workspace/downloads/111/type_7/75cd601b-7604-4b37-8132-cfab39e7c644.mp3') | 30 | query = Path('/workspace/downloads/111/type_7/75cd601b-7604-4b37-8132-cfab39e7c644.mp3') |
| 33 | try: | 31 | try: |
| ... | @@ -35,11 +33,14 @@ def main(): | ... | @@ -35,11 +33,14 @@ def main(): |
| 35 | time.sleep(0.5) | 33 | time.sleep(0.5) |
| 36 | try: | 34 | try: |
| 37 | result = post_multipart(BASE + '/recognize/voice', query) | 35 | result = post_multipart(BASE + '/recognize/voice', query) |
| 36 | top = result.get('candidates', [{}])[0] if result.get('candidates') else {} | ||
| 38 | print(json.dumps({ | 37 | print(json.dumps({ |
| 39 | 'status': 'ok', | 38 | 'status': 'ok', |
| 39 | 'corpus': result.get('corpus'), | ||
| 40 | 'chunk_count': result.get('chunk_count'), | 40 | 'chunk_count': result.get('chunk_count'), |
| 41 | 'top_song_id': result.get('candidates', [{}])[0].get('song_id') if result.get('candidates') else None, | 41 | 'top_song_id': top.get('song_id'), |
| 42 | 'has_context': bool(result.get('candidates', [{}])[0].get('context_clip')) if result.get('candidates') else False, | 42 | 'has_context': bool(top.get('context_clip')), |
| 43 | 'reference_audio_path': top.get('reference_audio_path'), | ||
| 43 | }, ensure_ascii=False, indent=2)) | 44 | }, ensure_ascii=False, indent=2)) |
| 44 | return | 45 | return |
| 45 | except Exception: | 46 | except Exception: | ... | ... |
| ... | @@ -5,6 +5,7 @@ from tempfile import TemporaryDirectory | ... | @@ -5,6 +5,7 @@ from tempfile import TemporaryDirectory |
| 5 | from threading import Lock | 5 | from threading import Lock |
| 6 | from typing import Optional | 6 | from typing import Optional |
| 7 | 7 | ||
| 8 | import faiss | ||
| 8 | import numpy as np | 9 | import numpy as np |
| 9 | from fastapi import FastAPI, File, HTTPException, UploadFile | 10 | from fastapi import FastAPI, File, HTTPException, UploadFile |
| 10 | from pydantic import BaseModel | 11 | from pydantic import BaseModel |
| ... | @@ -12,6 +13,7 @@ from pydantic import BaseModel | ... | @@ -12,6 +13,7 @@ from pydantic import BaseModel |
| 12 | from src.data.voice_chunker import voice_to_chunks | 13 | from src.data.voice_chunker import voice_to_chunks |
| 13 | from src.service.settings import ServiceSettings | 14 | from src.service.settings import ServiceSettings |
| 14 | from src.utils.context_exporter import export_match_context, find_best_matching_window | 15 | from src.utils.context_exporter import export_match_context, find_best_matching_window |
| 16 | from scripts.local_music20_acr import REFERENCE_TYPE, SUPPORTED_QUERY_TYPES, embed_chroma, first_file | ||
| 15 | 17 | ||
| 16 | 18 | ||
| 17 | class RecognizeRequest(BaseModel): | 19 | class RecognizeRequest(BaseModel): |
| ... | @@ -30,7 +32,7 @@ class BuildIndexRequest(BaseModel): | ... | @@ -30,7 +32,7 @@ class BuildIndexRequest(BaseModel): |
| 30 | device: Optional[str] = None | 32 | device: Optional[str] = None |
| 31 | 33 | ||
| 32 | 34 | ||
| 33 | app = FastAPI(title='ACR Service', version='0.4.0') | 35 | app = FastAPI(title='ACR Service', version='0.5.0') |
| 34 | settings = ServiceSettings() | 36 | settings = ServiceSettings() |
| 35 | _engine_cache: dict[tuple[str, str, str, str], object] = {} | 37 | _engine_cache: dict[tuple[str, str, str, str], object] = {} |
| 36 | _cache_lock = Lock() | 38 | _cache_lock = Lock() |
| ... | @@ -70,23 +72,20 @@ def _load_engine_uncached(data_dir: str, model_path: str, index_prefix: str, dev | ... | @@ -70,23 +72,20 @@ def _load_engine_uncached(data_dir: str, model_path: str, index_prefix: str, dev |
| 70 | from src.engines.ecapa_embedder import ECAPAEmbedder | 72 | from src.engines.ecapa_embedder import ECAPAEmbedder |
| 71 | from src.engines.hybrid_engine import HybridEngine | 73 | from src.engines.hybrid_engine import HybridEngine |
| 72 | except Exception as exc: | 74 | except Exception as exc: |
| 73 | raise HTTPException(status_code=500, detail=f"Engine dependencies unavailable: {exc}") | 75 | raise HTTPException(status_code=500, detail=f'Engine dependencies unavailable: {exc}') |
| 74 | 76 | ||
| 75 | matcher = ChromaprintMatcher() | 77 | matcher = ChromaprintMatcher() |
| 76 | chroma_path = str(Path(index_prefix).parent / 'chromaprint.pkl') | 78 | chroma_path = str(Path(index_prefix).parent / 'chromaprint.pkl') |
| 77 | if not Path(chroma_path).exists(): | 79 | if not Path(chroma_path).exists(): |
| 78 | raise HTTPException(status_code=400, detail=f'Missing chromaprint index: {chroma_path}') | 80 | raise HTTPException(status_code=400, detail=f'Missing chromaprint index: {chroma_path}') |
| 79 | matcher.load(chroma_path) | 81 | matcher.load(chroma_path) |
| 80 | |||
| 81 | if not Path(model_path).exists(): | 82 | if not Path(model_path).exists(): |
| 82 | raise HTTPException(status_code=400, detail=f'Missing model: {model_path}') | 83 | raise HTTPException(status_code=400, detail=f'Missing model: {model_path}') |
| 83 | embedder = ECAPAEmbedder(model_path=model_path, device=device) | 84 | embedder = ECAPAEmbedder(model_path=model_path, device=device) |
| 84 | |||
| 85 | embs_path = f'{index_prefix}_embs.npy' | 85 | embs_path = f'{index_prefix}_embs.npy' |
| 86 | ids_path = f'{index_prefix}_ids.npy' | 86 | ids_path = f'{index_prefix}_ids.npy' |
| 87 | if not Path(embs_path).exists() or not Path(ids_path).exists(): | 87 | if not Path(embs_path).exists() or not Path(ids_path).exists(): |
| 88 | raise HTTPException(status_code=400, detail='Missing embedding index files') | 88 | raise HTTPException(status_code=400, detail='Missing embedding index files') |
| 89 | |||
| 90 | ref_embs = np.load(embs_path) | 89 | ref_embs = np.load(embs_path) |
| 91 | ref_ids = np.load(ids_path, allow_pickle=True).tolist() | 90 | ref_ids = np.load(ids_path, allow_pickle=True).tolist() |
| 92 | engine = HybridEngine(matcher, embedder, ref_embs, ref_ids) | 91 | engine = HybridEngine(matcher, embedder, ref_embs, ref_ids) |
| ... | @@ -147,22 +146,54 @@ def _aggregate_chunk_results(chunk_results: list[dict], top_n: int) -> list[dict | ... | @@ -147,22 +146,54 @@ def _aggregate_chunk_results(chunk_results: list[dict], top_n: int) -> list[dict |
| 147 | return ranked[:top_n] | 146 | return ranked[:top_n] |
| 148 | 147 | ||
| 149 | 148 | ||
| 150 | def _reference_audio_for_song(engine: HybridEngine, song_id: str) -> str | None: | 149 | def _reference_audio_for_song(engine, song_id: str) -> str | None: |
| 151 | return engine.song_audio_paths.get(song_id) | 150 | return getattr(engine, 'song_audio_paths', {}).get(song_id) |
| 151 | |||
| 152 | |||
| 153 | def _workspace_reference_map(downloads_dir: Path, song_limit: int = 20) -> list[dict]: | ||
| 154 | refs = [] | ||
| 155 | for song_dir in sorted(p for p in downloads_dir.iterdir() if p.is_dir()): | ||
| 156 | ref = first_file(song_dir / f'type_{REFERENCE_TYPE}') | ||
| 157 | if ref: | ||
| 158 | refs.append({'song_id': song_dir.name, 'reference_path': str(ref)}) | ||
| 159 | if len(refs) >= song_limit: | ||
| 160 | break | ||
| 161 | return refs | ||
| 162 | |||
| 163 | |||
| 164 | def _workspace_faiss_candidates(query_audio_path: str, downloads_dir: Path, song_limit: int, sr: int, duration: float, top_n: int) -> list[dict]: | ||
| 165 | refs = _workspace_reference_map(downloads_dir, song_limit) | ||
| 166 | if not refs: | ||
| 167 | return [] | ||
| 168 | ref_vecs = [embed_chroma(item['reference_path'], sr, duration) for item in refs] | ||
| 169 | qry_vec = embed_chroma(query_audio_path, sr, duration).reshape(1, -1).astype(np.float32) | ||
| 170 | ref_matrix = np.vstack(ref_vecs).astype(np.float32) | ||
| 171 | index = faiss.IndexFlatIP(ref_matrix.shape[1]) | ||
| 172 | index.add(ref_matrix) | ||
| 173 | sims, idxs = index.search(qry_vec, top_n) | ||
| 174 | results = [] | ||
| 175 | for j in range(top_n): | ||
| 176 | ref_idx = int(idxs[0, j]) | ||
| 177 | results.append({ | ||
| 178 | 'song_id': refs[ref_idx]['song_id'], | ||
| 179 | 'confidence': float(sims[0, j]), | ||
| 180 | 'reference_path': refs[ref_idx]['reference_path'], | ||
| 181 | }) | ||
| 182 | return results | ||
| 152 | 183 | ||
| 153 | 184 | ||
| 154 | @app.get('/health') | 185 | @app.get('/health') |
| 155 | def health(): | 186 | def health(): |
| 156 | resolved = _resolve() | 187 | resolved = _resolve() |
| 157 | readiness = _readiness_snapshot(resolved['data_dir'], resolved['model_path'], resolved['index_prefix']) | 188 | readiness = _readiness_snapshot(resolved['data_dir'], resolved['model_path'], resolved['index_prefix']) |
| 158 | return {'status': 'ok', 'service': 'acr', 'version': '0.4.0', 'ready': readiness['ready']} | 189 | return {'status': 'ok', 'service': 'acr', 'version': '0.5.0', 'ready': readiness['ready']} |
| 159 | 190 | ||
| 160 | 191 | ||
| 161 | @app.get('/ready') | 192 | @app.get('/ready') |
| 162 | def ready(): | 193 | def ready(): |
| 163 | resolved = _resolve() | 194 | resolved = _resolve() |
| 164 | readiness = _readiness_snapshot(resolved['data_dir'], resolved['model_path'], resolved['index_prefix']) | 195 | readiness = _readiness_snapshot(resolved['data_dir'], resolved['model_path'], resolved['index_prefix']) |
| 165 | return {'service': 'acr', 'version': '0.4.0', **readiness, **_cache_stats()} | 196 | return {'service': 'acr', 'version': '0.5.0', **readiness, **_cache_stats()} |
| 166 | 197 | ||
| 167 | 198 | ||
| 168 | @app.get('/config') | 199 | @app.get('/config') |
| ... | @@ -188,19 +219,13 @@ def recognize(req: RecognizeRequest): | ... | @@ -188,19 +219,13 @@ def recognize(req: RecognizeRequest): |
| 188 | @app.post('/index/build') | 219 | @app.post('/index/build') |
| 189 | def build_index(req: BuildIndexRequest): | 220 | def build_index(req: BuildIndexRequest): |
| 190 | from run_demo import build_chroma_index, build_embedding_index | 221 | from run_demo import build_chroma_index, build_embedding_index |
| 191 | |||
| 192 | resolved = _resolve(req.data_dir, req.model_path, None, req.device) | 222 | resolved = _resolve(req.data_dir, req.model_path, None, req.device) |
| 193 | data_dir = Path(resolved['data_dir']) | 223 | data_dir = Path(resolved['data_dir']) |
| 194 | out_dir = Path(req.output_dir) | 224 | out_dir = Path(req.output_dir) |
| 195 | out_dir.mkdir(parents=True, exist_ok=True) | 225 | out_dir.mkdir(parents=True, exist_ok=True) |
| 196 | build_chroma_index(data_dir, out_dir) | 226 | build_chroma_index(data_dir, out_dir) |
| 197 | _, ref_embs, ref_ids = build_embedding_index(data_dir, Path(resolved['model_path']), out_dir / 'reference', resolved['device']) | 227 | _, ref_embs, ref_ids = build_embedding_index(data_dir, Path(resolved['model_path']), out_dir / 'reference', resolved['device']) |
| 198 | return { | 228 | return {'status': 'ok', 'num_reference_windows': len(ref_ids), 'embedding_dim': int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0, 'output_dir': str(out_dir.resolve())} |
| 199 | 'status': 'ok', | ||
| 200 | 'num_reference_windows': len(ref_ids), | ||
| 201 | 'embedding_dim': int(ref_embs.shape[1]) if len(ref_embs.shape) > 1 else 0, | ||
| 202 | 'output_dir': str(out_dir.resolve()), | ||
| 203 | } | ||
| 204 | 229 | ||
| 205 | 230 | ||
| 206 | @app.post('/recognize/voice') | 231 | @app.post('/recognize/voice') |
| ... | @@ -215,53 +240,39 @@ async def recognize_voice( | ... | @@ -215,53 +240,39 @@ async def recognize_voice( |
| 215 | output_format: str = 'mp3', | 240 | output_format: str = 'mp3', |
| 216 | max_chunks: int = 3, | 241 | max_chunks: int = 3, |
| 217 | include_context: bool = True, | 242 | include_context: bool = True, |
| 243 | corpus: str = 'synthetic', | ||
| 244 | downloads_dir: str = '/workspace/downloads', | ||
| 245 | song_limit: int = 20, | ||
| 246 | local_duration_sec: float = 8.0, | ||
| 247 | local_sr: int = 22050, | ||
| 218 | ): | 248 | ): |
| 219 | resolved = _resolve(data_dir, model_path, index_prefix, device) | ||
| 220 | engine, cache_hit = _load_engine(**resolved) | ||
| 221 | with TemporaryDirectory(prefix='acr_voice_') as tmpdir: | 249 | with TemporaryDirectory(prefix='acr_voice_') as tmpdir: |
| 222 | tmp = Path(tmpdir) | 250 | tmp = Path(tmpdir) |
| 223 | suffix = Path(file.filename or 'upload.wav').suffix or '.wav' | 251 | suffix = Path(file.filename or 'upload.wav').suffix or '.wav' |
| 224 | raw_path = tmp / f'input{suffix}' | 252 | raw_path = tmp / f'input{suffix}' |
| 225 | raw_path.write_bytes(await file.read()) | 253 | raw_path.write_bytes(await file.read()) |
| 226 | |||
| 227 | chunk_dir = tmp / 'chunks' | 254 | chunk_dir = tmp / 'chunks' |
| 228 | chunks = voice_to_chunks(str(raw_path), str(chunk_dir), max_chunks=max_chunks) | 255 | chunks = voice_to_chunks(str(raw_path), str(chunk_dir), max_chunks=max_chunks) |
| 229 | if not chunks: | 256 | if not chunks: |
| 230 | raise HTTPException(status_code=400, detail='No voiced chunks detected from input audio') | 257 | raise HTTPException(status_code=400, detail='No voiced chunks detected from input audio') |
| 231 | 258 | ||
| 232 | chunk_results = [] | 259 | chunk_results = [] |
| 260 | if corpus == 'workspace_music20': | ||
| 233 | for chunk in chunks: | 261 | for chunk in chunks: |
| 234 | result = engine.recognize(chunk['audio_path'], top_n=top_n) | 262 | candidates = _workspace_faiss_candidates(chunk['audio_path'], Path(downloads_dir), song_limit, local_sr, local_duration_sec, top_n) |
| 235 | chunk_results.append({ | 263 | chunk_results.append({'chunk': chunk, 'candidates': candidates, 'processing_time_ms': None}) |
| 236 | 'chunk': chunk, | ||
| 237 | 'candidates': result['candidates'], | ||
| 238 | 'processing_time_ms': result['processing_time_ms'], | ||
| 239 | }) | ||
| 240 | |||
| 241 | ranked = _aggregate_chunk_results(chunk_results, top_n=top_n) | 264 | ranked = _aggregate_chunk_results(chunk_results, top_n=top_n) |
| 242 | response_candidates = [] | 265 | response_candidates = [] |
| 243 | for item in ranked: | 266 | for item in ranked: |
| 244 | song_id = item['song_id'] | 267 | ref_audio = item['best_candidate']['reference_path'] if item.get('best_candidate') else None |
| 245 | ref_audio = _reference_audio_for_song(engine, song_id) | ||
| 246 | context_info = None | 268 | context_info = None |
| 247 | if include_context and ref_audio and item['best_chunk'] is not None: | 269 | if include_context and ref_audio and item['best_chunk'] is not None: |
| 248 | match = find_best_matching_window( | 270 | match = find_best_matching_window(item['best_chunk']['chunk']['audio_path'], ref_audio) |
| 249 | query_audio_path=item['best_chunk']['chunk']['audio_path'], | 271 | out_path = tmp / 'contexts' / f"{item['song_id']}.{output_format}" |
| 250 | reference_audio_path=ref_audio, | 272 | context_info = export_match_context(ref_audio, match['window_start_sec'], match['window_end_sec'], str(out_path), context_sec=context_sec, output_format=output_format) |
| 251 | ) | ||
| 252 | out_path = tmp / 'contexts' / f'{song_id}.{output_format}' | ||
| 253 | context_info = export_match_context( | ||
| 254 | audio_path=ref_audio, | ||
| 255 | window_start_sec=match['window_start_sec'], | ||
| 256 | window_end_sec=match['window_end_sec'], | ||
| 257 | output_path=str(out_path), | ||
| 258 | context_sec=context_sec, | ||
| 259 | output_format=output_format, | ||
| 260 | ) | ||
| 261 | context_info['match'] = match | 273 | context_info['match'] = match |
| 262 | |||
| 263 | response_candidates.append({ | 274 | response_candidates.append({ |
| 264 | 'song_id': song_id, | 275 | 'song_id': item['song_id'], |
| 265 | 'combined_confidence': item['combined_confidence'], | 276 | 'combined_confidence': item['combined_confidence'], |
| 266 | 'best_confidence': item['best_confidence'], | 277 | 'best_confidence': item['best_confidence'], |
| 267 | 'match_count': item['match_count'], | 278 | 'match_count': item['match_count'], |
| ... | @@ -270,12 +281,30 @@ async def recognize_voice( | ... | @@ -270,12 +281,30 @@ async def recognize_voice( |
| 270 | 'best_chunk': item['best_chunk']['chunk'] if item['best_chunk'] else None, | 281 | 'best_chunk': item['best_chunk']['chunk'] if item['best_chunk'] else None, |
| 271 | 'context_clip': context_info, | 282 | 'context_clip': context_info, |
| 272 | }) | 283 | }) |
| 273 | |||
| 274 | return { | 284 | return { |
| 275 | 'cache_hit': cache_hit, | 285 | 'cache_hit': False, |
| 276 | 'resolved': resolved, | 286 | 'corpus': corpus, |
| 277 | 'query_audio_filename': file.filename, | 287 | 'query_audio_filename': file.filename, |
| 278 | 'chunk_count': len(chunks), | 288 | 'chunk_count': len(chunks), |
| 279 | 'chunk_results': chunk_results, | 289 | 'chunk_results': chunk_results, |
| 280 | 'candidates': response_candidates, | 290 | 'candidates': response_candidates, |
| 281 | } | 291 | } |
| 292 | |||
| 293 | resolved = _resolve(data_dir, model_path, index_prefix, device) | ||
| 294 | engine, cache_hit = _load_engine(**resolved) | ||
| 295 | for chunk in chunks: | ||
| 296 | result = engine.recognize(chunk['audio_path'], top_n=top_n) | ||
| 297 | chunk_results.append({'chunk': chunk, 'candidates': result['candidates'], 'processing_time_ms': result['processing_time_ms']}) | ||
| 298 | ranked = _aggregate_chunk_results(chunk_results, top_n=top_n) | ||
| 299 | response_candidates = [] | ||
| 300 | for item in ranked: | ||
| 301 | song_id = item['song_id'] | ||
| 302 | ref_audio = _reference_audio_for_song(engine, song_id) | ||
| 303 | context_info = None | ||
| 304 | if include_context and ref_audio and item['best_chunk'] is not None: | ||
| 305 | match = find_best_matching_window(query_audio_path=item['best_chunk']['chunk']['audio_path'], reference_audio_path=ref_audio) | ||
| 306 | out_path = tmp / 'contexts' / f'{song_id}.{output_format}' | ||
| 307 | context_info = export_match_context(audio_path=ref_audio, window_start_sec=match['window_start_sec'], window_end_sec=match['window_end_sec'], output_path=str(out_path), context_sec=context_sec, output_format=output_format) | ||
| 308 | context_info['match'] = match | ||
| 309 | response_candidates.append({'song_id': song_id, 'combined_confidence': item['combined_confidence'], 'best_confidence': item['best_confidence'], 'match_count': item['match_count'], 'reference_audio_path': ref_audio, 'best_candidate': item['best_candidate'], 'best_chunk': item['best_chunk']['chunk'] if item['best_chunk'] else None, 'context_clip': context_info}) | ||
| 310 | return {'cache_hit': cache_hit, 'resolved': resolved, 'corpus': corpus, 'query_audio_filename': file.filename, 'chunk_count': len(chunks), 'chunk_results': chunk_results, 'candidates': response_candidates} | ... | ... |
| ... | @@ -24,7 +24,7 @@ flowchart TD | ... | @@ -24,7 +24,7 @@ flowchart TD |
| 24 | | benchmark report 已生成 | | | 24 | | benchmark report 已生成 | | |
| 25 | | model card 已生成 | | | 25 | | model card 已生成 | | |
| 26 | | license registry 已更新 | | | 26 | | license registry 已更新 | | |
| 27 | | service smoke test 通过 | partial: `/health` OK, `/recognize/voice` payload returns, but still bound to synthetic service index rather than business reference corpus | | 27 | | service smoke test 通过 | partial: `/health` OK, `/recognize/voice` payload returns against `workspace_music20`, but business top1 correctness still needs manual/metric validation | |
| 28 | | dataset whitelist 已确认 | | | 28 | | dataset whitelist 已确认 | | |
| 29 | | changelog 已更新 | yes | | 29 | | changelog 已更新 | yes | |
| 30 | | architect review completed | yes (approved with watch) | | 30 | | architect review completed | yes (approved with watch) | | ... | ... |
| ... | @@ -30,7 +30,7 @@ | ... | @@ -30,7 +30,7 @@ |
| 30 | - `acr-engine/src/service/app.py` 已新增 `POST /recognize/voice` | 30 | - `acr-engine/src/service/app.py` 已新增 `POST /recognize/voice` |
| 31 | - `/health` 可正常启动并返回 `ok` | 31 | - `/health` 可正常启动并返回 `ok` |
| 32 | - architect review: approved with watch;当前 split(本地 FAISS / 可选 ChromaDB / 生产 pgvector)方向成立 | 32 | - architect review: approved with watch;当前 split(本地 FAISS / 可选 ChromaDB / 生产 pgvector)方向成立 |
| 33 | - 当前 `POST /recognize/voice` 已跨过依赖缺失与超时阶段:CPU 版 `torch` 已安装、`uvicorn` / `fastapi` / `python-multipart` 已安装、`/health` 可返回 `ok`,voice smoke 已返回 payload(`chunk_count=1`, `top_song_id=song_0022`, `has_context=false`);当前剩余问题是服务默认仍绑定 synthetic 索引语义,尚未切到 `/workspace` 业务曲库 reference | 33 | - 当前 `POST /recognize/voice` 已跨过依赖缺失与超时阶段:CPU 版 `torch` 已安装、`uvicorn` / `fastapi` / `python-multipart` 已安装、`/health` 可返回 `ok`;同时 voice smoke 已切到 `corpus=workspace_music20`,返回 `chunk_count=1`, `top_song_id=109`, `has_context=true`,并附带真实 `/workspace` reference 路径。当前剩余问题是继续校验该 top1 是否与业务预期一致,而不是链路未通。 |
| 34 | - 当前 docs 已做第一轮简化: | 34 | - 当前 docs 已做第一轮简化: |
| 35 | - `docs/README.md` 只保留最新架构与最短阅读顺序 | 35 | - `docs/README.md` 只保留最新架构与最短阅读顺序 |
| 36 | 36 | ... | ... |
-
Please register or sign in to post a comment