# llm_options.py (1.18 KB)
from __future__ import annotations
from typing import Any
def chat_extra_body(config: dict[str, Any]) -> dict[str, Any]:
    """Build the ``extra_body`` payload for an OpenAI-compatible chat request.

    When ``enable_thinking`` is falsy (the default), explicitly pass
    ``chat_template_kwargs.enable_thinking = False`` so the server-side chat
    template suppresses thinking output.  When the flag is truthy, return an
    empty dict and leave the server's default behavior untouched.

    String values such as ``"true"`` / ``"yes"`` / ``"1"`` / ``"on"`` are
    treated as enabled (case-insensitive, surrounding whitespace ignored).
    """
    raw = config.get("enable_thinking", False)
    # Coerce the config value to a bool (same rules as the module's _as_bool).
    if isinstance(raw, str):
        enabled = raw.strip().lower() in {"1", "true", "yes", "on"}
    elif isinstance(raw, bool):
        enabled = raw
    else:
        enabled = bool(raw)
    if enabled:
        return {}
    return {"chat_template_kwargs": {"enable_thinking": False}}
def chat_openai_kwargs(config: dict[str, Any]) -> dict[str, Any]:
    """Assemble keyword arguments for constructing an OpenAI-compatible client.

    Always includes ``extra_body`` when :func:`chat_extra_body` produces a
    non-empty payload.  Unless ``http_keepalive`` is truthy, also supplies
    custom ``http_client`` / ``http_async_client`` instances limited to a
    single connection with keep-alive disabled, using ``timeout_seconds``
    (default 600) as the request timeout.  If ``httpx`` is not installed the
    custom clients are silently skipped (best effort).
    """
    result: dict[str, Any] = {}

    body = chat_extra_body(config)
    if body:
        result["extra_body"] = body

    # Keep-alive requested: no custom HTTP clients needed.
    if _as_bool(config.get("http_keepalive", False)):
        return result

    try:
        import httpx
    except ImportError:
        # Best effort: without httpx we cannot build custom clients.
        return result

    seconds = float(config.get("timeout_seconds", 600))
    no_keepalive = httpx.Limits(max_connections=1, max_keepalive_connections=0)
    per_request = httpx.Timeout(seconds)
    result["http_client"] = httpx.Client(limits=no_keepalive, timeout=per_request)
    result["http_async_client"] = httpx.AsyncClient(limits=no_keepalive, timeout=per_request)
    return result
def _as_bool(value: Any) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.strip().lower() in {"1", "true", "yes", "on"}
return bool(value)