# llm_options.py (1.18 KB)
from __future__ import annotations
from typing import Any
def chat_extra_body(config: dict[str, Any]) -> dict[str, Any]:
    """Build the ``extra_body`` payload for an OpenAI-compatible chat request.

    When ``enable_thinking`` is falsy (the default), explicitly pass
    ``chat_template_kwargs.enable_thinking = False`` so the server-side chat
    template suppresses thinking output.  When the flag is truthy, return an
    empty dict and leave the server's default behavior untouched.

    String values such as ``"true"`` / ``"yes"`` / ``"1"`` / ``"on"`` are
    treated as enabled (case-insensitive, surrounding whitespace ignored).
    """
    raw = config.get("enable_thinking", False)
    # Coerce the config value to a bool (same rules as the module's _as_bool).
    if isinstance(raw, str):
        enabled = raw.strip().lower() in {"1", "true", "yes", "on"}
    elif isinstance(raw, bool):
        enabled = raw
    else:
        enabled = bool(raw)
    if enabled:
        return {}
    return {"chat_template_kwargs": {"enable_thinking": False}}
def chat_openai_kwargs(config: dict[str, Any]) -> dict[str, Any]:
    """Assemble keyword arguments for constructing an OpenAI-compatible client.

    Always includes ``extra_body`` when :func:`chat_extra_body` produces a
    non-empty payload.  Unless ``http_keepalive`` is truthy, also supplies
    custom ``http_client`` / ``http_async_client`` instances limited to a
    single connection with keep-alive disabled, using ``timeout_seconds``
    (default 600) as the request timeout.  If ``httpx`` is not installed the
    custom clients are silently skipped (best effort).
    """
    result: dict[str, Any] = {}

    body = chat_extra_body(config)
    if body:
        result["extra_body"] = body

    # Keep-alive requested: no custom HTTP clients needed.
    if _as_bool(config.get("http_keepalive", False)):
        return result

    try:
        import httpx
    except ImportError:
        # Best effort: without httpx we cannot build custom clients.
        return result

    seconds = float(config.get("timeout_seconds", 600))
    no_keepalive = httpx.Limits(max_connections=1, max_keepalive_connections=0)
    per_request = httpx.Timeout(seconds)
    result["http_client"] = httpx.Client(limits=no_keepalive, timeout=per_request)
    result["http_async_client"] = httpx.AsyncClient(limits=no_keepalive, timeout=per_request)
    return result
def _as_bool(value: Any) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.strip().lower() in {"1", "true", "yes", "on"}
return bool(value)