zhenxun_bot/zhenxun/services/llm/service.py
webjoin111 bba90e62db ♻️ refactor(llm): restructure the LLM service architecture with a middleware pipeline and component-based adapters
- [Refactor] LLM service core architecture:
    - Introduce a middleware pipeline that unifies the request lifecycle (retry, key selection, logging, network requests); see the sketch after this commit summary.
    - Rework the adapters into components, separating config mapping, message conversion, response parsing, and tool serialization.
    - Remove the `with_smart_retry` decorator; its duties are taken over by the middleware.
    - Remove `LLMToolExecutor`; tool-execution logic is folded into `ToolInvoker`.
- [Feature] Stronger configuration system:
    - `LLMGenerationConfig` adopts a component-based layout (Core, Reasoning, Visual, Output, Safety, ToolConfig).
    - Add `GenConfigBuilder` for semantic configuration building.
    - Add `LLMEmbeddingConfig` for embedding-specific settings.
    - Migrate and update `CommonOverrides` to the new configuration structure.
- [Feature] Enhanced tool system:
    - Introduce `ToolInvoker` for more flexible tool execution, with callbacks and structured errors.
    - The `function_tool` decorator supports dynamic Pydantic model creation and dependency injection (`ToolParam`, `RunContext`).
    - Support platform-native tools (`GeminiCodeExecution`, `GeminiGoogleSearch`, `GeminiUrlContext`).
- [Feature] Advanced generation and embeddings:
    - `generate_structured` supports an In-Context Validation and Repair (IVR) loop and AutoCoT (chain-of-thought) wrapping.
    - Add the `embed_query` and `embed_documents` convenience embedding APIs.
    - `OpenAIImageAdapter` supports OpenAI-compatible image generation.
    - `SmartAdapter` routes requests intelligently by model name.
- [Refactor] Message and type system:
    - `LLMContentPart` is extended to cover more modalities and code-execution content.
    - `LLMMessage` and `LLMResponse` are updated to support `content_parts` and chain-of-thought signatures.
    - Unify `LLMErrorCode` and user-friendly error messages, with more detailed network/proxy error hints.
    - `pyproject.toml` drops `bilireq` and adds `json_repair`.
- [Optimization] Logging and debugging:
    - Introduce `DebugLogOptions` for fine-grained control of log sanitization.
    - Strengthen the log sanitizer to handle more kinds of sensitive data and long strings.
- [Cleanup] Remove deprecated modules:
    - `zhenxun/services/llm/memory.py`
    - `zhenxun/services/llm/executor.py`
    - `zhenxun/services/llm/config/presets.py`
    - `zhenxun/services/llm/types/content.py`
    - `zhenxun/services/llm/types/enums.py`
    - `zhenxun/services/llm/tools/__init__.py`
    - `zhenxun/services/llm/tools/manager.py`
2025-12-07 18:57:55 +08:00
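
To illustrate the middleware contract defined in this file (`BaseLLMMiddleware`, `LLMContext`, `LLMModel.add_middleware`), a minimal sketch of a custom timing middleware follows. It assumes an `LLMModel` instance and a prepared `messages` list are obtained elsewhere; the model factory and `LLMMessage` construction are outside this file, and the import paths below simply mirror this file's package location.

import time

from zhenxun.services.llm.service import BaseLLMMiddleware, LLMContext, NextCall
from zhenxun.services.llm.types import LLMResponse
from zhenxun.services.log import logger


class TimingMiddleware(BaseLLMMiddleware):
    """示例: 记录单次 LLM 请求耗时的自定义中间件"""

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        start = time.monotonic()
        response = await next_call(context)
        logger.info(f"自定义中间件统计耗时: {(time.monotonic() - start) * 1000:.1f}ms")
        return response


async def demo(model, messages) -> None:
    # model 与 messages 假定由上层构造; 模型工厂与 LLMMessage 的构造方式不在本文件范围内
    model.add_middleware(TimingMiddleware())
    async with model:
        response = await model.generate_response(messages)
        logger.info(response.text)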

"""
LLM 模型实现类
包含 LLM 模型的抽象基类和具体实现,负责与各种 AI 提供商的 API 交互。
"""
from abc import ABC, abstractmethod
import asyncio
from collections.abc import Awaitable, Callable
import json
import re
import time
from typing import Any, Literal, TypeVar, cast
import httpx
from pydantic import BaseModel, ConfigDict, Field
from zhenxun.services.log import logger
from zhenxun.utils.http_utils import AsyncHttpx
from zhenxun.utils.log_sanitizer import sanitize_for_logging
from zhenxun.utils.pydantic_compat import dump_json_safely
from .adapters.base import BaseAdapter, RequestData, process_image_data
from .config import LLMGenerationConfig
from .config.generation import LLMEmbeddingConfig
from .config.providers import get_llm_config
from .core import (
KeyStatusStore,
LLMHttpClient,
RetryConfig,
_should_retry_llm_error,
http_client_manager,
)
from .types import (
LLMErrorCode,
LLMException,
LLMMessage,
LLMResponse,
LLMToolCall,
ModelDetail,
ProviderConfig,
ToolChoice,
)
from .types.capabilities import ModelCapabilities, ModelModality
T = TypeVar("T", bound=BaseModel)
class LLMContext(BaseModel):
"""LLM 执行上下文,用于在中间件管道中传递请求状态"""
messages: list[LLMMessage]
config: LLMGenerationConfig | LLMEmbeddingConfig
tools: list[Any] | None
tool_choice: str | dict[str, Any] | ToolChoice | None
timeout: float | None
extra: dict[str, Any] = Field(default_factory=dict)
request_type: Literal["generation", "embedding"] = "generation"
runtime_state: dict[str, Any] = Field(
default_factory=dict,
description="中间件运行时的临时状态存储(api_key, retry_count等)",
)
model_config = ConfigDict(arbitrary_types_allowed=True)
NextCall = Callable[[LLMContext], Awaitable[LLMResponse]]
LLMMiddleware = Callable[[LLMContext, NextCall], Awaitable[LLMResponse]]
class BaseLLMMiddleware(ABC):
"""LLM 中间件抽象基类"""
@abstractmethod
async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
"""
执行中间件逻辑
Args:
context: 请求上下文,包含配置和运行时状态
next_call: 调用链中的下一个处理函数
Returns:
LLMResponse: 模型响应结果
"""
pass
class LLMModelBase(ABC):
"""LLM模型抽象基类"""
@abstractmethod
async def generate_response(
self,
messages: list[LLMMessage],
config: LLMGenerationConfig | None = None,
tools: list[Any] | None = None,
tool_choice: str | dict[str, Any] | ToolChoice | None = None,
timeout: float | None = None,
) -> LLMResponse:
"""生成高级响应"""
pass
@abstractmethod
async def generate_embeddings(
self,
texts: list[str],
config: LLMEmbeddingConfig,
) -> list[list[float]]:
"""生成文本嵌入向量"""
pass
class LLMModel(LLMModelBase):
"""LLM 模型实现类"""
def __init__(
self,
provider_config: ProviderConfig,
model_detail: ModelDetail,
key_store: KeyStatusStore,
http_client: LLMHttpClient,
capabilities: ModelCapabilities,
config_override: LLMGenerationConfig | None = None,
):
self.provider_config = provider_config
self.model_detail = model_detail
self.key_store = key_store
self.http_client: LLMHttpClient = http_client
self.capabilities = capabilities
self._generation_config = config_override
self.provider_name = provider_config.name
self.api_type = provider_config.api_type
self.api_base = provider_config.api_base
self.api_keys = (
[provider_config.api_key]
if isinstance(provider_config.api_key, str)
else provider_config.api_key
)
self.model_name = model_detail.model_name
self.temperature = model_detail.temperature
self.max_tokens = model_detail.max_tokens
self._is_closed = False
self._ref_count = 0
self._middlewares: list[LLMMiddleware] = []
def _has_modality(self, modality: ModelModality, is_input: bool = True) -> bool:
target_set = (
self.capabilities.input_modalities
if is_input
else self.capabilities.output_modalities
)
return modality in target_set
@property
def can_process_images(self) -> bool:
"""检查模型是否支持图片作为输入。"""
return self._has_modality(ModelModality.IMAGE)
@property
def can_process_video(self) -> bool:
"""检查模型是否支持视频作为输入。"""
return self._has_modality(ModelModality.VIDEO)
@property
def can_process_audio(self) -> bool:
"""检查模型是否支持音频作为输入。"""
return self._has_modality(ModelModality.AUDIO)
@property
def can_generate_images(self) -> bool:
"""检查模型是否支持生成图片。"""
return self._has_modality(ModelModality.IMAGE, is_input=False)
@property
def can_generate_audio(self) -> bool:
"""检查模型是否支持生成音频 (TTS)。"""
return self._has_modality(ModelModality.AUDIO, is_input=False)
@property
def is_embedding_model(self) -> bool:
"""检查这是否是一个嵌入模型。"""
return self.capabilities.is_embedding_model
def add_middleware(self, middleware: LLMMiddleware) -> None:
"""注册一个中间件到处理管道的最外层"""
self._middlewares.append(middleware)
def _build_pipeline(self) -> NextCall:
"""
构建完整的中间件调用链。顺序为:
用户自定义中间件 -> Retry -> Logging -> KeySelection -> Network (终结者)
"""
from .adapters import get_adapter_for_api_type
client_settings = get_llm_config().client_settings
retry_config = RetryConfig(
max_retries=client_settings.max_retries,
retry_delay=client_settings.retry_delay,
)
adapter = get_adapter_for_api_type(self.api_type)
network_middleware = NetworkRequestMiddleware(self, adapter)
async def terminal_handler(ctx: LLMContext) -> LLMResponse:
async def _noop(_: LLMContext) -> LLMResponse:
raise RuntimeError("NetworkRequestMiddleware 不应调用 next_call")
return await network_middleware(ctx, _noop)
def _wrap(middleware: LLMMiddleware, next_call: NextCall) -> NextCall:
async def _handler(inner_ctx: LLMContext) -> LLMResponse:
return await middleware(inner_ctx, next_call)
return _handler
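# 管道自内向外包装: 网络请求(终端) -> 密钥选择 -> 日志 -> 重试, 最后将用户注册的中间件置于最外层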
handler: NextCall = terminal_handler
handler = _wrap(
KeySelectionMiddleware(self.key_store, self.provider_name, self.api_keys),
handler,
)
handler = _wrap(
LoggingMiddleware(self.provider_name, self.model_name),
handler,
)
handler = _wrap(
RetryMiddleware(retry_config, self.key_store),
handler,
)
for middleware in reversed(self._middlewares):
handler = _wrap(middleware, handler)
return handler
def _get_effective_api_type(self) -> str:
"""
获取实际生效的 API 类型。
主要用于 Smart 模式下,判断日志净化应该使用哪种格式。
"""
if self.api_type != "smart":
return self.api_type
if self.model_detail.api_type:
return self.model_detail.api_type
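# 未显式配置 api_type 时按模型名称推断: 名称含 gemini 且不含 openai 视为 Gemini, 否则按 OpenAI 兼容处理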
if (
"gemini" in self.model_name.lower()
and "openai" not in self.model_name.lower()
):
return "gemini"
return "openai"
async def _get_http_client(self) -> LLMHttpClient:
"""获取HTTP客户端"""
if self.http_client.is_closed:
logger.debug(
f"LLMModel {self.provider_name}/{self.model_name} 的 HTTP 客户端已关闭,"
"正在获取新的客户端"
)
self.http_client = await http_client_manager.get_client(
self.provider_config
)
return self.http_client
async def _select_api_key(self, failed_keys: set[str] | None = None) -> str:
"""选择可用的API密钥使用轮询策略"""
if not self.api_keys:
raise LLMException(
f"提供商 {self.provider_name} 没有配置API密钥",
code=LLMErrorCode.NO_AVAILABLE_KEYS,
)
selected_key = await self.key_store.get_next_available_key(
self.provider_name, self.api_keys, failed_keys
)
if not selected_key:
raise LLMException(
f"提供商 {self.provider_name} 的所有API密钥当前都不可用",
code=LLMErrorCode.NO_AVAILABLE_KEYS,
details={
"total_keys": len(self.api_keys),
"failed_keys": len(failed_keys or set()),
},
)
return selected_key
async def close(self):
"""标记模型实例的当前使用周期结束"""
if self._is_closed:
return
self._is_closed = True
logger.debug(
f"LLMModel实例的使用周期已结束: {self} (共享HTTP客户端状态不受影响)"
)
async def __aenter__(self):
if self._is_closed:
logger.debug(
f"Re-entering context for closed LLMModel {self}. "
f"Resetting _is_closed to False."
)
self._is_closed = False
self._check_not_closed()
self._ref_count += 1
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""异步上下文管理器出口"""
_ = exc_type, exc_val, exc_tb
self._ref_count -= 1
if self._ref_count <= 0:
self._ref_count = 0
await self.close()
def _check_not_closed(self):
"""检查实例是否已关闭"""
if self._is_closed:
raise RuntimeError(f"LLMModel实例已关闭: {self}")
async def _execute_core_generation(self, context: LLMContext) -> LLMResponse:
"""
[内核] 执行核心生成逻辑:构建管道并执行。
此方法是生成请求进入中间件管道的统一入口。
"""
pipeline_handler = self._build_pipeline()
return await pipeline_handler(context)
async def generate_response(
self,
messages: list[LLMMessage],
config: LLMGenerationConfig | None = None,
tools: list[Any] | None = None,
tool_choice: str | dict[str, Any] | ToolChoice | None = None,
timeout: float | None = None,
) -> LLMResponse:
"""
生成高级响应 (支持中间件管道)。
"""
self._check_not_closed()
if self._generation_config and config:
final_request_config = self._generation_config.merge_with(config)
elif config:
final_request_config = config
else:
final_request_config = self._generation_config or LLMGenerationConfig()
normalized_tools: list[Any] | None = None
if tools:
if isinstance(tools, dict):
normalized_tools = list(tools.values())
elif isinstance(tools, list):
normalized_tools = tools
else:
normalized_tools = [tools]
context = LLMContext(
messages=messages,
config=final_request_config,
tools=normalized_tools,
tool_choice=tool_choice,
timeout=timeout,
)
return await self._execute_core_generation(context)
async def generate_embeddings(
self,
texts: list[str],
config: LLMEmbeddingConfig | None = None,
) -> list[list[float]]:
"""生成文本嵌入向量"""
self._check_not_closed()
if not texts:
return []
final_config = config or LLMEmbeddingConfig()
context = LLMContext(
messages=[],
config=final_config,
tools=None,
tool_choice=None,
timeout=None,
request_type="embedding",
extra={"texts": texts},
)
pipeline = self._build_pipeline()
response = await pipeline(context)
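# 嵌入结果由 NetworkRequestMiddleware 通过 LLMResponse.cache_info["embeddings"] 回传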
embeddings = (
response.cache_info.get("embeddings") if response.cache_info else None
)
if embeddings is None:
raise LLMException(
"嵌入请求未返回 embeddings 数据",
code=LLMErrorCode.EMBEDDING_FAILED,
)
return embeddings
def __str__(self) -> str:
status = "closed" if self._is_closed else "active"
return f"LLMModel({self.provider_name}/{self.model_name}, {status})"
def __repr__(self) -> str:
status = "closed" if self._is_closed else "active"
return (
f"LLMModel(provider={self.provider_name}, model={self.model_name}, "
f"api_type={self.api_type}, status={status})"
)
class RetryMiddleware(BaseLLMMiddleware):
"""
重试中间件:处理异常捕获与重试循环
"""
def __init__(self, retry_config: RetryConfig, key_store: KeyStatusStore):
self.retry_config = retry_config
self.key_store = key_store
async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
last_exception: Exception | None = None
total_attempts = self.retry_config.max_retries + 1
for attempt in range(total_attempts):
try:
context.runtime_state["attempt"] = attempt + 1
return await next_call(context)
except LLMException as e:
last_exception = e
api_key = context.runtime_state.get("api_key")
if api_key:
status_code = e.details.get("status_code")
error_msg = f"({e.code.name}) {e.message}"
await self.key_store.record_failure(api_key, status_code, error_msg)
if not _should_retry_llm_error(
e, attempt, self.retry_config.max_retries
):
raise e
if attempt == total_attempts - 1:
raise e
wait_time = self.retry_config.retry_delay
if self.retry_config.exponential_backoff:
wait_time *= 2**attempt
logger.warning(
f"请求失败,{wait_time:.2f}秒后重试"
f" (第{attempt + 1}/{self.retry_config.max_retries}次重试): {e}"
)
await asyncio.sleep(wait_time)
except Exception as e:
logger.error(f"非预期异常,停止重试: {e}", e=e)
raise e
if last_exception:
raise last_exception
raise LLMException("重试循环异常结束")
class KeySelectionMiddleware(BaseLLMMiddleware):
"""
密钥选择中间件:负责轮询获取可用 API Key
"""
def __init__(
self, key_store: KeyStatusStore, provider_name: str, api_keys: list[str]
):
self.key_store = key_store
self.provider_name = provider_name
self.api_keys = api_keys
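# 同一次请求的多次重试共享该集合, 避免重试时再次选中已失败的密钥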
self._failed_keys: set[str] = set()
async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
selected_key = await self.key_store.get_next_available_key(
self.provider_name, self.api_keys, exclude_keys=self._failed_keys
)
if not selected_key:
raise LLMException(
f"提供商 {self.provider_name} 无可用 API Key",
code=LLMErrorCode.NO_AVAILABLE_KEYS,
)
context.runtime_state["api_key"] = selected_key
try:
response = await next_call(context)
return response
except LLMException as e:
self._failed_keys.add(selected_key)
masked = f"{selected_key[:8]}..."
if isinstance(e.details, dict):
e.details["api_key"] = masked
raise e
class LoggingMiddleware(BaseLLMMiddleware):
"""
日志中间件:负责请求和响应的日志记录与脱敏
"""
def __init__(
self, provider_name: str, model_name: str, log_context: str = "Generation"
):
self.provider_name = provider_name
self.model_name = model_name
self.log_context = log_context
async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
attempt = context.runtime_state.get("attempt", 1)
api_key = context.runtime_state.get("api_key", "unknown")
masked_key = f"{api_key[:8]}..."
logger.info(
f"🌐 发起LLM请求 (尝试 {attempt}) - {self.provider_name}/{self.model_name} "
f"[{self.log_context}] Key: {masked_key}"
)
try:
start_time = time.monotonic()
response = await next_call(context)
duration = (time.monotonic() - start_time) * 1000
logger.info(f"🎯 LLM响应成功 [{self.log_context}] 耗时: {duration:.2f}ms")
return response
except Exception as e:
logger.error(f"❌ 请求异常 [{self.log_context}]: {type(e).__name__} - {e}")
raise e
class NetworkRequestMiddleware(BaseLLMMiddleware):
"""
网络请求中间件:执行 Adapter 转换和 HTTP 请求
"""
def __init__(self, model_instance: "LLMModel", adapter: "BaseAdapter"):
self.model = model_instance
self.http_client = model_instance.http_client
self.adapter = adapter
self.key_store = model_instance.key_store
async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
api_key = context.runtime_state["api_key"]
request_data: RequestData
gen_config: LLMGenerationConfig | None = None
embed_config: LLMEmbeddingConfig | None = None
if context.request_type == "embedding":
embed_config = cast(LLMEmbeddingConfig, context.config)
texts = (context.extra or {}).get("texts", [])
request_data = self.adapter.prepare_embedding_request(
model=self.model,
api_key=api_key,
texts=texts,
config=embed_config,
)
else:
gen_config = cast(LLMGenerationConfig, context.config)
request_data = await self.adapter.prepare_advanced_request(
model=self.model,
api_key=api_key,
messages=context.messages,
config=gen_config,
tools=context.tools,
tool_choice=context.tool_choice,
)
masked_key = (
f"{api_key[:8]}...{api_key[-4:] if len(api_key) > 12 else '***'}"
if api_key
else "N/A"
)
logger.debug(f"🔑 API密钥: {masked_key}")
logger.debug(f"📡 请求URL: {request_data.url}")
logger.debug(f"📋 请求头: {dict(request_data.headers)}")
if self.model.api_type == "smart":
effective_type = self.model._get_effective_api_type()
sanitizer_req_context = f"{effective_type}_request"
else:
sanitizer_req_context = self.adapter.log_sanitization_context
sanitized_body = sanitize_for_logging(
request_data.body, context=sanitizer_req_context
)
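# multipart 上传时不在日志中记录文件内容, 仅写入文件数量与文件名摘要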
if request_data.files and isinstance(sanitized_body, dict):
file_info: list[str] = []
file_count = 0
if isinstance(request_data.files, list):
file_count = len(request_data.files)
for key, value in request_data.files:
filename = (
value[0]
if isinstance(value, tuple) and len(value) > 0
else "..."
)
file_info.append(f"{key}='{filename}'")
elif isinstance(request_data.files, dict):
file_count = len(request_data.files)
file_info = list(request_data.files.keys())
sanitized_body["[MULTIPART_FILES]"] = f"Count: {file_count} | {file_info}"
request_body_str = dump_json_safely(
sanitized_body, ensure_ascii=False, indent=2
)
logger.debug(f"📦 请求体: {request_body_str}")
start_time = time.monotonic()
try:
http_response = await self.http_client.post(
request_data.url,
headers=request_data.headers,
content=dump_json_safely(request_data.body, ensure_ascii=False)
if not request_data.files
else None,
data=request_data.body if request_data.files else None,
files=request_data.files,
timeout=context.timeout,
)
logger.debug(f"📥 响应状态码: {http_response.status_code}")
if exception := self.adapter.handle_http_error(http_response):
error_text = http_response.content.decode("utf-8", errors="ignore")
logger.debug(f"💥 完整错误响应: {error_text}")
await self.key_store.record_failure(
api_key, http_response.status_code, error_text
)
raise exception
response_bytes = await http_response.aread()
logger.debug(f"📦 响应体已完整读取 ({len(response_bytes)} bytes)")
response_json = json.loads(response_bytes)
sanitizer_resp_context = sanitizer_req_context.replace(
"_request", "_response"
)
if sanitizer_resp_context == sanitizer_req_context:
sanitizer_resp_context = f"{sanitizer_req_context}_response"
sanitized_response = sanitize_for_logging(
response_json, context=sanitizer_resp_context
)
response_json_str = json.dumps(
sanitized_response, ensure_ascii=False, indent=2
)
logger.debug(f"📋 响应JSON: {response_json_str}")
if context.request_type == "embedding":
self.adapter.validate_embedding_response(response_json)
embeddings = self.adapter.parse_embedding_response(response_json)
latency = (time.monotonic() - start_time) * 1000
await self.key_store.record_success(api_key, latency)
return LLMResponse(
text="",
raw_response=response_json,
cache_info={"embeddings": embeddings},
)
response_data = self.adapter.parse_response(
self.model, response_json, is_advanced=True
)
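# 图片救援: 当策略要求返回图片而响应仅给出 Markdown 图片链接时, 自动下载链接内容并从文本中清除对应标签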
should_rescue_image = (
gen_config
and gen_config.validation_policy
and gen_config.validation_policy.get("require_image")
)
if (
should_rescue_image
and not response_data.images
and response_data.text
and gen_config
):
markdown_matches = re.findall(
r"(!?\[.*?\]\((https?://[^\)]+)\))", response_data.text
)
if markdown_matches:
logger.info(
f"检测到 {len(markdown_matches)} "
"个资源链接,尝试自动下载并清洗。"
)
if response_data.images is None:
response_data.images = []
downloaded_urls = set()
for full_tag, url in markdown_matches:
try:
if url not in downloaded_urls:
content = await AsyncHttpx.get_content(url)
response_data.images.append(process_image_data(content))
downloaded_urls.add(url)
response_data.text = response_data.text.replace(
full_tag, ""
)
except Exception as exc:
logger.warning(
f"自动下载生成的图片失败: {url}, 错误: {exc}"
)
response_data.text = response_data.text.strip()
latency = (time.monotonic() - start_time) * 1000
await self.key_store.record_success(api_key, latency)
response_tool_calls: list[LLMToolCall] = []
if response_data.tool_calls:
for tc_data in response_data.tool_calls:
if isinstance(tc_data, LLMToolCall):
response_tool_calls.append(tc_data)
elif isinstance(tc_data, dict):
try:
response_tool_calls.append(LLMToolCall(**tc_data))
except Exception:
pass
final_response = LLMResponse(
text=response_data.text,
content_parts=response_data.content_parts,
usage_info=response_data.usage_info,
images=response_data.images,
raw_response=response_data.raw_response,
tool_calls=response_tool_calls if response_tool_calls else None,
code_executions=response_data.code_executions,
grounding_metadata=response_data.grounding_metadata,
cache_info=response_data.cache_info,
thought_text=response_data.thought_text,
thought_signature=response_data.thought_signature,
)
if context.request_type == "generation" and gen_config:
if gen_config.response_validator:
try:
gen_config.response_validator(final_response)
except Exception as exc:
raise LLMException(
f"响应内容未通过自定义验证器: {exc}",
code=LLMErrorCode.API_RESPONSE_INVALID,
details={"validator_error": str(exc)},
cause=exc,
) from exc
policy = gen_config.validation_policy
if policy:
effective_type = self.model._get_effective_api_type()
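# require_image 校验: 对 Gemini 通过 usageMetadata.promptTokensDetails 判断输入是否包含图片, 仅在输入含图片时才将缺图判定为失败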
if policy.get("require_image") and not final_response.images:
if effective_type == "gemini" and response_data.raw_response:
usage_metadata = response_data.raw_response.get(
"usageMetadata", {}
)
prompt_token_details = usage_metadata.get(
"promptTokensDetails", []
)
prompt_had_image = any(
detail.get("modality") == "IMAGE"
for detail in prompt_token_details
)
if prompt_had_image:
raise LLMException(
"响应验证失败:模型接收了图片输入但未生成图片。",
code=LLMErrorCode.API_RESPONSE_INVALID,
details={
"policy": policy,
"text_response": final_response.text,
"raw_response": response_data.raw_response,
},
)
else:
logger.debug(
"Gemini提示词中未包含图片跳过图片要求重试。"
)
else:
raise LLMException(
"响应验证失败:要求返回图片但未找到图片数据。",
code=LLMErrorCode.API_RESPONSE_INVALID,
details={
"policy": policy,
"text_response": final_response.text,
},
)
return final_response
except Exception as e:
if isinstance(e, LLMException):
raise e
logger.error(f"解析响应失败或发生未知错误: {e}")
if not isinstance(e, httpx.NetworkError | httpx.TimeoutException):
await self.key_store.record_failure(api_key, None, str(e))
raise LLMException(
f"网络请求异常: {type(e).__name__} - {e}",
code=LLMErrorCode.API_REQUEST_FAILED,
details={"api_key": masked_key},
cause=e,
)