"""
LLM model implementation classes.

Contains the abstract base class and concrete implementations of LLM models,
responsible for interacting with the APIs of various AI providers.
"""

from abc import ABC, abstractmethod
import asyncio
from collections.abc import Awaitable, Callable
import json
import re
import time
from typing import Any, Literal, TypeVar, cast

import httpx
from pydantic import BaseModel, ConfigDict, Field

from zhenxun.services.log import logger
from zhenxun.utils.http_utils import AsyncHttpx
from zhenxun.utils.log_sanitizer import sanitize_for_logging
from zhenxun.utils.pydantic_compat import dump_json_safely

from .adapters.base import BaseAdapter, RequestData, process_image_data
from .config import LLMGenerationConfig
from .config.generation import LLMEmbeddingConfig
from .config.providers import get_llm_config
from .core import (
    KeyStatusStore,
    LLMHttpClient,
    RetryConfig,
    _should_retry_llm_error,
    http_client_manager,
)
from .types import (
    LLMErrorCode,
    LLMException,
    LLMMessage,
    LLMResponse,
    LLMToolCall,
    ModelDetail,
    ProviderConfig,
    ToolChoice,
)
from .types.capabilities import ModelCapabilities, ModelModality

T = TypeVar("T", bound=BaseModel)


class LLMContext(BaseModel):
    """Execution context carrying request state through the middleware pipeline."""

    messages: list[LLMMessage]
    config: LLMGenerationConfig | LLMEmbeddingConfig
    tools: list[Any] | None
    tool_choice: str | dict[str, Any] | ToolChoice | None
    timeout: float | None
    extra: dict[str, Any] = Field(default_factory=dict)
    request_type: Literal["generation", "embedding"] = "generation"
    runtime_state: dict[str, Any] = Field(
        default_factory=dict,
        description="中间件运行时的临时状态存储(api_key, retry_count等)",
    )

    model_config = ConfigDict(arbitrary_types_allowed=True)


NextCall = Callable[[LLMContext], Awaitable[LLMResponse]]
LLMMiddleware = Callable[[LLMContext, NextCall], Awaitable[LLMResponse]]


class BaseLLMMiddleware(ABC):
    """Abstract base class for LLM middlewares."""

    @abstractmethod
    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        """
        Execute the middleware logic.

        Args:
            context: The request context, carrying configuration and runtime state.
            next_call: The next handler in the call chain.

        Returns:
            LLMResponse: The model response.
        """
        pass
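
# Illustrative sketch (not part of the runtime module): a custom middleware only has
# to match the LLMMiddleware signature above; subclassing BaseLLMMiddleware is one
# way to do it. Names such as TimingMiddleware and `model` are placeholders.
#
#     class TimingMiddleware(BaseLLMMiddleware):
#         async def __call__(
#             self, context: LLMContext, next_call: NextCall
#         ) -> LLMResponse:
#             started = time.monotonic()
#             try:
#                 return await next_call(context)
#             finally:
#                 context.runtime_state["elapsed_ms"] = (
#                     time.monotonic() - started
#                 ) * 1000
#
#     model.add_middleware(TimingMiddleware())  # runs at the outermost layer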


class LLMModelBase(ABC):
    """Abstract base class for LLM models."""

    @abstractmethod
    async def generate_response(
        self,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None = None,
        tools: list[Any] | None = None,
        tool_choice: str | dict[str, Any] | ToolChoice | None = None,
        timeout: float | None = None,
    ) -> LLMResponse:
        """Generate an advanced response."""
        pass

    @abstractmethod
    async def generate_embeddings(
        self,
        texts: list[str],
        config: LLMEmbeddingConfig,
    ) -> list[list[float]]:
        """Generate text embedding vectors."""
        pass


class LLMModel(LLMModelBase):
    """LLM model implementation class."""

    def __init__(
        self,
        provider_config: ProviderConfig,
        model_detail: ModelDetail,
        key_store: KeyStatusStore,
        http_client: LLMHttpClient,
        capabilities: ModelCapabilities,
        config_override: LLMGenerationConfig | None = None,
    ):
        self.provider_config = provider_config
        self.model_detail = model_detail
        self.key_store = key_store
        self.http_client: LLMHttpClient = http_client
        self.capabilities = capabilities
        self._generation_config = config_override

        self.provider_name = provider_config.name
        self.api_type = provider_config.api_type
        self.api_base = provider_config.api_base
        self.api_keys = (
            [provider_config.api_key]
            if isinstance(provider_config.api_key, str)
            else provider_config.api_key
        )
        self.model_name = model_detail.model_name
        self.temperature = model_detail.temperature
        self.max_tokens = model_detail.max_tokens

        self._is_closed = False
        self._ref_count = 0
        self._middlewares: list[LLMMiddleware] = []

    def _has_modality(self, modality: ModelModality, is_input: bool = True) -> bool:
        target_set = (
            self.capabilities.input_modalities
            if is_input
            else self.capabilities.output_modalities
        )
        return modality in target_set

    @property
    def can_process_images(self) -> bool:
        """Whether the model accepts images as input."""
        return self._has_modality(ModelModality.IMAGE)

    @property
    def can_process_video(self) -> bool:
        """Whether the model accepts video as input."""
        return self._has_modality(ModelModality.VIDEO)

    @property
    def can_process_audio(self) -> bool:
        """Whether the model accepts audio as input."""
        return self._has_modality(ModelModality.AUDIO)

    @property
    def can_generate_images(self) -> bool:
        """Whether the model can generate images."""
        return self._has_modality(ModelModality.IMAGE, is_input=False)

    @property
    def can_generate_audio(self) -> bool:
        """Whether the model can generate audio (TTS)."""
        return self._has_modality(ModelModality.AUDIO, is_input=False)

    @property
    def is_embedding_model(self) -> bool:
        """Whether this is an embedding model."""
        return self.capabilities.is_embedding_model

    def add_middleware(self, middleware: LLMMiddleware) -> None:
        """Register a middleware at the outermost layer of the processing pipeline."""
        self._middlewares.append(middleware)

    def _build_pipeline(self) -> NextCall:
        """
        Build the full middleware call chain, in the order:
        user middlewares -> Retry -> Logging -> KeySelection -> Network (terminal).
        """
        from .adapters import get_adapter_for_api_type

        client_settings = get_llm_config().client_settings
        retry_config = RetryConfig(
            max_retries=client_settings.max_retries,
            retry_delay=client_settings.retry_delay,
        )
        adapter = get_adapter_for_api_type(self.api_type)

        network_middleware = NetworkRequestMiddleware(self, adapter)

        async def terminal_handler(ctx: LLMContext) -> LLMResponse:
            async def _noop(_: LLMContext) -> LLMResponse:
                raise RuntimeError("NetworkRequestMiddleware 不应调用 next_call")

            return await network_middleware(ctx, _noop)

        def _wrap(middleware: LLMMiddleware, next_call: NextCall) -> NextCall:
            async def _handler(inner_ctx: LLMContext) -> LLMResponse:
                return await middleware(inner_ctx, next_call)

            return _handler

        handler: NextCall = terminal_handler
        handler = _wrap(
            KeySelectionMiddleware(self.key_store, self.provider_name, self.api_keys),
            handler,
        )
        handler = _wrap(
            LoggingMiddleware(self.provider_name, self.model_name),
            handler,
        )
        handler = _wrap(
            RetryMiddleware(retry_config, self.key_store),
            handler,
        )

        for middleware in reversed(self._middlewares):
            handler = _wrap(middleware, handler)

        return handler
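
    # For reference, with no user-registered middlewares the handler returned above
    # is equivalent to the following nesting (a sketch of the wrapping order, not
    # extra runtime code):
    #
    #     Retry(
    #         Logging(
    #             KeySelection(
    #                 NetworkRequest,  # terminal handler, performs the HTTP call
    #             )
    #         )
    #     )
    #
    # so each retry re-enters key selection and may pick a different API key.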

    def _get_effective_api_type(self) -> str:
        """
        Return the API type that is actually in effect.

        Mainly used in "smart" mode to decide which format log sanitization uses.
        """
        if self.api_type != "smart":
            return self.api_type

        if self.model_detail.api_type:
            return self.model_detail.api_type
        if (
            "gemini" in self.model_name.lower()
            and "openai" not in self.model_name.lower()
        ):
            return "gemini"
        return "openai"

    async def _get_http_client(self) -> LLMHttpClient:
        """Return the HTTP client, fetching a new one if it has been closed."""
        if self.http_client.is_closed:
            logger.debug(
                f"LLMModel {self.provider_name}/{self.model_name} 的 HTTP 客户端已关闭,"
                "正在获取新的客户端"
            )
            self.http_client = await http_client_manager.get_client(
                self.provider_config
            )
        return self.http_client

    async def _select_api_key(self, failed_keys: set[str] | None = None) -> str:
        """Select an available API key using a round-robin strategy."""
        if not self.api_keys:
            raise LLMException(
                f"提供商 {self.provider_name} 没有配置API密钥",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
            )

        selected_key = await self.key_store.get_next_available_key(
            self.provider_name, self.api_keys, failed_keys
        )

        if not selected_key:
            raise LLMException(
                f"提供商 {self.provider_name} 的所有API密钥当前都不可用",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
                details={
                    "total_keys": len(self.api_keys),
                    "failed_keys": len(failed_keys or set()),
                },
            )

        return selected_key

    async def close(self):
        """Mark the end of the current usage cycle of this model instance."""
        if self._is_closed:
            return
        self._is_closed = True
        logger.debug(
            f"LLMModel实例的使用周期已结束: {self} (共享HTTP客户端状态不受影响)"
        )

    async def __aenter__(self):
        if self._is_closed:
            logger.debug(
                f"Re-entering context for closed LLMModel {self}. "
                f"Resetting _is_closed to False."
            )
            self._is_closed = False
        self._check_not_closed()
        self._ref_count += 1
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Asynchronous context manager exit."""
        _ = exc_type, exc_val, exc_tb
        self._ref_count -= 1
        if self._ref_count <= 0:
            self._ref_count = 0
            await self.close()

    def _check_not_closed(self):
        """Raise if this instance has already been closed."""
        if self._is_closed:
            raise RuntimeError(f"LLMModel实例已关闭: {self}")
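
    # Usage sketch: the async context manager is re-entrant and reference counted,
    # so nested `async with` blocks on the same instance (a hypothetical `model`
    # obtained elsewhere) only close it when the outermost block exits.
    #
    #     async with model:
    #         async with model:   # _ref_count == 2, instance stays open
    #             ...
    #         ...                 # _ref_count == 1, instance stays open
    #     # _ref_count == 0 -> close() marks the usage cycle as finished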

    async def _execute_core_generation(self, context: LLMContext) -> LLMResponse:
        """
        [Core] Execute the core generation logic: build the middleware
        pipeline and run it. This is the single entry point into the pipeline.
        """
        pipeline_handler = self._build_pipeline()
        return await pipeline_handler(context)

    async def generate_response(
        self,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None = None,
        tools: list[Any] | None = None,
        tool_choice: str | dict[str, Any] | ToolChoice | None = None,
        timeout: float | None = None,
    ) -> LLMResponse:
        """Generate an advanced response (with middleware pipeline support)."""
        self._check_not_closed()

        if self._generation_config and config:
            final_request_config = self._generation_config.merge_with(config)
        elif config:
            final_request_config = config
        else:
            final_request_config = self._generation_config or LLMGenerationConfig()

        normalized_tools: list[Any] | None = None
        if tools:
            if isinstance(tools, dict):
                normalized_tools = list(tools.values())
            elif isinstance(tools, list):
                normalized_tools = tools
            else:
                normalized_tools = [tools]

        context = LLMContext(
            messages=messages,
            config=final_request_config,
            tools=normalized_tools,
            tool_choice=tool_choice,
            timeout=timeout,
        )

        return await self._execute_core_generation(context)
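
    # Usage sketch (hypothetical caller; how `model` and `messages` are constructed
    # lives outside this file):
    #
    #     async with model:
    #         response = await model.generate_response(
    #             messages,
    #             config=LLMGenerationConfig(),  # optional; merged with config_override
    #             timeout=60.0,
    #         )
    #         print(response.text)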

    async def generate_embeddings(
        self,
        texts: list[str],
        config: LLMEmbeddingConfig | None = None,
    ) -> list[list[float]]:
        """Generate text embedding vectors."""
        self._check_not_closed()
        if not texts:
            return []

        final_config = config or LLMEmbeddingConfig()

        context = LLMContext(
            messages=[],
            config=final_config,
            tools=None,
            tool_choice=None,
            timeout=None,
            request_type="embedding",
            extra={"texts": texts},
        )

        pipeline = self._build_pipeline()
        response = await pipeline(context)
        embeddings = (
            response.cache_info.get("embeddings") if response.cache_info else None
        )
        if embeddings is None:
            raise LLMException(
                "嵌入请求未返回 embeddings 数据",
                code=LLMErrorCode.EMBEDDING_FAILED,
            )
        return embeddings
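
    # Note: the embedding request reuses the same middleware pipeline; the network
    # layer hands the vectors back through `LLMResponse.cache_info["embeddings"]`,
    # which is what is unpacked above. A minimal call might look like
    # (illustrative, one vector per input text is expected):
    #
    #     vectors = await model.generate_embeddings(["你好", "hello world"])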

    def __str__(self) -> str:
        status = "closed" if self._is_closed else "active"
        return f"LLMModel({self.provider_name}/{self.model_name}, {status})"

    def __repr__(self) -> str:
        status = "closed" if self._is_closed else "active"
        return (
            f"LLMModel(provider={self.provider_name}, model={self.model_name}, "
            f"api_type={self.api_type}, status={status})"
        )


class RetryMiddleware(BaseLLMMiddleware):
    """Retry middleware: handles exception capture and the retry loop."""

    def __init__(self, retry_config: RetryConfig, key_store: KeyStatusStore):
        self.retry_config = retry_config
        self.key_store = key_store

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        last_exception: Exception | None = None
        total_attempts = self.retry_config.max_retries + 1

        for attempt in range(total_attempts):
            try:
                context.runtime_state["attempt"] = attempt + 1
                return await next_call(context)
            except LLMException as e:
                last_exception = e
                api_key = context.runtime_state.get("api_key")

                if api_key:
                    status_code = e.details.get("status_code")
                    error_msg = f"({e.code.name}) {e.message}"
                    await self.key_store.record_failure(api_key, status_code, error_msg)

                if not _should_retry_llm_error(
                    e, attempt, self.retry_config.max_retries
                ):
                    raise e

                if attempt == total_attempts - 1:
                    raise e

                wait_time = self.retry_config.retry_delay
                if self.retry_config.exponential_backoff:
                    wait_time *= 2**attempt

                logger.warning(
                    f"请求失败,{wait_time:.2f}秒后重试"
                    f" (第{attempt + 1}/{self.retry_config.max_retries}次重试): {e}"
                )
                await asyncio.sleep(wait_time)

            except Exception as e:
                logger.error(f"非预期异常,停止重试: {e}", e=e)
                raise e

        if last_exception:
            raise last_exception
        raise LLMException("重试循环异常结束")
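
# Worked example of the backoff above (illustrative numbers): with retry_delay=1.0,
# exponential_backoff=True and max_retries=3 there are four attempts in total, and a
# request that keeps failing waits 1.0s, 2.0s and 4.0s between them before the last
# LLMException is re-raised.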


class KeySelectionMiddleware(BaseLLMMiddleware):
    """Key-selection middleware: picks an available API key via round-robin."""

    def __init__(
        self, key_store: KeyStatusStore, provider_name: str, api_keys: list[str]
    ):
        self.key_store = key_store
        self.provider_name = provider_name
        self.api_keys = api_keys
        self._failed_keys: set[str] = set()

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        selected_key = await self.key_store.get_next_available_key(
            self.provider_name, self.api_keys, exclude_keys=self._failed_keys
        )

        if not selected_key:
            raise LLMException(
                f"提供商 {self.provider_name} 无可用 API Key",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
            )

        context.runtime_state["api_key"] = selected_key

        try:
            response = await next_call(context)
            return response
        except LLMException as e:
            self._failed_keys.add(selected_key)
            masked = f"{selected_key[:8]}..."
            if isinstance(e.details, dict):
                e.details["api_key"] = masked
            raise e


class LoggingMiddleware(BaseLLMMiddleware):
    """Logging middleware: records and sanitizes request and response logs."""

    def __init__(
        self, provider_name: str, model_name: str, log_context: str = "Generation"
    ):
        self.provider_name = provider_name
        self.model_name = model_name
        self.log_context = log_context

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        attempt = context.runtime_state.get("attempt", 1)
        api_key = context.runtime_state.get("api_key", "unknown")
        masked_key = f"{api_key[:8]}..."

        logger.info(
            f"🌐 发起LLM请求 (尝试 {attempt}) - {self.provider_name}/{self.model_name} "
            f"[{self.log_context}] Key: {masked_key}"
        )

        try:
            start_time = time.monotonic()
            response = await next_call(context)
            duration = (time.monotonic() - start_time) * 1000
            logger.info(f"🎯 LLM响应成功 [{self.log_context}] 耗时: {duration:.2f}ms")
            return response
        except Exception as e:
            logger.error(f"❌ 请求异常 [{self.log_context}]: {type(e).__name__} - {e}")
            raise e


class NetworkRequestMiddleware(BaseLLMMiddleware):
    """Network middleware: performs adapter conversion and the HTTP request."""

    def __init__(self, model_instance: "LLMModel", adapter: "BaseAdapter"):
        self.model = model_instance
        self.http_client = model_instance.http_client
        self.adapter = adapter
        self.key_store = model_instance.key_store

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        api_key = context.runtime_state["api_key"]

        request_data: RequestData
        gen_config: LLMGenerationConfig | None = None
        embed_config: LLMEmbeddingConfig | None = None

        if context.request_type == "embedding":
            embed_config = cast(LLMEmbeddingConfig, context.config)
            texts = (context.extra or {}).get("texts", [])
            request_data = self.adapter.prepare_embedding_request(
                model=self.model,
                api_key=api_key,
                texts=texts,
                config=embed_config,
            )
        else:
            gen_config = cast(LLMGenerationConfig, context.config)
            request_data = await self.adapter.prepare_advanced_request(
                model=self.model,
                api_key=api_key,
                messages=context.messages,
                config=gen_config,
                tools=context.tools,
                tool_choice=context.tool_choice,
            )

        masked_key = (
            f"{api_key[:8]}...{api_key[-4:] if len(api_key) > 12 else '***'}"
            if api_key
            else "N/A"
        )
        logger.debug(f"🔑 API密钥: {masked_key}")
        logger.debug(f"📡 请求URL: {request_data.url}")
        logger.debug(f"📋 请求头: {dict(request_data.headers)}")

        if self.model.api_type == "smart":
            effective_type = self.model._get_effective_api_type()
            sanitizer_req_context = f"{effective_type}_request"
        else:
            sanitizer_req_context = self.adapter.log_sanitization_context
        sanitized_body = sanitize_for_logging(
            request_data.body, context=sanitizer_req_context
        )

        if request_data.files and isinstance(sanitized_body, dict):
            file_info: list[str] = []
            file_count = 0
            if isinstance(request_data.files, list):
                file_count = len(request_data.files)
                for key, value in request_data.files:
                    filename = (
                        value[0]
                        if isinstance(value, tuple) and len(value) > 0
                        else "..."
                    )
                    file_info.append(f"{key}='{filename}'")
            elif isinstance(request_data.files, dict):
                file_count = len(request_data.files)
                file_info = list(request_data.files.keys())

            sanitized_body["[MULTIPART_FILES]"] = f"Count: {file_count} | {file_info}"

        request_body_str = dump_json_safely(
            sanitized_body, ensure_ascii=False, indent=2
        )
        logger.debug(f"📦 请求体: {request_body_str}")

        start_time = time.monotonic()
        try:
            http_response = await self.http_client.post(
                request_data.url,
                headers=request_data.headers,
                content=dump_json_safely(request_data.body, ensure_ascii=False)
                if not request_data.files
                else None,
                data=request_data.body if request_data.files else None,
                files=request_data.files,
                timeout=context.timeout,
            )

            logger.debug(f"📥 响应状态码: {http_response.status_code}")

            if exception := self.adapter.handle_http_error(http_response):
                error_text = http_response.content.decode("utf-8", errors="ignore")
                logger.debug(f"💥 完整错误响应: {error_text}")
                await self.key_store.record_failure(
                    api_key, http_response.status_code, error_text
                )
                raise exception

            response_bytes = await http_response.aread()
            logger.debug(f"📦 响应体已完整读取 ({len(response_bytes)} bytes)")

            response_json = json.loads(response_bytes)

            sanitizer_resp_context = sanitizer_req_context.replace(
                "_request", "_response"
            )
            if sanitizer_resp_context == sanitizer_req_context:
                sanitizer_resp_context = f"{sanitizer_req_context}_response"

            sanitized_response = sanitize_for_logging(
                response_json, context=sanitizer_resp_context
            )
            response_json_str = json.dumps(
                sanitized_response, ensure_ascii=False, indent=2
            )
            logger.debug(f"📋 响应JSON: {response_json_str}")

            if context.request_type == "embedding":
                self.adapter.validate_embedding_response(response_json)
                embeddings = self.adapter.parse_embedding_response(response_json)
                latency = (time.monotonic() - start_time) * 1000
                await self.key_store.record_success(api_key, latency)

                return LLMResponse(
                    text="",
                    raw_response=response_json,
                    cache_info={"embeddings": embeddings},
                )

            response_data = self.adapter.parse_response(
                self.model, response_json, is_advanced=True
            )

            should_rescue_image = (
                gen_config
                and gen_config.validation_policy
                and gen_config.validation_policy.get("require_image")
            )
            if (
                should_rescue_image
                and not response_data.images
                and response_data.text
                and gen_config
            ):
                markdown_matches = re.findall(
                    r"(!?\[.*?\]\((https?://[^\)]+)\))", response_data.text
                )
                if markdown_matches:
                    logger.info(
                        f"检测到 {len(markdown_matches)} "
                        "个资源链接,尝试自动下载并清洗。"
                    )
                    if response_data.images is None:
                        response_data.images = []

                    downloaded_urls = set()
                    for full_tag, url in markdown_matches:
                        try:
                            if url not in downloaded_urls:
                                content = await AsyncHttpx.get_content(url)
                                response_data.images.append(process_image_data(content))
                                downloaded_urls.add(url)
                            response_data.text = response_data.text.replace(
                                full_tag, ""
                            )
                        except Exception as exc:
                            logger.warning(
                                f"自动下载生成的图片失败: {url}, 错误: {exc}"
                            )
                    response_data.text = response_data.text.strip()

            latency = (time.monotonic() - start_time) * 1000
            await self.key_store.record_success(api_key, latency)

            response_tool_calls: list[LLMToolCall] = []
            if response_data.tool_calls:
                for tc_data in response_data.tool_calls:
                    if isinstance(tc_data, LLMToolCall):
                        response_tool_calls.append(tc_data)
                    elif isinstance(tc_data, dict):
                        try:
                            response_tool_calls.append(LLMToolCall(**tc_data))
                        except Exception:
                            pass

            final_response = LLMResponse(
                text=response_data.text,
                content_parts=response_data.content_parts,
                usage_info=response_data.usage_info,
                images=response_data.images,
                raw_response=response_data.raw_response,
                tool_calls=response_tool_calls if response_tool_calls else None,
                code_executions=response_data.code_executions,
                grounding_metadata=response_data.grounding_metadata,
                cache_info=response_data.cache_info,
                thought_text=response_data.thought_text,
                thought_signature=response_data.thought_signature,
            )

            if context.request_type == "generation" and gen_config:
                if gen_config.response_validator:
                    try:
                        gen_config.response_validator(final_response)
                    except Exception as exc:
                        raise LLMException(
                            f"响应内容未通过自定义验证器: {exc}",
                            code=LLMErrorCode.API_RESPONSE_INVALID,
                            details={"validator_error": str(exc)},
                            cause=exc,
                        ) from exc

                policy = gen_config.validation_policy
                if policy:
                    effective_type = self.model._get_effective_api_type()
                    if policy.get("require_image") and not final_response.images:
                        if effective_type == "gemini" and response_data.raw_response:
                            usage_metadata = response_data.raw_response.get(
                                "usageMetadata", {}
                            )
                            prompt_token_details = usage_metadata.get(
                                "promptTokensDetails", []
                            )
                            prompt_had_image = any(
                                detail.get("modality") == "IMAGE"
                                for detail in prompt_token_details
                            )

                            if prompt_had_image:
                                raise LLMException(
                                    "响应验证失败:模型接收了图片输入但未生成图片。",
                                    code=LLMErrorCode.API_RESPONSE_INVALID,
                                    details={
                                        "policy": policy,
                                        "text_response": final_response.text,
                                        "raw_response": response_data.raw_response,
                                    },
                                )
                            else:
                                logger.debug(
                                    "Gemini提示词中未包含图片,跳过图片要求重试。"
                                )
                        else:
                            raise LLMException(
                                "响应验证失败:要求返回图片但未找到图片数据。",
                                code=LLMErrorCode.API_RESPONSE_INVALID,
                                details={
                                    "policy": policy,
                                    "text_response": final_response.text,
                                },
                            )

            return final_response

        except Exception as e:
            if isinstance(e, LLMException):
                raise e

            logger.error(f"解析响应失败或发生未知错误: {e}")

            if not isinstance(e, httpx.NetworkError | httpx.TimeoutException):
                await self.key_store.record_failure(api_key, None, str(e))

            raise LLMException(
                f"网络请求异常: {type(e).__name__} - {e}",
                code=LLMErrorCode.API_REQUEST_FAILED,
                details={"api_key": masked_key},
                cause=e,
            )