- [Refactor] LLM service core architecture:
  - Introduce a middleware pipeline that unifies the request lifecycle (retries, key selection, logging, network requests); see the sketch after this list.
  - Rework the adapters into a component-based design, separating config mapping, message conversion, response parsing, and tool serialization.
  - Remove the `with_smart_retry` decorator; its job is taken over by the middleware.
  - Remove `LLMToolExecutor`; tool-execution logic is folded into `ToolInvoker`.
- [Feature] Enhanced configuration system:
  - `LLMGenerationConfig` adopts a component-based structure (Core, Reasoning, Visual, Output, Safety, ToolConfig).
  - Add `GenConfigBuilder` for semantic config construction.
  - Add `LLMEmbeddingConfig` for embedding-specific configuration.
  - Migrate `CommonOverrides` to the new config structure.
- [Feature] Strengthened tool system:
  - Introduce `ToolInvoker` for more flexible tool execution, with callbacks and structured errors.
  - The `function_tool` decorator supports dynamic Pydantic model creation and dependency injection (`ToolParam`, `RunContext`).
  - Support platform-native tools (`GeminiCodeExecution`, `GeminiGoogleSearch`, `GeminiUrlContext`).
- [Feature] Advanced generation and embeddings:
  - `generate_structured` supports an In-Context Validation and Repair (IVR) loop and AutoCoT (chain-of-thought) wrapping.
  - Add the `embed_query` and `embed_documents` convenience embedding APIs.
  - `OpenAIImageAdapter` supports OpenAI-compatible image generation.
  - `SmartAdapter` routes requests based on the model name.
- [Refactor] Message and type system:
  - `LLMContentPart` extended to cover more modalities and code-execution content.
  - `LLMMessage` and `LLMResponse` updated to support `content_parts` and chain-of-thought signatures.
  - Unify `LLMErrorCode` and user-facing error messages, with more detailed network/proxy error hints.
  - `pyproject.toml`: remove `bilireq`, add `json_repair`.
- [Optimization] Logging and debugging:
  - Introduce `DebugLogOptions` for fine-grained control of log sanitization.
  - Strengthen the log sanitizer to handle more kinds of sensitive data and long strings.
- [Cleanup] Remove deprecated modules:
  - `zhenxun/services/llm/memory.py`
  - `zhenxun/services/llm/executor.py`
  - `zhenxun/services/llm/config/presets.py`
  - `zhenxun/services/llm/types/content.py`
  - `zhenxun/services/llm/types/enums.py`
  - `zhenxun/services/llm/tools/__init__.py`
  - `zhenxun/services/llm/tools/manager.py`
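
A minimal consumer-side sketch of the new middleware pipeline. `BaseLLMMiddleware`, `LLMContext`, `NextCall`, `LLMResponse`, and `add_middleware` are all defined in the file below; how `model` and `messages` are constructed is elided, and `AuditMiddleware` itself is illustrative, not part of this commit:

```python
class AuditMiddleware(BaseLLMMiddleware):
    """Logs which retry attempt finally succeeded."""

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        # Delegates to the built-in chain: Retry -> Logging -> KeySelection -> Network.
        response = await next_call(context)
        attempt = context.runtime_state.get("attempt", 1)  # set by RetryMiddleware
        logger.info(f"request succeeded on attempt {attempt}")
        return response


model.add_middleware(AuditMiddleware())  # user middlewares run at the outermost layer
response = await model.generate_response(messages)
```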
"""
|
||
LLM 模型实现类
|
||
|
||
包含 LLM 模型的抽象基类和具体实现,负责与各种 AI 提供商的 API 交互。
|
||
"""
|
||
|
||
from abc import ABC, abstractmethod
|
||
import asyncio
|
||
from collections.abc import Awaitable, Callable
|
||
import json
|
||
import re
|
||
import time
|
||
from typing import Any, Literal, TypeVar, cast
|
||
|
||
import httpx
|
||
from pydantic import BaseModel, ConfigDict, Field
|
||
|
||
from zhenxun.services.log import logger
|
||
from zhenxun.utils.http_utils import AsyncHttpx
|
||
from zhenxun.utils.log_sanitizer import sanitize_for_logging
|
||
from zhenxun.utils.pydantic_compat import dump_json_safely
|
||
|
||
from .adapters.base import BaseAdapter, RequestData, process_image_data
|
||
from .config import LLMGenerationConfig
|
||
from .config.generation import LLMEmbeddingConfig
|
||
from .config.providers import get_llm_config
|
||
from .core import (
|
||
KeyStatusStore,
|
||
LLMHttpClient,
|
||
RetryConfig,
|
||
_should_retry_llm_error,
|
||
http_client_manager,
|
||
)
|
||
from .types import (
|
||
LLMErrorCode,
|
||
LLMException,
|
||
LLMMessage,
|
||
LLMResponse,
|
||
LLMToolCall,
|
||
ModelDetail,
|
||
ProviderConfig,
|
||
ToolChoice,
|
||
)
|
||
from .types.capabilities import ModelCapabilities, ModelModality
|
||
|
||
T = TypeVar("T", bound=BaseModel)
|
||
|
||
|
||
class LLMContext(BaseModel):
    """LLM execution context, passed through the middleware pipeline to carry request state."""

    messages: list[LLMMessage]
    config: LLMGenerationConfig | LLMEmbeddingConfig
    tools: list[Any] | None
    tool_choice: str | dict[str, Any] | ToolChoice | None
    timeout: float | None
    extra: dict[str, Any] = Field(default_factory=dict)
    request_type: Literal["generation", "embedding"] = "generation"
    runtime_state: dict[str, Any] = Field(
        default_factory=dict,
        description="Transient state shared between middlewares (api_key, retry count, etc.)",
    )

    model_config = ConfigDict(arbitrary_types_allowed=True)


NextCall = Callable[[LLMContext], Awaitable[LLMResponse]]
LLMMiddleware = Callable[[LLMContext, NextCall], Awaitable[LLMResponse]]


class BaseLLMMiddleware(ABC):
    """Abstract base class for LLM middlewares."""

    @abstractmethod
    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        """
        Execute the middleware logic.

        Args:
            context: The request context, holding configuration and runtime state.
            next_call: The next handler in the call chain.

        Returns:
            LLMResponse: The model's response.
        """
        pass

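
# Illustrative sketch (not used by this module): a concrete middleware that
# satisfies the contract above. `TimingMiddleware` is a hypothetical name; every
# type it touches is defined in this file.
#
#     class TimingMiddleware(BaseLLMMiddleware):
#         async def __call__(
#             self, context: LLMContext, next_call: NextCall
#         ) -> LLMResponse:
#             start = time.monotonic()
#             try:
#                 return await next_call(context)
#             finally:
#                 context.runtime_state["elapsed_ms"] = (
#                     time.monotonic() - start
#                 ) * 1000
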
class LLMModelBase(ABC):
    """Abstract base class for LLM models."""

    @abstractmethod
    async def generate_response(
        self,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None = None,
        tools: list[Any] | None = None,
        tool_choice: str | dict[str, Any] | ToolChoice | None = None,
        timeout: float | None = None,
    ) -> LLMResponse:
        """Generate a high-level response."""
        pass

    @abstractmethod
    async def generate_embeddings(
        self,
        texts: list[str],
        config: LLMEmbeddingConfig,
    ) -> list[list[float]]:
        """Generate text embedding vectors."""
        pass


class LLMModel(LLMModelBase):
    """Concrete LLM model implementation."""

    def __init__(
        self,
        provider_config: ProviderConfig,
        model_detail: ModelDetail,
        key_store: KeyStatusStore,
        http_client: LLMHttpClient,
        capabilities: ModelCapabilities,
        config_override: LLMGenerationConfig | None = None,
    ):
        self.provider_config = provider_config
        self.model_detail = model_detail
        self.key_store = key_store
        self.http_client: LLMHttpClient = http_client
        self.capabilities = capabilities
        self._generation_config = config_override

        self.provider_name = provider_config.name
        self.api_type = provider_config.api_type
        self.api_base = provider_config.api_base
        self.api_keys = (
            [provider_config.api_key]
            if isinstance(provider_config.api_key, str)
            else provider_config.api_key
        )
        self.model_name = model_detail.model_name
        self.temperature = model_detail.temperature
        self.max_tokens = model_detail.max_tokens

        self._is_closed = False
        self._ref_count = 0
        self._middlewares: list[LLMMiddleware] = []

    def _has_modality(self, modality: ModelModality, is_input: bool = True) -> bool:
        target_set = (
            self.capabilities.input_modalities
            if is_input
            else self.capabilities.output_modalities
        )
        return modality in target_set

    @property
    def can_process_images(self) -> bool:
        """Whether the model accepts images as input."""
        return self._has_modality(ModelModality.IMAGE)

    @property
    def can_process_video(self) -> bool:
        """Whether the model accepts video as input."""
        return self._has_modality(ModelModality.VIDEO)

    @property
    def can_process_audio(self) -> bool:
        """Whether the model accepts audio as input."""
        return self._has_modality(ModelModality.AUDIO)

    @property
    def can_generate_images(self) -> bool:
        """Whether the model can generate images."""
        return self._has_modality(ModelModality.IMAGE, is_input=False)

    @property
    def can_generate_audio(self) -> bool:
        """Whether the model can generate audio (TTS)."""
        return self._has_modality(ModelModality.AUDIO, is_input=False)

    @property
    def is_embedding_model(self) -> bool:
        """Whether this is an embedding model."""
        return self.capabilities.is_embedding_model

    def add_middleware(self, middleware: LLMMiddleware) -> None:
        """Register a middleware at the outermost layer of the processing pipeline."""
        self._middlewares.append(middleware)

    def _build_pipeline(self) -> NextCall:
        """
        Build the complete middleware call chain. The order is:
        user-defined middlewares -> Retry -> Logging -> KeySelection -> Network (terminal)
        """
        from .adapters import get_adapter_for_api_type

        client_settings = get_llm_config().client_settings
        retry_config = RetryConfig(
            max_retries=client_settings.max_retries,
            retry_delay=client_settings.retry_delay,
        )
        adapter = get_adapter_for_api_type(self.api_type)

        network_middleware = NetworkRequestMiddleware(self, adapter)

        async def terminal_handler(ctx: LLMContext) -> LLMResponse:
            async def _noop(_: LLMContext) -> LLMResponse:
                raise RuntimeError("NetworkRequestMiddleware must not call next_call")

            return await network_middleware(ctx, _noop)

        def _wrap(middleware: LLMMiddleware, next_call: NextCall) -> NextCall:
            async def _handler(inner_ctx: LLMContext) -> LLMResponse:
                return await middleware(inner_ctx, next_call)

            return _handler

        handler: NextCall = terminal_handler
        handler = _wrap(
            KeySelectionMiddleware(self.key_store, self.provider_name, self.api_keys),
            handler,
        )
        handler = _wrap(
            LoggingMiddleware(self.provider_name, self.model_name),
            handler,
        )
        handler = _wrap(
            RetryMiddleware(retry_config, self.key_store),
            handler,
        )

        for middleware in reversed(self._middlewares):
            handler = _wrap(middleware, handler)

        return handler

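    # Resulting call order (sketch derived from the wrapping above): with user
    # middlewares [A, B] registered via add_middleware, a request flows
    #
    #     A -> B -> RetryMiddleware -> LoggingMiddleware
    #       -> KeySelectionMiddleware -> NetworkRequestMiddleware
    #
    # and the response (or exception) unwinds in reverse, so RetryMiddleware
    # re-runs key selection, logging, and the network call on every attempt.
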
    def _get_effective_api_type(self) -> str:
        """
        Get the API type that is actually in effect.
        Mainly used in Smart mode to decide which format log sanitization should use.
        """
        if self.api_type != "smart":
            return self.api_type

        if self.model_detail.api_type:
            return self.model_detail.api_type
        if (
            "gemini" in self.model_name.lower()
            and "openai" not in self.model_name.lower()
        ):
            return "gemini"
        return "openai"

    async def _get_http_client(self) -> LLMHttpClient:
        """Get the HTTP client."""
        if self.http_client.is_closed:
            logger.debug(
                f"HTTP client for LLMModel {self.provider_name}/{self.model_name} "
                "is closed; acquiring a new one"
            )
            self.http_client = await http_client_manager.get_client(
                self.provider_config
            )
        return self.http_client

    async def _select_api_key(self, failed_keys: set[str] | None = None) -> str:
        """Select an available API key (round-robin strategy)."""
        if not self.api_keys:
            raise LLMException(
                f"Provider {self.provider_name} has no API keys configured",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
            )

        selected_key = await self.key_store.get_next_available_key(
            self.provider_name, self.api_keys, failed_keys
        )

        if not selected_key:
            raise LLMException(
                f"All API keys for provider {self.provider_name} are currently unavailable",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
                details={
                    "total_keys": len(self.api_keys),
                    "failed_keys": len(failed_keys or set()),
                },
            )

        return selected_key

    async def close(self):
        """Mark the end of this model instance's current usage cycle."""
        if self._is_closed:
            return
        self._is_closed = True
        logger.debug(
            f"LLMModel instance usage cycle ended: {self} (shared HTTP client state unaffected)"
        )

    async def __aenter__(self):
        if self._is_closed:
            logger.debug(
                f"Re-entering context for closed LLMModel {self}. "
                f"Resetting _is_closed to False."
            )
            self._is_closed = False
        self._check_not_closed()
        self._ref_count += 1
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        _ = exc_type, exc_val, exc_tb
        self._ref_count -= 1
        if self._ref_count <= 0:
            self._ref_count = 0
            await self.close()

    def _check_not_closed(self):
        """Raise if this instance has already been closed."""
        if self._is_closed:
            raise RuntimeError(f"LLMModel instance is closed: {self}")

    async def _execute_core_generation(self, context: LLMContext) -> LLMResponse:
        """
        [Core] Execute the core generation logic: build the pipeline and run it.
        This method is the entry point that drives the middleware pipeline.
        """
        pipeline_handler = self._build_pipeline()
        return await pipeline_handler(context)

    async def generate_response(
        self,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None = None,
        tools: list[Any] | None = None,
        tool_choice: str | dict[str, Any] | ToolChoice | None = None,
        timeout: float | None = None,
    ) -> LLMResponse:
        """
        Generate a high-level response (runs through the middleware pipeline).
        """
        self._check_not_closed()

        if self._generation_config and config:
            final_request_config = self._generation_config.merge_with(config)
        elif config:
            final_request_config = config
        else:
            final_request_config = self._generation_config or LLMGenerationConfig()

        normalized_tools: list[Any] | None = None
        if tools:
            if isinstance(tools, dict):
                normalized_tools = list(tools.values())
            elif isinstance(tools, list):
                normalized_tools = tools
            else:
                normalized_tools = [tools]

        context = LLMContext(
            messages=messages,
            config=final_request_config,
            tools=normalized_tools,
            tool_choice=tool_choice,
            timeout=timeout,
        )

        return await self._execute_core_generation(context)

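    # Usage sketch (assumes an already-constructed `model`; building the
    # LLMMessage list is elided):
    #
    #     async with model:
    #         response = await model.generate_response(
    #             messages,
    #             config=LLMGenerationConfig(),
    #             timeout=30.0,
    #         )
    #         print(response.text)
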
    async def generate_embeddings(
        self,
        texts: list[str],
        config: LLMEmbeddingConfig | None = None,
    ) -> list[list[float]]:
        """Generate text embedding vectors."""
        self._check_not_closed()
        if not texts:
            return []

        final_config = config or LLMEmbeddingConfig()

        context = LLMContext(
            messages=[],
            config=final_config,
            tools=None,
            tool_choice=None,
            timeout=None,
            request_type="embedding",
            extra={"texts": texts},
        )

        pipeline = self._build_pipeline()
        response = await pipeline(context)
        embeddings = (
            response.cache_info.get("embeddings") if response.cache_info else None
        )
        if embeddings is None:
            raise LLMException(
                "Embedding request returned no embeddings data",
                code=LLMErrorCode.EMBEDDING_FAILED,
            )
        return embeddings

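    # Usage sketch: embeddings ride the same pipeline; the adapter stores the
    # vectors in LLMResponse.cache_info, which this method unwraps:
    #
    #     vectors = await model.generate_embeddings(
    #         ["hello", "world"], config=LLMEmbeddingConfig()
    #     )
    #     # one vector per input text (dimensions depend on the provider's model)
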
    def __str__(self) -> str:
        status = "closed" if self._is_closed else "active"
        return f"LLMModel({self.provider_name}/{self.model_name}, {status})"

    def __repr__(self) -> str:
        status = "closed" if self._is_closed else "active"
        return (
            f"LLMModel(provider={self.provider_name}, model={self.model_name}, "
            f"api_type={self.api_type}, status={status})"
        )


class RetryMiddleware(BaseLLMMiddleware):
    """
    Retry middleware: catches exceptions and drives the retry loop.
    """

    def __init__(self, retry_config: RetryConfig, key_store: KeyStatusStore):
        self.retry_config = retry_config
        self.key_store = key_store

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        last_exception: Exception | None = None
        total_attempts = self.retry_config.max_retries + 1

        for attempt in range(total_attempts):
            try:
                context.runtime_state["attempt"] = attempt + 1
                return await next_call(context)

            except LLMException as e:
                last_exception = e
                api_key = context.runtime_state.get("api_key")

                if api_key:
                    status_code = e.details.get("status_code")
                    error_msg = f"({e.code.name}) {e.message}"
                    await self.key_store.record_failure(api_key, status_code, error_msg)

                if not _should_retry_llm_error(
                    e, attempt, self.retry_config.max_retries
                ):
                    raise e

                if attempt == total_attempts - 1:
                    raise e

                wait_time = self.retry_config.retry_delay
                if self.retry_config.exponential_backoff:
                    wait_time *= 2**attempt

                logger.warning(
                    f"Request failed; retrying in {wait_time:.2f}s"
                    f" (retry {attempt + 1}/{self.retry_config.max_retries}): {e}"
                )
                await asyncio.sleep(wait_time)

            except Exception as e:
                logger.error(f"Unexpected exception; aborting retries: {e}", e=e)
                raise e

        if last_exception:
            raise last_exception
        raise LLMException("Retry loop ended abnormally")


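# Backoff example (derived from the loop above): with retry_delay=1.0 and
# exponential_backoff enabled, the waits between attempts grow as
# 1.0 * 2**0 = 1s, 1.0 * 2**1 = 2s, 1.0 * 2**2 = 4s, ...;
# with it disabled, every wait is the flat retry_delay.

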
class KeySelectionMiddleware(BaseLLMMiddleware):
    """
    Key selection middleware: picks an available API key via round-robin.
    """

    def __init__(
        self, key_store: KeyStatusStore, provider_name: str, api_keys: list[str]
    ):
        self.key_store = key_store
        self.provider_name = provider_name
        self.api_keys = api_keys
        self._failed_keys: set[str] = set()

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        selected_key = await self.key_store.get_next_available_key(
            self.provider_name, self.api_keys, exclude_keys=self._failed_keys
        )

        if not selected_key:
            raise LLMException(
                f"No available API key for provider {self.provider_name}",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
            )

        context.runtime_state["api_key"] = selected_key

        try:
            response = await next_call(context)
            return response
        except LLMException as e:
            # Exclude the failing key from later attempts and mask it in the details.
            self._failed_keys.add(selected_key)
            masked = f"{selected_key[:8]}..."
            if isinstance(e.details, dict):
                e.details["api_key"] = masked
            raise e


class LoggingMiddleware(BaseLLMMiddleware):
    """
    Logging middleware: records and sanitizes request/response logs.
    """

    def __init__(
        self, provider_name: str, model_name: str, log_context: str = "Generation"
    ):
        self.provider_name = provider_name
        self.model_name = model_name
        self.log_context = log_context

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        attempt = context.runtime_state.get("attempt", 1)
        api_key = context.runtime_state.get("api_key", "unknown")
        masked_key = f"{api_key[:8]}..."

        logger.info(
            f"🌐 LLM request (attempt {attempt}) - {self.provider_name}/{self.model_name} "
            f"[{self.log_context}] Key: {masked_key}"
        )

        try:
            start_time = time.monotonic()
            response = await next_call(context)
            duration = (time.monotonic() - start_time) * 1000
            logger.info(f"🎯 LLM response OK [{self.log_context}] took {duration:.2f}ms")
            return response
        except Exception as e:
            logger.error(f"❌ Request failed [{self.log_context}]: {type(e).__name__} - {e}")
            raise e


class NetworkRequestMiddleware(BaseLLMMiddleware):
    """
    Network request middleware: runs the adapter conversion and the HTTP request.
    """

    def __init__(self, model_instance: "LLMModel", adapter: "BaseAdapter"):
        self.model = model_instance
        self.http_client = model_instance.http_client
        self.adapter = adapter
        self.key_store = model_instance.key_store

    async def __call__(self, context: LLMContext, next_call: NextCall) -> LLMResponse:
        api_key = context.runtime_state["api_key"]

        request_data: RequestData
        gen_config: LLMGenerationConfig | None = None
        embed_config: LLMEmbeddingConfig | None = None

        if context.request_type == "embedding":
            embed_config = cast(LLMEmbeddingConfig, context.config)
            texts = (context.extra or {}).get("texts", [])
            request_data = self.adapter.prepare_embedding_request(
                model=self.model,
                api_key=api_key,
                texts=texts,
                config=embed_config,
            )
        else:
            gen_config = cast(LLMGenerationConfig, context.config)
            request_data = await self.adapter.prepare_advanced_request(
                model=self.model,
                api_key=api_key,
                messages=context.messages,
                config=gen_config,
                tools=context.tools,
                tool_choice=context.tool_choice,
            )

        masked_key = (
            f"{api_key[:8]}...{api_key[-4:] if len(api_key) > 12 else '***'}"
            if api_key
            else "N/A"
        )
        logger.debug(f"🔑 API key: {masked_key}")
        logger.debug(f"📡 Request URL: {request_data.url}")
        logger.debug(f"📋 Request headers: {dict(request_data.headers)}")

        if self.model.api_type == "smart":
            effective_type = self.model._get_effective_api_type()
            sanitizer_req_context = f"{effective_type}_request"
        else:
            sanitizer_req_context = self.adapter.log_sanitization_context
        sanitized_body = sanitize_for_logging(
            request_data.body, context=sanitizer_req_context
        )

        if request_data.files and isinstance(sanitized_body, dict):
            file_info: list[str] = []
            file_count = 0
            if isinstance(request_data.files, list):
                file_count = len(request_data.files)
                for key, value in request_data.files:
                    filename = (
                        value[0]
                        if isinstance(value, tuple) and len(value) > 0
                        else "..."
                    )
                    file_info.append(f"{key}='{filename}'")
            elif isinstance(request_data.files, dict):
                file_count = len(request_data.files)
                file_info = list(request_data.files.keys())

            sanitized_body["[MULTIPART_FILES]"] = f"Count: {file_count} | {file_info}"

        request_body_str = dump_json_safely(
            sanitized_body, ensure_ascii=False, indent=2
        )
        logger.debug(f"📦 Request body: {request_body_str}")

        start_time = time.monotonic()
        try:
            http_response = await self.http_client.post(
                request_data.url,
                headers=request_data.headers,
                content=dump_json_safely(request_data.body, ensure_ascii=False)
                if not request_data.files
                else None,
                data=request_data.body if request_data.files else None,
                files=request_data.files,
                timeout=context.timeout,
            )

            logger.debug(f"📥 Response status code: {http_response.status_code}")

            if exception := self.adapter.handle_http_error(http_response):
                error_text = http_response.content.decode("utf-8", errors="ignore")
                logger.debug(f"💥 Full error response: {error_text}")
                await self.key_store.record_failure(
                    api_key, http_response.status_code, error_text
                )
                raise exception

            response_bytes = await http_response.aread()
            logger.debug(f"📦 Response body fully read ({len(response_bytes)} bytes)")

            response_json = json.loads(response_bytes)

            sanitizer_resp_context = sanitizer_req_context.replace(
                "_request", "_response"
            )
            if sanitizer_resp_context == sanitizer_req_context:
                sanitizer_resp_context = f"{sanitizer_req_context}_response"

            sanitized_response = sanitize_for_logging(
                response_json, context=sanitizer_resp_context
            )
            response_json_str = json.dumps(
                sanitized_response, ensure_ascii=False, indent=2
            )
            logger.debug(f"📋 Response JSON: {response_json_str}")

            if context.request_type == "embedding":
                self.adapter.validate_embedding_response(response_json)
                embeddings = self.adapter.parse_embedding_response(response_json)
                latency = (time.monotonic() - start_time) * 1000
                await self.key_store.record_success(api_key, latency)

                return LLMResponse(
                    text="",
                    raw_response=response_json,
                    cache_info={"embeddings": embeddings},
                )

            response_data = self.adapter.parse_response(
                self.model, response_json, is_advanced=True
            )

            should_rescue_image = (
                gen_config
                and gen_config.validation_policy
                and gen_config.validation_policy.get("require_image")
            )
            if (
                should_rescue_image
                and not response_data.images
                and response_data.text
                and gen_config
            ):
                # Rescue pass: the model answered with markdown links such as
                # ![alt](https://...) instead of inline image data; capture both
                # the full tag and the bare URL.
                markdown_matches = re.findall(
                    r"(!?\[.*?\]\((https?://[^\)]+)\))", response_data.text
                )
                if markdown_matches:
                    logger.info(
                        f"Found {len(markdown_matches)} "
                        "resource links; attempting to download and clean them up."
                    )
                    if response_data.images is None:
                        response_data.images = []

                    downloaded_urls = set()
                    for full_tag, url in markdown_matches:
                        try:
                            if url not in downloaded_urls:
                                content = await AsyncHttpx.get_content(url)
                                response_data.images.append(process_image_data(content))
                                downloaded_urls.add(url)
                            response_data.text = response_data.text.replace(
                                full_tag, ""
                            )
                        except Exception as exc:
                            logger.warning(
                                f"Failed to download generated image: {url}, error: {exc}"
                            )
                    response_data.text = response_data.text.strip()

            latency = (time.monotonic() - start_time) * 1000
            await self.key_store.record_success(api_key, latency)

            response_tool_calls: list[LLMToolCall] = []
            if response_data.tool_calls:
                for tc_data in response_data.tool_calls:
                    if isinstance(tc_data, LLMToolCall):
                        response_tool_calls.append(tc_data)
                    elif isinstance(tc_data, dict):
                        try:
                            response_tool_calls.append(LLMToolCall(**tc_data))
                        except Exception:
                            pass

            final_response = LLMResponse(
                text=response_data.text,
                content_parts=response_data.content_parts,
                usage_info=response_data.usage_info,
                images=response_data.images,
                raw_response=response_data.raw_response,
                tool_calls=response_tool_calls if response_tool_calls else None,
                code_executions=response_data.code_executions,
                grounding_metadata=response_data.grounding_metadata,
                cache_info=response_data.cache_info,
                thought_text=response_data.thought_text,
                thought_signature=response_data.thought_signature,
            )

            if context.request_type == "generation" and gen_config:
                if gen_config.response_validator:
                    try:
                        gen_config.response_validator(final_response)
                    except Exception as exc:
                        raise LLMException(
                            f"Response failed the custom validator: {exc}",
                            code=LLMErrorCode.API_RESPONSE_INVALID,
                            details={"validator_error": str(exc)},
                            cause=exc,
                        ) from exc

                policy = gen_config.validation_policy
                if policy:
                    effective_type = self.model._get_effective_api_type()
                    if policy.get("require_image") and not final_response.images:
                        if effective_type == "gemini" and response_data.raw_response:
                            usage_metadata = response_data.raw_response.get(
                                "usageMetadata", {}
                            )
                            prompt_token_details = usage_metadata.get(
                                "promptTokensDetails", []
                            )
                            prompt_had_image = any(
                                detail.get("modality") == "IMAGE"
                                for detail in prompt_token_details
                            )

                            if prompt_had_image:
                                raise LLMException(
                                    "Response validation failed: the model received "
                                    "image input but generated no image.",
                                    code=LLMErrorCode.API_RESPONSE_INVALID,
                                    details={
                                        "policy": policy,
                                        "text_response": final_response.text,
                                        "raw_response": response_data.raw_response,
                                    },
                                )
                            else:
                                logger.debug(
                                    "Gemini prompt contained no image; "
                                    "skipping the require_image retry."
                                )
                        else:
                            raise LLMException(
                                "Response validation failed: an image was required "
                                "but no image data was found.",
                                code=LLMErrorCode.API_RESPONSE_INVALID,
                                details={
                                    "policy": policy,
                                    "text_response": final_response.text,
                                },
                            )

            return final_response

        except Exception as e:
            if isinstance(e, LLMException):
                raise e

            logger.error(f"Failed to parse the response, or an unknown error occurred: {e}")

            # Don't mark the key as failed for network/timeout errors
            # (the key itself may be fine).
            if not isinstance(e, httpx.NetworkError | httpx.TimeoutException):
                await self.key_store.record_failure(api_key, None, str(e))

            raise LLMException(
                f"Network request error: {type(e).__name__} - {e}",
                code=LLMErrorCode.API_REQUEST_FAILED,
                details={"api_key": masked_key},
                cause=e,
            )