zhenxun_bot/zhenxun/services/llm/service.py
Rumio 74a9f3a843
feat(core): support multi-image LLM responses, enhance the UI theme skin system, and improve JSON/Markdown handling (#2062)
- [LLM service]
  - The `LLMResponse` model now supports `images: list[bytes]`, allowing a model to return multiple images (see the sketch after this message).
  - The LLM adapters (`base.py`, `gemini.py`) and the API layer (`api.py`, `service.py`) have been updated to handle multi-image responses.
  - Response validation now checks the `images` list instead of a single `image_bytes`.
- [UI rendering service]
  - Introduced the concept of component "skins" (variants), allowing different visual styles for the same component.
  - Improved the loading, merging, and caching of `manifest.json`, with recursive merging of base and skin manifests.
  - `ThemeManager` now caches loaded manifests and clears the cache when themes are reloaded.
  - Enhanced the resource resolver (`ResourceResolver`) with support for `@`-namespaced paths and more robust relative-path handling.
  - Standalone templates now inherit the filters of the main Jinja environment.
- [Utilities]
  - Introduced the `dump_json_safely` utility for safer JSON serialization of objects containing complex types such as Pydantic models and enums.
  - Request-body and cache-key generation in the LLM service now use `dump_json_safely`.
  - Improved `format_usage_for_markdown`: block-level elements are now preceded by proper line breaks, and hard line breaks inside paragraphs are handled correctly.

Co-authored-by: webjoin111 <455457521@qq.com>
2025-10-09 08:50:40 +08:00
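
Illustrative usage of the new multi-image support (a minimal sketch, not code from this commit; `model` stands for a configured `LLMModel` instance, and PNG payloads are assumed):

    response = await model.generate_response(messages)
    for i, image_bytes in enumerate(response.images or []):
        # `images` may be empty or absent when the model returns text only
        with open(f"output_{i}.png", "wb") as f:
            f.write(image_bytes)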


"""
LLM 模型实现类
包含 LLM 模型的抽象基类和具体实现,负责与各种 AI 提供商的 API 交互。
"""
from abc import ABC, abstractmethod
from collections.abc import Awaitable, Callable
import json
from typing import Any, TypeVar

from pydantic import BaseModel

from zhenxun.services.log import logger
from zhenxun.utils.log_sanitizer import sanitize_for_logging
from zhenxun.utils.pydantic_compat import dump_json_safely

from .adapters.base import RequestData
from .config import LLMGenerationConfig
from .config.providers import get_ai_config
from .core import (
    KeyStatusStore,
    LLMHttpClient,
    RetryConfig,
    http_client_manager,
    with_smart_retry,
)
from .types import (
    EmbeddingTaskType,
    LLMErrorCode,
    LLMException,
    LLMMessage,
    LLMResponse,
    ModelDetail,
    ProviderConfig,
    ToolExecutable,
)
from .types.capabilities import ModelCapabilities, ModelModality

T = TypeVar("T", bound=BaseModel)


class LLMModelBase(ABC):
    """Abstract base class for LLM models."""

    @abstractmethod
    async def generate_response(
        self,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None = None,
        tools: dict[str, ToolExecutable] | None = None,
        tool_choice: str | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """Generate an advanced response."""
        pass

    @abstractmethod
    async def generate_embeddings(
        self,
        texts: list[str],
        task_type: EmbeddingTaskType | str = EmbeddingTaskType.RETRIEVAL_DOCUMENT,
        **kwargs: Any,
    ) -> list[list[float]]:
        """Generate text embedding vectors."""
        pass


class LLMModel(LLMModelBase):
    """LLM model implementation."""

    def __init__(
        self,
        provider_config: ProviderConfig,
        model_detail: ModelDetail,
        key_store: KeyStatusStore,
        http_client: LLMHttpClient,
        capabilities: ModelCapabilities,
        config_override: LLMGenerationConfig | None = None,
    ):
        self.provider_config = provider_config
        self.model_detail = model_detail
        self.key_store = key_store
        self.http_client: LLMHttpClient = http_client
        self.capabilities = capabilities
        self._generation_config = config_override
        self.provider_name = provider_config.name
        self.api_type = provider_config.api_type
        self.api_base = provider_config.api_base
        self.api_keys = (
            [provider_config.api_key]
            if isinstance(provider_config.api_key, str)
            else provider_config.api_key
        )
        self.model_name = model_detail.model_name
        self.temperature = model_detail.temperature
        self.max_tokens = model_detail.max_tokens
        self._is_closed = False

    def can_process_images(self) -> bool:
        """Check whether the model supports image input."""
        return ModelModality.IMAGE in self.capabilities.input_modalities

    def can_process_video(self) -> bool:
        """Check whether the model supports video input."""
        return ModelModality.VIDEO in self.capabilities.input_modalities

    def can_process_audio(self) -> bool:
        """Check whether the model supports audio input."""
        return ModelModality.AUDIO in self.capabilities.input_modalities

    def can_generate_images(self) -> bool:
        """Check whether the model supports image generation."""
        return ModelModality.IMAGE in self.capabilities.output_modalities

    def can_generate_audio(self) -> bool:
        """Check whether the model supports audio generation (TTS)."""
        return ModelModality.AUDIO in self.capabilities.output_modalities

    def can_use_tools(self) -> bool:
        """Check whether the model supports tool/function calling."""
        return self.capabilities.supports_tool_calling

    def is_embedding_model(self) -> bool:
        """Check whether this is an embedding model."""
        return self.capabilities.is_embedding_model

    async def _get_http_client(self) -> LLMHttpClient:
        """Get the HTTP client, replacing it if the shared one was closed."""
        if self.http_client.is_closed:
            logger.debug(
                f"LLMModel {self.provider_name}/{self.model_name} 的 HTTP 客户端已关闭,"
                "正在获取新的客户端"
            )
            self.http_client = await http_client_manager.get_client(
                self.provider_config
            )
        return self.http_client

    async def _select_api_key(self, failed_keys: set[str] | None = None) -> str:
        """Select an available API key using a round-robin strategy."""
        if not self.api_keys:
            raise LLMException(
                f"提供商 {self.provider_name} 没有配置API密钥",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
            )
        selected_key = await self.key_store.get_next_available_key(
            self.provider_name, self.api_keys, failed_keys
        )
        if not selected_key:
            raise LLMException(
                f"提供商 {self.provider_name} 的所有API密钥当前都不可用",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
                details={
                    "total_keys": len(self.api_keys),
                    "failed_keys": len(failed_keys or set()),
                },
            )
        return selected_key

    async def _perform_api_call(
        self,
        prepare_request_func: Callable[[str], Awaitable["RequestData"]],
        parse_response_func: Callable[[dict[str, Any]], Any],
        http_client: "LLMHttpClient",
        failed_keys: set[str] | None = None,
        log_context: str = "API",
    ) -> tuple[Any, str]:
        """Core helper shared by all API calls."""
        api_key = await self._select_api_key(failed_keys)
        try:
            request_data = await prepare_request_func(api_key)
            logger.info(
                f"🌐 发起LLM请求 - 模型: {self.provider_name}/{self.model_name} "
                f"[{log_context}]"
            )
            logger.debug(f"📡 请求URL: {request_data.url}")
            masked_key = (
                f"{api_key[:8]}...{api_key[-4:] if len(api_key) > 12 else '***'}"
            )
            logger.debug(f"🔑 API密钥: {masked_key}")
            logger.debug(f"📋 请求头: {dict(request_data.headers)}")
            sanitizer_req_context_map = {"gemini": "gemini_request"}
            sanitizer_req_context = sanitizer_req_context_map.get(
                self.api_type, "openai_request"
            )
            sanitized_body = sanitize_for_logging(
                request_data.body, context=sanitizer_req_context
            )
            request_body_str = dump_json_safely(
                sanitized_body, ensure_ascii=False, indent=2
            )
            logger.debug(f"📦 请求体: {request_body_str}")
            http_response = await http_client.post(
                request_data.url,
                headers=request_data.headers,
                content=dump_json_safely(request_data.body, ensure_ascii=False),
            )
            logger.debug(f"📥 响应状态码: {http_response.status_code}")
            logger.debug(f"📄 响应头: {dict(http_response.headers)}")
            response_bytes = await http_response.aread()
            logger.debug(f"📦 响应体已完整读取 ({len(response_bytes)} bytes)")
            if http_response.status_code != 200:
                error_text = response_bytes.decode("utf-8", errors="ignore")
                logger.error(
                    f"❌ HTTP请求失败: {http_response.status_code} - {error_text} "
                    f"[{log_context}]"
                )
                logger.debug(f"💥 完整错误响应: {error_text}")
                await self.key_store.record_failure(
                    api_key, http_response.status_code, error_text
                )
                if http_response.status_code in [401, 403]:
                    error_code = LLMErrorCode.API_KEY_INVALID
                elif http_response.status_code == 429:
                    error_code = LLMErrorCode.API_RATE_LIMITED
                elif http_response.status_code in [402, 413]:
                    error_code = LLMErrorCode.API_QUOTA_EXCEEDED
                else:
                    error_code = LLMErrorCode.API_REQUEST_FAILED
                raise LLMException(
                    f"HTTP请求失败: {http_response.status_code}",
                    code=error_code,
                    details={
                        "status_code": http_response.status_code,
                        "response": error_text,
                        "api_key": api_key,
                    },
                )
            try:
                response_json = json.loads(response_bytes)
                sanitizer_context_map = {"gemini": "gemini_response"}
                sanitizer_context = sanitizer_context_map.get(
                    self.api_type, "openai_response"
                )
                sanitized_for_log = sanitize_for_logging(
                    response_json, context=sanitizer_context
                )
                response_json_str = json.dumps(
                    sanitized_for_log, ensure_ascii=False, indent=2
                )
                logger.debug(f"📋 响应JSON: {response_json_str}")
                parsed_data = parse_response_func(response_json)
            except Exception as e:
                logger.error(f"解析 {log_context} 响应失败: {e}", e=e)
                await self.key_store.record_failure(api_key, None, str(e))
                if isinstance(e, LLMException):
                    raise
                else:
                    raise LLMException(
                        f"解析API {log_context} 响应失败: {e}",
                        code=LLMErrorCode.RESPONSE_PARSE_ERROR,
                        cause=e,
                    )
            logger.info(f"🎯 LLM响应解析完成 [{log_context}]")
            return parsed_data, api_key
        except LLMException:
            raise
        except Exception as e:
            error_log_msg = f"生成 {log_context.lower()} 时发生未预期错误: {e}"
            logger.error(error_log_msg, e=e)
            await self.key_store.record_failure(api_key, None, str(e))
            raise LLMException(
                error_log_msg,
                code=LLMErrorCode.GENERATION_FAILED
                if log_context == "Generation"
                else LLMErrorCode.EMBEDDING_FAILED,
                cause=e,
            )

    async def _execute_embedding_request(
        self,
        adapter,
        texts: list[str],
        task_type: EmbeddingTaskType | str,
        http_client: LLMHttpClient,
        failed_keys: set[str] | None = None,
    ) -> list[list[float]]:
        """Execute a single embedding request; invoked by the retry mechanism."""

        async def prepare_request(api_key: str) -> RequestData:
            return adapter.prepare_embedding_request(
                model=self,
                api_key=api_key,
                texts=texts,
                task_type=task_type,
            )

        def parse_response(response_json: dict[str, Any]) -> list[list[float]]:
            adapter.validate_embedding_response(response_json)
            return adapter.parse_embedding_response(response_json)

        parsed_data, _api_key_used = await self._perform_api_call(
            prepare_request_func=prepare_request,
            parse_response_func=parse_response,
            http_client=http_client,
            failed_keys=failed_keys,
            log_context="Embedding",
        )
        return parsed_data

    async def _execute_with_smart_retry(
        self,
        adapter,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None,
        tools: dict[str, ToolExecutable] | None,
        tool_choice: str | dict[str, Any] | None,
        http_client: LLMHttpClient,
    ):
        """Smart retry mechanism built on the unified retry decorator."""
        ai_config = get_ai_config()
        max_retries = ai_config.get("max_retries_llm", 3)
        retry_delay = ai_config.get("retry_delay_llm", 2)
        retry_config = RetryConfig(max_retries=max_retries, retry_delay=retry_delay)
        return await with_smart_retry(
            self._execute_single_request,
            adapter,
            messages,
            config,
            tools,
            tool_choice,
            http_client,
            retry_config=retry_config,
            key_store=self.key_store,
            provider_name=self.provider_name,
        )

    async def _execute_single_request(
        self,
        adapter,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None,
        tools: dict[str, ToolExecutable] | None,
        tool_choice: str | dict[str, Any] | None,
        http_client: LLMHttpClient,
        failed_keys: set[str] | None = None,
    ) -> tuple[LLMResponse, str]:
        """Execute a single request; invoked by the retry mechanism.

        Returns the LLMResponse together with the API key that was used.
        """

        async def prepare_request(api_key: str) -> RequestData:
            return await adapter.prepare_advanced_request(
                model=self,
                api_key=api_key,
                messages=messages,
                config=config,
                tools=tools,
                tool_choice=tool_choice,
            )

        def parse_response(response_json: dict[str, Any]) -> LLMResponse:
            response_data = adapter.parse_response(
                model=self,
                response_json=response_json,
                is_advanced=True,
            )
            from .types.models import LLMToolCall

            response_tool_calls = []
            if response_data.tool_calls:
                for tc_data in response_data.tool_calls:
                    if isinstance(tc_data, LLMToolCall):
                        response_tool_calls.append(tc_data)
                    elif isinstance(tc_data, dict):
                        try:
                            response_tool_calls.append(LLMToolCall(**tc_data))
                        except Exception as e:
                            logger.warning(
                                f"无法将工具调用数据转换为LLMToolCall: {tc_data}, "
                                f"error: {e}"
                            )
                    else:
                        logger.warning(f"工具调用数据格式未知: {tc_data}")
            return LLMResponse(
                text=response_data.text,
                usage_info=response_data.usage_info,
                images=response_data.images,
                raw_response=response_data.raw_response,
                tool_calls=response_tool_calls if response_tool_calls else None,
                code_executions=response_data.code_executions,
                grounding_metadata=response_data.grounding_metadata,
                cache_info=response_data.cache_info,
            )

        parsed_data, api_key_used = await self._perform_api_call(
            prepare_request_func=prepare_request,
            parse_response_func=parse_response,
            http_client=http_client,
            failed_keys=failed_keys,
            log_context="Generation",
        )
        if config:
            if config.response_validator:
                try:
                    config.response_validator(parsed_data)
                except Exception as e:
                    raise LLMException(
                        f"响应内容未通过自定义验证器: {e}",
                        code=LLMErrorCode.API_RESPONSE_INVALID,
                        details={"validator_error": str(e)},
                        cause=e,
                    ) from e
            policy = config.validation_policy
            if policy:
                if policy.get("require_image") and not parsed_data.images:
                    if self.api_type == "gemini" and parsed_data.raw_response:
                        usage_metadata = parsed_data.raw_response.get(
                            "usageMetadata", {}
                        )
                        prompt_token_details = usage_metadata.get(
                            "promptTokensDetails", []
                        )
                        prompt_had_image = any(
                            detail.get("modality") == "IMAGE"
                            for detail in prompt_token_details
                        )
                        if prompt_had_image:
                            raise LLMException(
                                "响应验证失败:模型接收了图片输入但未生成图片。",
                                code=LLMErrorCode.API_RESPONSE_INVALID,
                                details={
                                    "policy": policy,
                                    "text_response": parsed_data.text,
                                    "raw_response": parsed_data.raw_response,
                                },
                            )
                        else:
                            logger.debug("Gemini提示词中未包含图片,跳过图片要求重试。")
                    else:
                        raise LLMException(
                            "响应验证失败:要求返回图片但未找到图片数据。",
                            code=LLMErrorCode.API_RESPONSE_INVALID,
                            details={
                                "policy": policy,
                                "text_response": parsed_data.text,
                            },
                        )
        return parsed_data, api_key_used

    async def close(self):
        """Mark the end of this model instance's current usage cycle."""
        if self._is_closed:
            return
        self._is_closed = True
        logger.debug(
            f"LLMModel实例的使用周期已结束: {self} (共享HTTP客户端状态不受影响)"
        )

    async def __aenter__(self):
        if self._is_closed:
            logger.debug(
                f"Re-entering context for closed LLMModel {self}. "
                f"Resetting _is_closed to False."
            )
            self._is_closed = False
        self._check_not_closed()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        _ = exc_type, exc_val, exc_tb
        await self.close()

    def _check_not_closed(self):
        """Raise RuntimeError if the instance has been closed."""
        if self._is_closed:
            raise RuntimeError(f"LLMModel实例已关闭: {self}")

    async def generate_response(
        self,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None = None,
        tools: dict[str, ToolExecutable] | None = None,
        tool_choice: str | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Generate an advanced response.

        This method now performs only a *single* LLM API call and returns the
        result, including any tool-call requests.
        """
        self._check_not_closed()
        from .adapters import get_adapter_for_api_type
        from .config.generation import create_generation_config_from_kwargs

        final_request_config = self._generation_config or LLMGenerationConfig()
        if kwargs:
            kwargs_config = create_generation_config_from_kwargs(**kwargs)
            merged_dict = final_request_config.to_dict()
            merged_dict.update(kwargs_config.to_dict())
            final_request_config = LLMGenerationConfig(**merged_dict)
        if config is not None:
            merged_dict = final_request_config.to_dict()
            merged_dict.update(config.to_dict())
            final_request_config = LLMGenerationConfig(**merged_dict)
        adapter = get_adapter_for_api_type(self.api_type)
        http_client = await self._get_http_client()
        response, _ = await self._execute_with_smart_retry(
            adapter,
            messages,
            final_request_config,
            tools,
            tool_choice,
            http_client,
        )
        return response

    async def generate_embeddings(
        self,
        texts: list[str],
        task_type: EmbeddingTaskType | str = EmbeddingTaskType.RETRIEVAL_DOCUMENT,
        **kwargs: Any,
    ) -> list[list[float]]:
        """Generate text embedding vectors."""
        self._check_not_closed()
        if not texts:
            return []
        from .adapters import get_adapter_for_api_type

        adapter = get_adapter_for_api_type(self.api_type)
        if not adapter:
            raise LLMException(
                f"未找到适用于 API 类型 '{self.api_type}' 的嵌入适配器",
                code=LLMErrorCode.CONFIGURATION_ERROR,
            )
        http_client = await self._get_http_client()
        ai_config = get_ai_config()
        default_max_retries = ai_config.get("max_retries_llm", 3)
        default_retry_delay = ai_config.get("retry_delay_llm", 2)
        max_retries_embed = kwargs.get(
            "max_retries_embed", max(1, default_max_retries // 2)
        )
        retry_delay_embed = kwargs.get("retry_delay_embed", default_retry_delay / 2)
        retry_config = RetryConfig(
            max_retries=max_retries_embed,
            retry_delay=retry_delay_embed,
            exponential_backoff=True,
            key_rotation=True,
        )
        return await with_smart_retry(
            self._execute_embedding_request,
            adapter,
            texts,
            task_type,
            http_client,
            retry_config=retry_config,
            key_store=self.key_store,
            provider_name=self.provider_name,
        )

    def __str__(self) -> str:
        status = "closed" if self._is_closed else "active"
        return f"LLMModel({self.provider_name}/{self.model_name}, {status})"

    def __repr__(self) -> str:
        status = "closed" if self._is_closed else "active"
        return (
            f"LLMModel(provider={self.provider_name}, model={self.model_name}, "
            f"api_type={self.api_type}, status={status})"
        )
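

# Usage sketch (illustrative only, not part of the module): ``LLMModel`` works
# as an async context manager, so a call site might look roughly like the
# following. How the instance is constructed depends on provider setup
# elsewhere in this package and is assumed here.
#
#     async with model:
#         response = await model.generate_response(messages)
#         if response.tool_calls:
#             ...  # dispatch tool calls, then issue a follow-up request
#         vectors = await model.generate_embeddings(["你好"])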