"""
LLM model implementation classes.

Contains the abstract base class and concrete implementations of LLM models,
responsible for interacting with the APIs of various AI providers.
"""

from abc import ABC, abstractmethod
from collections.abc import Awaitable, Callable
import json
from typing import Any, TypeVar

from pydantic import BaseModel

from zhenxun.services.log import logger

from .adapters.base import RequestData
from .config import LLMGenerationConfig
from .config.providers import get_ai_config
from .core import (
    KeyStatusStore,
    LLMHttpClient,
    RetryConfig,
    http_client_manager,
    with_smart_retry,
)
from .types import (
    EmbeddingTaskType,
    LLMErrorCode,
    LLMException,
    LLMMessage,
    LLMResponse,
    ModelDetail,
    ProviderConfig,
    ToolExecutable,
)
from .types.capabilities import ModelCapabilities, ModelModality
from .utils import _sanitize_request_body_for_logging

T = TypeVar("T", bound=BaseModel)


class LLMModelBase(ABC):
    """Abstract base class for LLM models."""

    @abstractmethod
    async def generate_response(
        self,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None = None,
        tools: dict[str, ToolExecutable] | None = None,
        tool_choice: str | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """Generate an advanced response."""
        pass

    @abstractmethod
    async def generate_embeddings(
        self,
        texts: list[str],
        task_type: EmbeddingTaskType | str = EmbeddingTaskType.RETRIEVAL_DOCUMENT,
        **kwargs: Any,
    ) -> list[list[float]]:
        """Generate text embedding vectors."""
        pass


class LLMModel(LLMModelBase):
    """Concrete LLM model implementation."""

    def __init__(
        self,
        provider_config: ProviderConfig,
        model_detail: ModelDetail,
        key_store: KeyStatusStore,
        http_client: LLMHttpClient,
        capabilities: ModelCapabilities,
        config_override: LLMGenerationConfig | None = None,
    ):
        self.provider_config = provider_config
        self.model_detail = model_detail
        self.key_store = key_store
        self.http_client: LLMHttpClient = http_client
        self.capabilities = capabilities
        self._generation_config = config_override

        # Convenience aliases pulled from the provider/model configuration.
        self.provider_name = provider_config.name
        self.api_type = provider_config.api_type
        self.api_base = provider_config.api_base
        # Normalize a single key into a list so key rotation can treat both
        # configuration styles uniformly.
        self.api_keys = (
            [provider_config.api_key]
            if isinstance(provider_config.api_key, str)
            else provider_config.api_key
        )
        self.model_name = model_detail.model_name
        self.temperature = model_detail.temperature
        self.max_tokens = model_detail.max_tokens

        self._is_closed = False

    def can_process_images(self) -> bool:
        """Check whether the model accepts images as input."""
        return ModelModality.IMAGE in self.capabilities.input_modalities

    def can_process_video(self) -> bool:
        """Check whether the model accepts video as input."""
        return ModelModality.VIDEO in self.capabilities.input_modalities

    def can_process_audio(self) -> bool:
        """Check whether the model accepts audio as input."""
        return ModelModality.AUDIO in self.capabilities.input_modalities

    def can_generate_images(self) -> bool:
        """Check whether the model can generate images."""
        return ModelModality.IMAGE in self.capabilities.output_modalities

    def can_generate_audio(self) -> bool:
        """Check whether the model can generate audio (TTS)."""
        return ModelModality.AUDIO in self.capabilities.output_modalities

    def can_use_tools(self) -> bool:
        """Check whether the model supports tool/function calling."""
        return self.capabilities.supports_tool_calling

    def is_embedding_model(self) -> bool:
        """Check whether this is an embedding model."""
        return self.capabilities.is_embedding_model

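    # Usage sketch (hypothetical caller code, not part of this module): gate
    # multimodal input on the declared capabilities instead of letting the
    # provider reject it server-side. `image_segments` is an assumed variable.
    #
    #     if image_segments and not model.can_process_images():
    #         raise LLMException(
    #             f"{model.model_name} does not accept image input",
    #             code=LLMErrorCode.CONFIGURATION_ERROR,
    #         )
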
    async def _get_http_client(self) -> LLMHttpClient:
        """Get the HTTP client, replacing it if the shared one was closed."""
        if self.http_client.is_closed:
            logger.debug(
                f"HTTP client for LLMModel {self.provider_name}/{self.model_name} "
                "is closed; acquiring a new one"
            )
            self.http_client = await http_client_manager.get_client(
                self.provider_config
            )
        return self.http_client

    async def _select_api_key(self, failed_keys: set[str] | None = None) -> str:
        """Select an available API key (round-robin strategy)."""
        if not self.api_keys:
            raise LLMException(
                f"Provider {self.provider_name} has no API keys configured",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
            )

        selected_key = await self.key_store.get_next_available_key(
            self.provider_name, self.api_keys, failed_keys
        )

        if not selected_key:
            raise LLMException(
                f"All API keys for provider {self.provider_name} are currently "
                "unavailable",
                code=LLMErrorCode.NO_AVAILABLE_KEYS,
                details={
                    "total_keys": len(self.api_keys),
                    "failed_keys": len(failed_keys or set()),
                },
            )

        return selected_key

    async def _perform_api_call(
        self,
        prepare_request_func: Callable[[str], Awaitable["RequestData"]],
        parse_response_func: Callable[[dict[str, Any]], Any],
        http_client: "LLMHttpClient",
        failed_keys: set[str] | None = None,
        log_context: str = "API",
    ) -> tuple[Any, str]:
        """Shared core for one API call: select a key, send the request, parse."""
        api_key = await self._select_api_key(failed_keys)

        try:
            request_data = await prepare_request_func(api_key)

            logger.info(
                f"🌐 Sending LLM request - model: "
                f"{self.provider_name}/{self.model_name} [{log_context}]"
            )
            logger.debug(f"📡 Request URL: {request_data.url}")
            # Never log the raw key; keep only a short prefix and suffix.
            masked_key = (
                f"{api_key[:8]}...{api_key[-4:] if len(api_key) > 12 else '***'}"
            )
            logger.debug(f"🔑 API key: {masked_key}")
            logger.debug(f"📋 Request headers: {dict(request_data.headers)}")

            sanitized_body = _sanitize_request_body_for_logging(request_data.body)
            request_body_str = json.dumps(sanitized_body, ensure_ascii=False, indent=2)
            logger.debug(f"📦 Request body: {request_body_str}")

            http_response = await http_client.post(
                request_data.url,
                headers=request_data.headers,
                json=request_data.body,
            )

            logger.debug(f"📥 Response status code: {http_response.status_code}")
            logger.debug(f"📄 Response headers: {dict(http_response.headers)}")

            if http_response.status_code != 200:
                error_text = http_response.text
                logger.error(
                    f"❌ HTTP request failed: {http_response.status_code} - "
                    f"{error_text} [{log_context}]"
                )
                logger.debug(f"💥 Full error response: {error_text}")

                await self.key_store.record_failure(
                    api_key, http_response.status_code, error_text
                )

                # Map HTTP status codes onto LLM error codes so the retry layer
                # can distinguish invalid keys, rate limits, and quota issues.
                if http_response.status_code in [401, 403]:
                    error_code = LLMErrorCode.API_KEY_INVALID
                elif http_response.status_code == 429:
                    error_code = LLMErrorCode.API_RATE_LIMITED
                elif http_response.status_code in [402, 413]:
                    error_code = LLMErrorCode.API_QUOTA_EXCEEDED
                else:
                    error_code = LLMErrorCode.API_REQUEST_FAILED

                raise LLMException(
                    f"HTTP request failed: {http_response.status_code}",
                    code=error_code,
                    details={
                        "status_code": http_response.status_code,
                        "response": error_text,
                        "api_key": api_key,
                    },
                )

            try:
                response_json = http_response.json()
                response_json_str = json.dumps(
                    response_json, ensure_ascii=False, indent=2
                )
                logger.debug(f"📋 Response JSON: {response_json_str}")
                parsed_data = parse_response_func(response_json)

            except Exception as e:
                logger.error(f"Failed to parse {log_context} response: {e}", e=e)
                await self.key_store.record_failure(api_key, None, str(e))
                if isinstance(e, LLMException):
                    raise
                else:
                    raise LLMException(
                        f"Failed to parse API {log_context} response: {e}",
                        code=LLMErrorCode.RESPONSE_PARSE_ERROR,
                        cause=e,
                    )

            logger.info(f"🎯 LLM response parsed [{log_context}]")
            return parsed_data, api_key

        except LLMException:
            raise
        except Exception as e:
            error_log_msg = (
                f"Unexpected error during {log_context.lower()} generation: {e}"
            )
            logger.error(error_log_msg, e=e)
            await self.key_store.record_failure(api_key, None, str(e))
            raise LLMException(
                error_log_msg,
                code=LLMErrorCode.GENERATION_FAILED
                if log_context == "Generation"
                else LLMErrorCode.EMBEDDING_FAILED,
                cause=e,
            )

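    # The two wrappers below are the call sites of `_perform_api_call`: each
    # supplies an adapter-specific (prepare_request, parse_response) pair,
    # while key selection, logging, HTTP error mapping, and failure recording
    # stay centralized above.
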
    async def _execute_embedding_request(
        self,
        adapter,
        texts: list[str],
        task_type: EmbeddingTaskType | str,
        http_client: LLMHttpClient,
        failed_keys: set[str] | None = None,
    ) -> list[list[float]]:
        """Execute a single embedding request; called by the retry wrapper."""

        async def prepare_request(api_key: str) -> RequestData:
            return adapter.prepare_embedding_request(
                model=self,
                api_key=api_key,
                texts=texts,
                task_type=task_type,
            )

        def parse_response(response_json: dict[str, Any]) -> list[list[float]]:
            adapter.validate_embedding_response(response_json)
            return adapter.parse_embedding_response(response_json)

        parsed_data, _ = await self._perform_api_call(
            prepare_request_func=prepare_request,
            parse_response_func=parse_response,
            http_client=http_client,
            failed_keys=failed_keys,
            log_context="Embedding",
        )
        return parsed_data

    async def _execute_with_smart_retry(
        self,
        adapter,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None,
        tools: dict[str, ToolExecutable] | None,
        tool_choice: str | dict[str, Any] | None,
        http_client: LLMHttpClient,
    ):
        """Smart retry wrapper using the unified retry decorator."""
        ai_config = get_ai_config()
        max_retries = ai_config.get("max_retries_llm", 3)
        retry_delay = ai_config.get("retry_delay_llm", 2)
        retry_config = RetryConfig(max_retries=max_retries, retry_delay=retry_delay)

        return await with_smart_retry(
            self._execute_single_request,
            adapter,
            messages,
            config,
            tools,
            tool_choice,
            http_client,
            retry_config=retry_config,
            key_store=self.key_store,
            provider_name=self.provider_name,
        )

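    # `with_smart_retry` re-invokes `_execute_single_request` below on
    # retryable failures; given `key_store` and `provider_name` it can pass an
    # accumulated `failed_keys` set so that `_select_api_key` skips keys that
    # have already failed during this call.
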
    async def _execute_single_request(
        self,
        adapter,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None,
        tools: dict[str, ToolExecutable] | None,
        tool_choice: str | dict[str, Any] | None,
        http_client: LLMHttpClient,
        failed_keys: set[str] | None = None,
    ) -> tuple[LLMResponse, str]:
        """Execute a single request for the retry wrapper; returns the
        LLMResponse and the API key used."""

        async def prepare_request(api_key: str) -> RequestData:
            return await adapter.prepare_advanced_request(
                model=self,
                api_key=api_key,
                messages=messages,
                config=config,
                tools=tools,
                tool_choice=tool_choice,
            )

        def parse_response(response_json: dict[str, Any]) -> LLMResponse:
            response_data = adapter.parse_response(
                model=self,
                response_json=response_json,
                is_advanced=True,
            )
            from .types.models import LLMToolCall

            # Normalize tool calls: adapters may yield LLMToolCall objects or
            # raw dicts; coerce dicts and skip anything unrecognizable.
            response_tool_calls = []
            if response_data.tool_calls:
                for tc_data in response_data.tool_calls:
                    if isinstance(tc_data, LLMToolCall):
                        response_tool_calls.append(tc_data)
                    elif isinstance(tc_data, dict):
                        try:
                            response_tool_calls.append(LLMToolCall(**tc_data))
                        except Exception as e:
                            logger.warning(
                                f"Failed to convert tool call data to LLMToolCall: "
                                f"{tc_data}, error: {e}"
                            )
                    else:
                        logger.warning(f"Unknown tool call data format: {tc_data}")

            return LLMResponse(
                text=response_data.text,
                usage_info=response_data.usage_info,
                raw_response=response_data.raw_response,
                tool_calls=response_tool_calls if response_tool_calls else None,
                code_executions=response_data.code_executions,
                grounding_metadata=response_data.grounding_metadata,
                cache_info=response_data.cache_info,
            )

        parsed_data, api_key_used = await self._perform_api_call(
            prepare_request_func=prepare_request,
            parse_response_func=parse_response,
            http_client=http_client,
            failed_keys=failed_keys,
            log_context="Generation",
        )
        return parsed_data, api_key_used

    async def close(self):
        """Mark the end of this model instance's current usage cycle."""
        if self._is_closed:
            return
        self._is_closed = True
        logger.debug(
            f"LLMModel instance usage cycle ended: {self} "
            "(shared HTTP client state is unaffected)"
        )

    async def __aenter__(self):
        if self._is_closed:
            logger.debug(
                f"Re-entering context for closed LLMModel {self}. "
                f"Resetting _is_closed to False."
            )
            self._is_closed = False
        self._check_not_closed()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        _ = exc_type, exc_val, exc_tb
        await self.close()

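    # Usage sketch (hypothetical caller code, not part of this module): the
    # context manager scopes only this instance's usage cycle; the shared HTTP
    # client is owned by `http_client_manager` and survives `__aexit__`.
    #
    #     async with model:
    #         response = await model.generate_response(messages)
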
    def _check_not_closed(self):
        """Raise if this instance has already been closed."""
        if self._is_closed:
            raise RuntimeError(f"LLMModel instance already closed: {self}")

    async def generate_response(
        self,
        messages: list[LLMMessage],
        config: LLMGenerationConfig | None = None,
        tools: dict[str, ToolExecutable] | None = None,
        tool_choice: str | dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Generate an advanced response.

        This method now performs only a *single* LLM API call and returns the
        result, including any tool call requests.
        """
        self._check_not_closed()

        from .adapters import get_adapter_for_api_type
        from .config.generation import create_generation_config_from_kwargs

        # Merge generation settings with increasing precedence:
        # instance override < **kwargs < explicit `config` argument.
        final_request_config = self._generation_config or LLMGenerationConfig()
        if kwargs:
            kwargs_config = create_generation_config_from_kwargs(**kwargs)
            merged_dict = final_request_config.to_dict()
            merged_dict.update(kwargs_config.to_dict())
            final_request_config = LLMGenerationConfig(**merged_dict)

        if config is not None:
            merged_dict = final_request_config.to_dict()
            merged_dict.update(config.to_dict())
            final_request_config = LLMGenerationConfig(**merged_dict)

        adapter = get_adapter_for_api_type(self.api_type)
        http_client = await self._get_http_client()

        response, _ = await self._execute_with_smart_retry(
            adapter,
            messages,
            final_request_config,
            tools,
            tool_choice,
            http_client,
        )

        return response

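    # Usage sketch (hypothetical caller code, not part of this module;
    # `temperature` and `max_tokens` as LLMGenerationConfig fields are
    # assumptions): an explicit `config` argument wins over ad-hoc kwargs,
    # which in turn win over the instance-level `config_override`.
    #
    #     response = await model.generate_response(
    #         messages,
    #         config=LLMGenerationConfig(temperature=0.2),  # highest precedence
    #         max_tokens=512,  # kwargs-level override
    #     )
    #     print(response.text)
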
    async def generate_embeddings(
        self,
        texts: list[str],
        task_type: EmbeddingTaskType | str = EmbeddingTaskType.RETRIEVAL_DOCUMENT,
        **kwargs: Any,
    ) -> list[list[float]]:
        """Generate text embedding vectors."""
        self._check_not_closed()
        if not texts:
            return []

        from .adapters import get_adapter_for_api_type

        adapter = get_adapter_for_api_type(self.api_type)
        if not adapter:
            raise LLMException(
                f"No embedding adapter found for API type '{self.api_type}'",
                code=LLMErrorCode.CONFIGURATION_ERROR,
            )

        http_client = await self._get_http_client()

        # Embedding calls default to half the retries and half the delay of
        # the LLM settings; both can be overridden per call via kwargs.
        ai_config = get_ai_config()
        default_max_retries = ai_config.get("max_retries_llm", 3)
        default_retry_delay = ai_config.get("retry_delay_llm", 2)
        max_retries_embed = kwargs.get(
            "max_retries_embed", max(1, default_max_retries // 2)
        )
        retry_delay_embed = kwargs.get("retry_delay_embed", default_retry_delay / 2)

        retry_config = RetryConfig(
            max_retries=max_retries_embed,
            retry_delay=retry_delay_embed,
            exponential_backoff=True,
            key_rotation=True,
        )

        return await with_smart_retry(
            self._execute_embedding_request,
            adapter,
            texts,
            task_type,
            http_client,
            retry_config=retry_config,
            key_store=self.key_store,
            provider_name=self.provider_name,
        )

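    # Usage sketch (hypothetical caller code, not part of this module):
    #
    #     vectors = await model.generate_embeddings(
    #         ["first passage", "second passage"],
    #         task_type=EmbeddingTaskType.RETRIEVAL_DOCUMENT,
    #         max_retries_embed=1,  # per-call retry override
    #     )
    #     assert len(vectors) == 2
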
    def __str__(self) -> str:
        status = "closed" if self._is_closed else "active"
        return f"LLMModel({self.provider_name}/{self.model_name}, {status})"

    def __repr__(self) -> str:
        status = "closed" if self._is_closed else "active"
        return (
            f"LLMModel(provider={self.provider_name}, model={self.model_name}, "
            f"api_type={self.api_type}, status={status})"
        )