From fea9be8c63c76f3e2dd2c81f5ce886dc145ca557 Mon Sep 17 00:00:00 2001 From: webjoin111 <455457521@qq.com> Date: Fri, 19 Sep 2025 00:01:32 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat(image):=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=9B=BE=E7=89=87=E7=94=9F=E6=88=90=E5=93=8D=E5=BA=94=E5=B9=B6?= =?UTF-8?q?=E8=BF=94=E5=9B=9E=E5=AE=8C=E6=95=B4LLMResponse?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- zhenxun/services/llm/api.py | 17 +++++--------- zhenxun/services/llm/service.py | 40 ++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/zhenxun/services/llm/api.py b/zhenxun/services/llm/api.py index d41fda7d..2e0932a6 100644 --- a/zhenxun/services/llm/api.py +++ b/zhenxun/services/llm/api.py @@ -311,7 +311,7 @@ async def _generate_image_from_message( message: UniMessage, model: ModelName = None, **kwargs: Any, -) -> bytes: +) -> LLMResponse: """ [内部] 从 UniMessage 生成图片的核心辅助函数。 """ @@ -341,14 +341,9 @@ async def _generate_image_from_message( if not response.image_bytes: error_text = response.text or "模型未返回图片数据。" - logger.error(f"图片生成失败: {error_text}") - raise LLMException( - f"图片生成失败: {error_text}", - code=LLMErrorCode.GENERATION_FAILED, - details={"raw_response": response.raw_response}, - ) + logger.warning(f"图片生成调用未返回图片,返回文本内容: {error_text}") - return response.image_bytes + return response except LLMException: raise except Exception as e: @@ -363,7 +358,7 @@ async def create_image( images: None = None, model: ModelName = None, **kwargs: Any, -) -> bytes: +) -> LLMResponse: """根据文本提示生成一张新图片。""" ... @@ -375,7 +370,7 @@ async def create_image( images: list[Path | bytes | str] | Path | bytes | str, model: ModelName = None, **kwargs: Any, -) -> bytes: +) -> LLMResponse: """在给定图片的基础上,根据文本提示进行编辑或重新生成。""" ... @@ -386,7 +381,7 @@ async def create_image( images: list[Path | bytes | str] | Path | bytes | str | None = None, model: ModelName = None, **kwargs: Any, -) -> bytes: +) -> LLMResponse: """ 智能图片生成/编辑函数。 - 如果 `images` 为 None,执行文生图。 diff --git a/zhenxun/services/llm/service.py b/zhenxun/services/llm/service.py index 15fbaf6d..1b2bd6b1 100644 --- a/zhenxun/services/llm/service.py +++ b/zhenxun/services/llm/service.py @@ -304,7 +304,7 @@ class LLMModel(LLMModelBase): adapter.validate_embedding_response(response_json) return adapter.parse_embedding_response(response_json) - parsed_data, api_key_used = await self._perform_api_call( + parsed_data, _api_key_used = await self._perform_api_call( prepare_request_func=prepare_request, parse_response_func=parse_response, http_client=http_client, @@ -421,11 +421,39 @@ class LLMModel(LLMModelBase): policy = config.validation_policy if policy: if policy.get("require_image") and not parsed_data.image_bytes: - raise LLMException( - "响应验证失败:要求返回图片但未找到图片数据。", - code=LLMErrorCode.API_RESPONSE_INVALID, - details={"policy": policy, "text_response": parsed_data.text}, - ) + if self.api_type == "gemini" and parsed_data.raw_response: + usage_metadata = parsed_data.raw_response.get( + "usageMetadata", {} + ) + prompt_token_details = usage_metadata.get( + "promptTokensDetails", [] + ) + prompt_had_image = any( + detail.get("modality") == "IMAGE" + for detail in prompt_token_details + ) + + if prompt_had_image: + raise LLMException( + "响应验证失败:模型接收了图片输入但未生成图片。", + code=LLMErrorCode.API_RESPONSE_INVALID, + details={ + "policy": policy, + "text_response": parsed_data.text, + "raw_response": parsed_data.raw_response, + }, + ) + else: + logger.debug("Gemini提示词中未包含图片,跳过图片要求重试。") + else: + raise LLMException( + "响应验证失败:要求返回图片但未找到图片数据。", + code=LLMErrorCode.API_RESPONSE_INVALID, + details={ + "policy": policy, + "text_response": parsed_data.text, + }, + ) return parsed_data, api_key_used