From fea9be8c63c76f3e2dd2c81f5ce886dc145ca557 Mon Sep 17 00:00:00 2001
From: webjoin111 <455457521@qq.com>
Date: Fri, 19 Sep 2025 00:01:32 +0800
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat(image):=20=E4=BC=98=E5=8C=96?=
 =?UTF-8?q?=E5=9B=BE=E7=89=87=E7=94=9F=E6=88=90=E5=93=8D=E5=BA=94=E5=B9=B6?=
 =?UTF-8?q?=E8=BF=94=E5=9B=9E=E5=AE=8C=E6=95=B4LLMResponse?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 zhenxun/services/llm/api.py     | 17 +++++---------
 zhenxun/services/llm/service.py | 40 ++++++++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/zhenxun/services/llm/api.py b/zhenxun/services/llm/api.py
index d41fda7d..2e0932a6 100644
--- a/zhenxun/services/llm/api.py
+++ b/zhenxun/services/llm/api.py
@@ -311,7 +311,7 @@ async def _generate_image_from_message(
     message: UniMessage,
     model: ModelName = None,
     **kwargs: Any,
-) -> bytes:
+) -> LLMResponse:
     """
     [内部] 从 UniMessage 生成图片的核心辅助函数。
     """
@@ -341,14 +341,9 @@ async def _generate_image_from_message(
 
             if not response.image_bytes:
                 error_text = response.text or "模型未返回图片数据。"
-                logger.error(f"图片生成失败: {error_text}")
-                raise LLMException(
-                    f"图片生成失败: {error_text}",
-                    code=LLMErrorCode.GENERATION_FAILED,
-                    details={"raw_response": response.raw_response},
-                )
+                logger.warning(f"图片生成调用未返回图片，返回文本内容: {error_text}")
 
-            return response.image_bytes
+            return response
     except LLMException:
         raise
     except Exception as e:
@@ -363,7 +358,7 @@ async def create_image(
     images: None = None,
     model: ModelName = None,
     **kwargs: Any,
-) -> bytes:
+) -> LLMResponse:
     """根据文本提示生成一张新图片。"""
     ...
 
@@ -375,7 +370,7 @@ async def create_image(
     images: list[Path | bytes | str] | Path | bytes | str,
     model: ModelName = None,
     **kwargs: Any,
-) -> bytes:
+) -> LLMResponse:
     """在给定图片的基础上，根据文本提示进行编辑或重新生成。"""
     ...
 
@@ -386,7 +381,7 @@ async def create_image(
     images: list[Path | bytes | str] | Path | bytes | str | None = None,
     model: ModelName = None,
     **kwargs: Any,
-) -> bytes:
+) -> LLMResponse:
     """
     智能图片生成/编辑函数。
     - 如果 `images` 为 None，执行文生图。
diff --git a/zhenxun/services/llm/service.py b/zhenxun/services/llm/service.py
index 15fbaf6d..1b2bd6b1 100644
--- a/zhenxun/services/llm/service.py
+++ b/zhenxun/services/llm/service.py
@@ -304,7 +304,7 @@ class LLMModel(LLMModelBase):
             adapter.validate_embedding_response(response_json)
             return adapter.parse_embedding_response(response_json)
 
-        parsed_data, api_key_used = await self._perform_api_call(
+        parsed_data, _api_key_used = await self._perform_api_call(
             prepare_request_func=prepare_request,
             parse_response_func=parse_response,
             http_client=http_client,
@@ -421,11 +421,39 @@ class LLMModel(LLMModelBase):
             policy = config.validation_policy
             if policy:
                 if policy.get("require_image") and not parsed_data.image_bytes:
-                    raise LLMException(
-                        "响应验证失败：要求返回图片但未找到图片数据。",
-                        code=LLMErrorCode.API_RESPONSE_INVALID,
-                        details={"policy": policy, "text_response": parsed_data.text},
-                    )
+                    if self.api_type == "gemini" and parsed_data.raw_response:
+                        usage_metadata = parsed_data.raw_response.get(
+                            "usageMetadata", {}
+                        )
+                        prompt_token_details = usage_metadata.get(
+                            "promptTokensDetails", []
+                        )
+                        prompt_had_image = any(
+                            detail.get("modality") == "IMAGE"
+                            for detail in prompt_token_details
+                        )
+
+                        if prompt_had_image:
+                            raise LLMException(
+                                "响应验证失败：模型接收了图片输入但未生成图片。",
+                                code=LLMErrorCode.API_RESPONSE_INVALID,
+                                details={
+                                    "policy": policy,
+                                    "text_response": parsed_data.text,
+                                    "raw_response": parsed_data.raw_response,
+                                },
+                            )
+                        else:
+                            logger.debug("Gemini提示词中未包含图片，跳过图片要求重试。")
+                    else:
+                        raise LLMException(
+                            "响应验证失败：要求返回图片但未找到图片数据。",
+                            code=LLMErrorCode.API_RESPONSE_INVALID,
+                            details={
+                                "policy": policy,
+                                "text_response": parsed_data.text,
+                            },
+                        )
 
         return parsed_data, api_key_used