✨ feat(image): 优化图片生成响应并返回完整LLMResponse

2025-12-14 21:52:56 +08:00 · 2025-09-19 00:01:32 +08:00 · 2025-09-19 00:01:32 +08:00 · fea9be8c63
commit fea9be8c63
parent 851a7a549e
2 changed files with 40 additions and 17 deletions
--- a/zhenxun/services/llm/api.py
+++ b/zhenxun/services/llm/api.py
@ -311,7 +311,7 @@ async def _generate_image_from_message(
    message: UniMessage,
    model: ModelName = None,
    **kwargs: Any,
-) -> bytes:
+) -> LLMResponse:
    """
    [内部] 从 UniMessage 生成图片的核心辅助函数。
    """
@ -341,14 +341,9 @@ async def _generate_image_from_message(

            if not response.image_bytes:
                error_text = response.text or "模型未返回图片数据。"
-                logger.error(f"图片生成失败: {error_text}")
-                raise LLMException(
-                    f"图片生成失败: {error_text}",
-                    code=LLMErrorCode.GENERATION_FAILED,
-                    details={"raw_response": response.raw_response},
-                )
+                logger.warning(f"图片生成调用未返回图片，返回文本内容: {error_text}")

-            return response.image_bytes
+            return response
    except LLMException:
        raise
    except Exception as e:
@ -363,7 +358,7 @@ async def create_image(
    images: None = None,
    model: ModelName = None,
    **kwargs: Any,
-) -> bytes:
+) -> LLMResponse:
    """根据文本提示生成一张新图片。"""
    ...

@ -375,7 +370,7 @@ async def create_image(
    images: list[Path | bytes | str] | Path | bytes | str,
    model: ModelName = None,
    **kwargs: Any,
-) -> bytes:
+) -> LLMResponse:
    """在给定图片的基础上，根据文本提示进行编辑或重新生成。"""
    ...

@ -386,7 +381,7 @@ async def create_image(
    images: list[Path | bytes | str] | Path | bytes | str | None = None,
    model: ModelName = None,
    **kwargs: Any,
-) -> bytes:
+) -> LLMResponse:
    """
    智能图片生成/编辑函数。
    - 如果 `images` 为 None，执行文生图。
--- a/zhenxun/services/llm/service.py
+++ b/zhenxun/services/llm/service.py
@ -304,7 +304,7 @@ class LLMModel(LLMModelBase):
            adapter.validate_embedding_response(response_json)
            return adapter.parse_embedding_response(response_json)

-        parsed_data, api_key_used = await self._perform_api_call(
+        parsed_data, _api_key_used = await self._perform_api_call(
            prepare_request_func=prepare_request,
            parse_response_func=parse_response,
            http_client=http_client,
@ -421,11 +421,39 @@ class LLMModel(LLMModelBase):
            policy = config.validation_policy
            if policy:
                if policy.get("require_image") and not parsed_data.image_bytes:
-                    raise LLMException(
-                        "响应验证失败：要求返回图片但未找到图片数据。",
-                        code=LLMErrorCode.API_RESPONSE_INVALID,
-                        details={"policy": policy, "text_response": parsed_data.text},
-                    )
+                    if self.api_type == "gemini" and parsed_data.raw_response:
+                        usage_metadata = parsed_data.raw_response.get(
+                            "usageMetadata", {}
+                        )
+                        prompt_token_details = usage_metadata.get(
+                            "promptTokensDetails", []
+                        )
+                        prompt_had_image = any(
+                            detail.get("modality") == "IMAGE"
+                            for detail in prompt_token_details
+                        )
+
+                        if prompt_had_image:
+                            raise LLMException(
+                                "响应验证失败：模型接收了图片输入但未生成图片。",
+                                code=LLMErrorCode.API_RESPONSE_INVALID,
+                                details={
+                                    "policy": policy,
+                                    "text_response": parsed_data.text,
+                                    "raw_response": parsed_data.raw_response,
+                                },
+                            )
+                        else:
+                            logger.debug("Gemini提示词中未包含图片，跳过图片要求重试。")
+                    else:
+                        raise LLMException(
+                            "响应验证失败：要求返回图片但未找到图片数据。",
+                            code=LLMErrorCode.API_RESPONSE_INVALID,
+                            details={
+                                "policy": policy,
+                                "text_response": parsed_data.text,
+                            },
+                        )

        return parsed_data, api_key_used