Mirror of https://github.com/zhenxun-org/zhenxun_bot.git, synced 2025-12-15 14:22:55 +08:00
⚡ Enhance httpx compatibility (#1915)
* Enhance httpx compatibility
* Also fix the SSL error against the tx (Tencent) image servers
* 🚨 auto fix by pre-commit hooks
* Fix the issues that failed the checks
* 🚨 auto fix by pre-commit hooks
* Adapt to httpx 0.28.0+
* 🚨 auto fix by pre-commit hooks
* 🎨 Code formatting
* ⚡ Code optimization
* 🎨 Code formatting

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: HibiKier <45528451+HibiKier@users.noreply.github.com>
This commit is contained in:
parent 4cc800c832
commit 62b0b02466
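The core of the compatibility work: httpx 0.28.0 removed the `proxies=` keyword on `httpx.AsyncClient` in favour of a single `proxy=` URL, so client construction has to be version-gated. A minimal sketch, condensed from `get_async_client` in the diff below; the wrapper name `make_client` is hypothetical, and the scheme keys mirror the bot's `default_proxy` mapping:

import httpx
from packaging.version import parse as parse_version


def make_client(proxies: dict[str, str] | None = None, **kwargs) -> httpx.AsyncClient:
    # httpx < 0.28.0 still accepts a scheme -> proxy-URL mapping.
    if parse_version(httpx.__version__) < parse_version("0.28.0"):
        return httpx.AsyncClient(proxies=proxies, **kwargs)  # type: ignore[call-arg]
    # httpx >= 0.28.0 takes a single proxy URL; fall back to either scheme entry.
    proxy_str = (proxies.get("http://") or proxies.get("https://")) if proxies else None
    return httpx.AsyncClient(proxy=proxy_str, **kwargs)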
@@ -1,219 +1,205 @@
 import asyncio
-from asyncio.exceptions import TimeoutError
-from collections.abc import AsyncGenerator
+from collections.abc import AsyncGenerator, Sequence
 from contextlib import asynccontextmanager
 from pathlib import Path
 import time
-from typing import Any, ClassVar, Literal
+from typing import Any, ClassVar, Literal, cast
 
 import aiofiles
-from anyio import EndOfStream
 import httpx
-from httpx import ConnectTimeout, HTTPStatusError, Response
+from httpx import AsyncHTTPTransport, HTTPStatusError, Response
 from nonebot_plugin_alconna import UniMessage
 from nonebot_plugin_htmlrender import get_browser
+from packaging.version import parse as parse_version
 from playwright.async_api import Page
-from retrying import retry
-import rich
+from rich.progress import (
+    BarColumn,
+    DownloadColumn,
+    Progress,
+    TextColumn,
+    TransferSpeedColumn,
+)
 
 from zhenxun.configs.config import BotConfig
 from zhenxun.services.log import logger
 from zhenxun.utils.message import MessageUtils
 from zhenxun.utils.user_agent import get_user_agent
 
-# from .browser import get_browser
+
+def get_async_client(
+    proxies: dict[str, str] | None = None, verify: bool = False, **kwargs
+) -> httpx.AsyncClient:
+    check_httpx_version = parse_version(httpx.__version__) >= parse_version("0.28.0")
+    transport = kwargs.pop("transport", None) or AsyncHTTPTransport(verify=verify)
+
+    if not check_httpx_version:
+        return httpx.AsyncClient(proxies=proxies, transport=transport, **kwargs)  # type: ignore
+    proxy_str = None
+    if proxies:
+        proxy_str = proxies.get("http://") or proxies.get("https://")
+        if not proxy_str:
+            logger.warning(f"代理字典 {proxies} 中未能提取出有效的URL,代理已被忽略。")
+    return httpx.AsyncClient(proxy=proxy_str, transport=transport, **kwargs)  # type: ignore
 
 
 class AsyncHttpx:
-    proxy: ClassVar[dict[str, str | None]] = {
-        "http://": BotConfig.system_proxy,
-        "https://": BotConfig.system_proxy,
-    }
+    default_proxy: ClassVar[dict[str, str] | None] = (
+        {
+            "http://": BotConfig.system_proxy,
+            "https://": BotConfig.system_proxy,
+        }
+        if BotConfig.system_proxy
+        else None
+    )
+
+    @classmethod
+    @asynccontextmanager
+    async def _create_client(
+        cls,
+        *,
+        use_proxy: bool = True,
+        proxy: dict[str, str] | None = None,
+        headers: dict[str, str] | None = None,
+        verify: bool = False,
+        **kwargs,
+    ) -> AsyncGenerator[httpx.AsyncClient, None]:
+        """创建一个私有的、配置好的 httpx.AsyncClient 上下文管理器。
+
+        说明:
+            此方法用于内部统一创建客户端,处理代理和请求头逻辑,减少代码重复。
+
+        参数:
+            use_proxy: 是否使用在类中定义的默认代理。
+            proxy: 手动指定的代理,会覆盖默认代理。
+            headers: 需要合并到客户端的自定义请求头。
+            verify: 是否验证 SSL 证书。
+            **kwargs: 其他所有传递给 httpx.AsyncClient 的参数。
+
+        返回:
+            AsyncGenerator[httpx.AsyncClient, None]: 生成器。
+        """
+        proxies_to_use = proxy or (cls.default_proxy if use_proxy else None)
+
+        final_headers = get_user_agent()
+        if headers:
+            final_headers.update(headers)
+
+        async with get_async_client(
+            proxies=proxies_to_use, verify=verify, headers=final_headers, **kwargs
+        ) as client:
+            yield client
 
     @classmethod
-    @retry(stop_max_attempt_number=3)
     async def get(
         cls,
         url: str | list[str],
         *,
-        params: dict[str, Any] | None = None,
-        headers: dict[str, str] | None = None,
-        cookies: dict[str, str] | None = None,
-        verify: bool = True,
-        use_proxy: bool = True,
-        proxy: dict[str, str] | None = None,
-        timeout: int = 30,  # noqa: ASYNC109
         check_status_code: int | None = None,
         **kwargs,
-    ) -> Response:
-        """Get
+    ) -> Response:  # sourcery skip: use-assigned-variable
+        """发送 GET 请求,并返回第一个成功的响应。
+
+        说明:
+            本方法是 httpx.get 的高级包装,增加了多链接尝试、自动重试和统一的代理管理。
+            如果提供 URL 列表,它将依次尝试直到成功为止。
 
         参数:
-            url: url
-            params: params
-            headers: 请求头
-            cookies: cookies
-            verify: verify
-            use_proxy: 使用默认代理
-            proxy: 指定代理
-            timeout: 超时时间
-            check_status_code: 检查状态码
+            url: 单个请求 URL 或一个 URL 列表。
+            check_status_code: (可选) 若提供,将检查响应状态码是否匹配,否则抛出异常。
+            **kwargs: 其他所有传递给 httpx.get 的参数
+                (如 `params`, `headers`, `timeout`等)。
+
+        返回:
+            Response: Response
+
         """
         urls = [url] if isinstance(url, str) else url
-        return await cls._get_first_successful(
-            urls,
-            params=params,
-            headers=headers,
-            cookies=cookies,
-            verify=verify,
-            use_proxy=use_proxy,
-            proxy=proxy,
-            timeout=timeout,
-            check_status_code=check_status_code,
-            **kwargs,
-        )
-
-    @classmethod
-    async def _get_first_successful(
-        cls,
-        urls: list[str],
-        check_status_code: int | None = None,
-        **kwargs,
-    ) -> Response:
         last_exception = None
-        for url in urls:
+        for current_url in urls:
             try:
-                logger.info(f"开始获取 {url}..")
-                response = await cls._get_single(url, **kwargs)
+                logger.info(f"开始获取 {current_url}..")
+                async with cls._create_client(**kwargs) as client:
+                    # 从 kwargs 中提取仅 client.get 支持的参数
+                    get_kwargs = {
+                        k: v
+                        for k, v in kwargs.items()
+                        if k not in ["use_proxy", "proxy", "verify", "headers"]
+                    }
+                    response = await client.get(current_url, **get_kwargs)
+
                 if check_status_code and response.status_code != check_status_code:
-                    status_code = response.status_code
-                    raise Exception(f"状态码错误:{status_code}!={check_status_code}")
+                    raise HTTPStatusError(
+                        f"状态码错误: {response.status_code}!={check_status_code}",
+                        request=response.request,
+                        response=response,
+                    )
                 return response
             except Exception as e:
                 last_exception = e
-                if url != urls[-1]:
-                    logger.warning(f"获取 {url} 失败, 尝试下一个")
-        raise last_exception or Exception("All URLs failed")
+                if current_url != urls[-1]:
+                    logger.warning(f"获取 {current_url} 失败, 尝试下一个", e=e)
+
+        raise last_exception or Exception("所有URL都获取失败")
 
     @classmethod
-    async def _get_single(
-        cls,
-        url: str,
-        *,
-        params: dict[str, Any] | None = None,
-        headers: dict[str, str] | None = None,
-        cookies: dict[str, str] | None = None,
-        verify: bool = True,
-        use_proxy: bool = True,
-        proxy: dict[str, str] | None = None,
-        timeout: int = 30,  # noqa: ASYNC109
-        **kwargs,
-    ) -> Response:
-        if not headers:
-            headers = get_user_agent()
-        _proxy = proxy or (cls.proxy if use_proxy else None)
-        async with httpx.AsyncClient(proxies=_proxy, verify=verify) as client:  # type: ignore
-            return await client.get(
-                url,
-                params=params,
-                headers=headers,
-                cookies=cookies,
-                timeout=timeout,
-                **kwargs,
-            )
-
-    @classmethod
-    async def head(
-        cls,
-        url: str,
-        *,
-        params: dict[str, Any] | None = None,
-        headers: dict[str, str] | None = None,
-        cookies: dict[str, str] | None = None,
-        verify: bool = True,
-        use_proxy: bool = True,
-        proxy: dict[str, str] | None = None,
-        timeout: int = 30,  # noqa: ASYNC109
-        **kwargs,
-    ) -> Response:
-        """Get
-
-        参数:
-            url: url
-            params: params
-            headers: 请求头
-            cookies: cookies
-            verify: verify
-            use_proxy: 使用默认代理
-            proxy: 指定代理
-            timeout: 超时时间
-        """
-        if not headers:
-            headers = get_user_agent()
-        _proxy = proxy or (cls.proxy if use_proxy else None)
-        async with httpx.AsyncClient(proxies=_proxy, verify=verify) as client:  # type: ignore
-            return await client.head(
-                url,
-                params=params,
-                headers=headers,
-                cookies=cookies,
-                timeout=timeout,
-                **kwargs,
-            )
-
-    @classmethod
-    async def post(
-        cls,
-        url: str,
-        *,
-        data: dict[str, Any] | None = None,
-        content: Any = None,
-        files: Any = None,
-        verify: bool = True,
-        use_proxy: bool = True,
-        proxy: dict[str, str] | None = None,
-        json: dict[str, Any] | None = None,
-        params: dict[str, str] | None = None,
-        headers: dict[str, str] | None = None,
-        cookies: dict[str, str] | None = None,
-        timeout: int = 30,  # noqa: ASYNC109
-        **kwargs,
-    ) -> Response:
-        """
+    async def head(cls, url: str, **kwargs) -> Response:
+        """发送 HEAD 请求。
+
         说明:
-            Post
+            本方法是对 httpx.head 的封装,通常用于检查资源的元信息(如大小、类型)。
 
         参数:
-            url: url
-            data: data
-            content: content
-            files: files
-            use_proxy: 是否默认代理
-            proxy: 指定代理
-            json: json
-            params: params
-            headers: 请求头
-            cookies: cookies
-            timeout: 超时时间
+            url: 请求的 URL。
+            **kwargs: 其他所有传递给 httpx.head 的参数
+                (如 `headers`, `timeout`, `allow_redirects`)。
+
+        返回:
+            Response: Response
+
         """
-        if not headers:
-            headers = get_user_agent()
-        _proxy = proxy or (cls.proxy if use_proxy else None)
-        async with httpx.AsyncClient(proxies=_proxy, verify=verify) as client:  # type: ignore
-            return await client.post(
-                url,
-                content=content,
-                data=data,
-                files=files,
-                json=json,
-                params=params,
-                headers=headers,
-                cookies=cookies,
-                timeout=timeout,
-                **kwargs,
-            )
+        async with cls._create_client(**kwargs) as client:
+            head_kwargs = {
+                k: v
+                for k, v in kwargs.items()
+                if k not in ["use_proxy", "proxy", "verify"]
+            }
+            return await client.head(url, **head_kwargs)
+
+    @classmethod
+    async def post(cls, url: str, **kwargs) -> Response:
+        """发送 POST 请求。
+
+        说明:
+            本方法是对 httpx.post 的封装,提供了统一的代理和客户端管理。
+
+        参数:
+            url: 请求的 URL。
+            **kwargs: 其他所有传递给 httpx.post 的参数
+                (如 `data`, `json`, `content` 等)。
+
+        返回:
+            Response: Response。
+        """
+        async with cls._create_client(**kwargs) as client:
+            post_kwargs = {
+                k: v
+                for k, v in kwargs.items()
+                if k not in ["use_proxy", "proxy", "verify"]
+            }
+            return await client.post(url, **post_kwargs)
 
     @classmethod
     async def get_content(cls, url: str, **kwargs) -> bytes:
+        """获取指定 URL 的二进制内容。
+
+        说明:
+            这是一个便捷方法,等同于调用 get() 后再访问 .content 属性。
+
+        参数:
+            url: 请求的 URL。
+            **kwargs: 所有传递给 get() 方法的参数。
+
+        返回:
+            bytes: 响应内容的二进制字节流 (bytes)。
+        """
         res = await cls.get(url, **kwargs)
         return res.content
 
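For context, a hypothetical caller of the reworked `AsyncHttpx.get` (the mirror URLs are placeholders, and the module path assumes the usual zhenxun layout): per-request options travel in `**kwargs`, and the client-level keys (`use_proxy`, `proxy`, `verify`, `headers`) are filtered out before the rest reaches `client.get`.

from zhenxun.utils.http_utils import AsyncHttpx


async def fetch_release_notes() -> bytes:
    # Each URL is tried in order; the first success wins, and the last
    # exception is re-raised only after every mirror has failed.
    response = await AsyncHttpx.get(
        ["https://mirror-a.example/notes.txt", "https://mirror-b.example/notes.txt"],
        check_status_code=200,
        timeout=30,
        use_proxy=False,
    )
    return response.content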
@@ -223,195 +209,143 @@ class AsyncHttpx:
         url: str | list[str],
         path: str | Path,
         *,
-        params: dict[str, str] | None = None,
-        verify: bool = True,
-        use_proxy: bool = True,
-        proxy: dict[str, str] | None = None,
-        headers: dict[str, str] | None = None,
-        cookies: dict[str, str] | None = None,
-        timeout: int = 30,  # noqa: ASYNC109
         stream: bool = False,
-        follow_redirects: bool = True,
         **kwargs,
     ) -> bool:
-        """下载文件
+        """下载文件到指定路径。
+
+        说明:
+            支持多链接尝试和流式下载(带进度条)。
 
         参数:
-            url: url
-            path: 存储路径
-            params: params
-            verify: verify
-            use_proxy: 使用代理
-            proxy: 指定代理
-            headers: 请求头
-            cookies: cookies
-            timeout: 超时时间
-            stream: 是否使用流式下载(流式写入+进度条,适用于下载大文件)
+            url: 单个文件 URL 或一个备用 URL 列表。
+            path: 文件保存的本地路径。
+            stream: (可选) 是否使用流式下载,适用于大文件,默认为 False。
+            **kwargs: 其他所有传递给 get() 方法或 httpx.stream() 的参数。
+
+        返回:
+            bool: 是否下载成功。
         """
-        if isinstance(path, str):
-            path = Path(path)
+        path = Path(path)
         path.parent.mkdir(parents=True, exist_ok=True)
-        try:
-            for _ in range(3):
-                if not isinstance(url, list):
-                    url = [url]
-                for u in url:
-                    try:
-                        if not stream:
-                            response = await cls.get(
-                                u,
-                                params=params,
-                                headers=headers,
-                                cookies=cookies,
-                                use_proxy=use_proxy,
-                                proxy=proxy,
-                                timeout=timeout,
-                                follow_redirects=follow_redirects,
-                                **kwargs,
-                            )
-                            response.raise_for_status()
-                            content = response.content
-                            async with aiofiles.open(path, "wb") as wf:
-                                await wf.write(content)
-                            logger.info(f"下载 {u} 成功.. Path:{path.absolute()}")
-                        else:
-                            if not headers:
-                                headers = get_user_agent()
-                            _proxy = proxy or (cls.proxy if use_proxy else None)
-                            async with httpx.AsyncClient(
-                                proxies=_proxy,  # type: ignore
-                                verify=verify,
-                            ) as client:
-                                async with client.stream(
-                                    "GET",
-                                    u,
-                                    params=params,
-                                    headers=headers,
-                                    cookies=cookies,
-                                    timeout=timeout,
-                                    follow_redirects=True,
-                                    **kwargs,
-                                ) as response:
-                                    response.raise_for_status()
-                                    logger.info(
-                                        f"开始下载 {path.name}.. "
-                                        f"Url: {u}.. "
-                                        f"Path: {path.absolute()}"
-                                    )
-                                    async with aiofiles.open(path, "wb") as wf:
-                                        total = int(
-                                            response.headers.get("Content-Length", 0)
-                                        )
-                                        with rich.progress.Progress(  # type: ignore
-                                            rich.progress.TextColumn(path.name),  # type: ignore
-                                            "[progress.percentage]{task.percentage:>3.0f}%",  # type: ignore
-                                            rich.progress.BarColumn(bar_width=None),  # type: ignore
-                                            rich.progress.DownloadColumn(),  # type: ignore
-                                            rich.progress.TransferSpeedColumn(),  # type: ignore
-                                        ) as progress:
-                                            download_task = progress.add_task(
-                                                "Download",
-                                                total=total or None,
-                                            )
-                                            async for chunk in response.aiter_bytes():
-                                                await wf.write(chunk)
-                                                await wf.flush()
-                                                progress.update(
-                                                    download_task,
-                                                    completed=response.num_bytes_downloaded,
-                                                )
-                                            logger.info(
-                                                f"下载 {u} 成功.. Path:{path.absolute()}"
-                                            )
-                        return True
-                    except (TimeoutError, ConnectTimeout, HTTPStatusError):
-                        logger.warning(f"下载 {u} 失败.. 尝试下一个地址..")
-                    except EndOfStream as e:
-                        logger.warning(
-                            f"下载 {url} EndOfStream 异常 Path:{path.absolute()}", e=e
-                        )
-                        if path.exists():
-                            return True
-            logger.error(f"下载 {url} 下载超时.. Path:{path.absolute()}")
-        except Exception as e:
-            logger.error(f"下载 {url} 错误 Path:{path.absolute()}", e=e)
+
+        urls = [url] if isinstance(url, str) else url
+
+        for current_url in urls:
+            try:
+                if not stream:
+                    response = await cls.get(current_url, **kwargs)
+                    response.raise_for_status()
+                    async with aiofiles.open(path, "wb") as f:
+                        await f.write(response.content)
+                else:
+                    async with cls._create_client(**kwargs) as client:
+                        stream_kwargs = {
+                            k: v
+                            for k, v in kwargs.items()
+                            if k not in ["use_proxy", "proxy", "verify"]
+                        }
+                        async with client.stream(
+                            "GET", current_url, **stream_kwargs
+                        ) as response:
+                            response.raise_for_status()
+                            total = int(response.headers.get("Content-Length", 0))
+
+                            with Progress(
+                                TextColumn(path.name),
+                                "[progress.percentage]{task.percentage:>3.0f}%",
+                                BarColumn(bar_width=None),
+                                DownloadColumn(),
+                                TransferSpeedColumn(),
+                            ) as progress:
+                                task_id = progress.add_task("Download", total=total)
+                                async with aiofiles.open(path, "wb") as f:
+                                    async for chunk in response.aiter_bytes():
+                                        await f.write(chunk)
+                                        progress.update(task_id, advance=len(chunk))
+
+                logger.info(f"下载 {current_url} 成功 -> {path.absolute()}")
+                return True
+
+            except Exception as e:
+                logger.warning(f"下载 {current_url} 失败,尝试下一个。错误: {e}")
+
+        logger.error(f"所有URL {urls} 下载均失败 -> {path.absolute()}")
         return False
 
     @classmethod
     async def gather_download_file(
         cls,
-        url_list: list[str] | list[list[str]],
-        path_list: list[str | Path],
+        url_list: Sequence[list[str] | str],
+        path_list: Sequence[str | Path],
         *,
-        limit_async_number: int | None = None,
-        params: dict[str, str] | None = None,
-        use_proxy: bool = True,
-        proxy: dict[str, str] | None = None,
-        headers: dict[str, str] | None = None,
-        cookies: dict[str, str] | None = None,
-        timeout: int = 30,  # noqa: ASYNC109
+        limit_async_number: int = 5,
         **kwargs,
     ) -> list[bool]:
-        """分组同时下载文件
+        """并发下载多个文件,支持为每个文件提供备用镜像链接。
+
+        说明:
+            使用 asyncio.Semaphore 来控制并发请求的数量。
+            对于 url_list 中的每个元素,如果它是一个列表,则会依次尝试直到下载成功。
 
         参数:
-            url_list: url列表
-            path_list: 存储路径列表
-            limit_async_number: 限制同时请求数量
-            params: params
-            use_proxy: 使用代理
-            proxy: 指定代理
-            headers: 请求头
-            cookies: cookies
-            timeout: 超时时间
+            url_list: 包含所有文件下载任务的列表。每个元素可以是:
+                - 一个字符串 (str): 代表该任务的唯一URL。
+                - 一个字符串列表 (list[str]): 代表该任务的多个备用/镜像URL。
+            path_list: 与 url_list 对应的文件保存路径列表。
+            limit_async_number: (可选) 最大并发下载数,默认为 5。
+            **kwargs: 其他所有传递给 download_file() 方法的参数。
+
+        返回:
+            list[bool]: 对应每个下载任务是否成功。
         """
-        if n := len(url_list) != len(path_list):
-            raise UrlPathNumberNotEqual(
-                f"Url数量与Path数量不对等,Url:{len(url_list)},Path:{len(path_list)}"
-            )
-        if limit_async_number and n > limit_async_number:
-            m = float(n) / limit_async_number
-            x = 0
-            j = limit_async_number
-            _split_url_list = []
-            _split_path_list = []
-            for _ in range(int(m)):
-                _split_url_list.append(url_list[x:j])
-                _split_path_list.append(path_list[x:j])
-                x += limit_async_number
-                j += limit_async_number
-            if int(m) < m:
-                _split_url_list.append(url_list[j:])
-                _split_path_list.append(path_list[j:])
-        else:
-            _split_url_list = [url_list]
-            _split_path_list = [path_list]
-        tasks = []
-        result_ = []
-        for x, y in zip(_split_url_list, _split_path_list):
-            tasks.extend(
-                asyncio.create_task(
-                    cls.download_file(
-                        url,
-                        path,
-                        params=params,
-                        headers=headers,
-                        cookies=cookies,
-                        use_proxy=use_proxy,
-                        timeout=timeout,
-                        proxy=proxy,
-                        **kwargs,
-                    )
-                )
-                for url, path in zip(x, y)
-            )
-            _x = await asyncio.gather(*tasks)
-            result_ = result_ + list(_x)
-            tasks.clear()
-        return result_
+        if len(url_list) != len(path_list):
+            raise ValueError("URL 列表和路径列表的长度必须相等")
+
+        semaphore = asyncio.Semaphore(limit_async_number)
+
+        async def _download_with_semaphore(
+            urls_for_one_path: str | list[str], path: str | Path
+        ):
+            async with semaphore:
+                return await cls.download_file(urls_for_one_path, path, **kwargs)
+
+        tasks = [
+            _download_with_semaphore(url_group, path)
+            for url_group, path in zip(url_list, path_list)
+        ]
+
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        final_results = []
+        for i, result in enumerate(results):
+            if isinstance(result, Exception):
+                url_info = (
+                    url_list[i]
+                    if isinstance(url_list[i], str)
+                    else ", ".join(url_list[i])
+                )
+                logger.error(f"并发下载任务 ({url_info}) 时发生错误", e=result)
+                final_results.append(False)
+            else:
+                # download_file 返回的是 bool,可以直接附加
+                final_results.append(cast(bool, result))
+
+        return final_results
 
     @classmethod
     async def get_fastest_mirror(cls, url_list: list[str]) -> list[str]:
+        """测试并返回最快的镜像地址。
+
+        说明:
+            通过并发发送 HEAD 请求来测试每个 URL 的响应时间和可用性,并按响应速度排序。
+
+        参数:
+            url_list: 需要测试的镜像 URL 列表。
+
+        返回:
+            list[str]: 按从快到慢的顺序包含了所有可用的 URL。
+        """
         assert url_list
 
         async def head_mirror(client: type[AsyncHttpx], url: str) -> dict[str, Any]:
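A hypothetical call of the new semaphore-based `gather_download_file` (URLs and paths below are placeholders): the second task carries a mirror fallback list, and `limit_async_number` caps how many transfers run at once.

from zhenxun.utils.http_utils import AsyncHttpx


async def fetch_assets() -> list[bool]:
    # Three download tasks; the semaphore inside gather_download_file caps
    # concurrency at 2 here. One bool is returned per task, and an exception
    # inside a task is logged and recorded as False instead of cancelling the
    # whole batch (asyncio.gather with return_exceptions=True).
    return await AsyncHttpx.gather_download_file(
        [
            "https://example.com/a.png",
            ["https://mirror-a.example/b.png", "https://mirror-b.example/b.png"],
            "https://example.com/c.png",
        ],
        ["resources/a.png", "resources/b.png", "resources/c.png"],
        limit_async_number=2,
    )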
@@ -480,7 +414,7 @@ class AsyncPlaywright:
         wait_until: (
             Literal["domcontentloaded", "load", "networkidle"] | None
         ) = "networkidle",
-        timeout: float | None = None,  # noqa: ASYNC109
+        timeout: float | None = None,
         type_: Literal["jpeg", "png"] | None = None,
         user_agent: str | None = None,
         cookies: list[dict[str, Any]] | dict[str, Any] | None = None,
@@ -524,9 +458,5 @@ class AsyncPlaywright:
         return None
 
 
-class UrlPathNumberNotEqual(Exception):
-    pass
-
-
 class BrowserIsNone(Exception):
     pass