zhenxun_bot/zhenxun/utils/http_utils.py

import time
import asyncio
from pathlib import Path
from typing import Any, Literal, ClassVar
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from asyncio.exceptions import TimeoutError
import rich
import httpx
import aiofiles
from retrying import retry
from playwright.async_api import Page
from nonebot_plugin_alconna import UniMessage
from nonebot_plugin_htmlrender import get_browser
from httpx import Response, ConnectTimeout, HTTPStatusError
from zhenxun.services.log import logger
from zhenxun.configs.config import BotConfig
from zhenxun.utils.message import MessageUtils
from zhenxun.utils.user_agent import get_user_agent
# from .browser import get_browser
class AsyncHttpx:
    """Async HTTP helpers built on httpx: GET/HEAD/POST requests with proxy
    support, multi-URL fallback, and file downloads."""

proxy: ClassVar[dict[str, str | None]] = {
"http://": BotConfig.system_proxy,
"https://": BotConfig.system_proxy,
}
@classmethod
@retry(stop_max_attempt_number=3)
async def get(
cls,
url: str | list[str],
        *,
        params: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
        cookies: dict[str, str] | None = None,
        verify: bool = True,
        use_proxy: bool = True,
        proxy: dict[str, str] | None = None,
timeout: int = 30,
**kwargs,
) -> Response:
"""Get
参数:
url: url
params: params
headers: 请求头
cookies: cookies
verify: verify
use_proxy: 使用默认代理
proxy: 指定代理
timeout: 超时时间
"""
urls = [url] if isinstance(url, str) else url
return await cls._get_first_successful(
urls,
params=params,
headers=headers,
cookies=cookies,
verify=verify,
use_proxy=use_proxy,
proxy=proxy,
timeout=timeout,
**kwargs,
)
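    # Usage sketch (illustrative only; the URLs below are placeholders, not part of
    # this project). Each URL is tried in order by _get_first_successful and the
    # first response that does not raise is returned:
    #
    #     async def fetch_json() -> dict:
    #         resp = await AsyncHttpx.get(
    #             ["https://primary.example.com/api", "https://mirror.example.com/api"],
    #             timeout=10,
    #         )
    #         resp.raise_for_status()
    #         return resp.json()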
@classmethod
async def _get_first_successful(
cls,
urls: list[str],
**kwargs,
) -> Response:
last_exception = None
for url in urls:
try:
return await cls._get_single(url, **kwargs)
except Exception as e:
last_exception = e
if url != urls[-1]:
logger.warning(f"获取 {url} 失败, 尝试下一个")
raise last_exception or Exception("All URLs failed")
@classmethod
async def _get_single(
cls,
url: str,
*,
params: dict[str, Any] | None = None,
headers: dict[str, str] | None = None,
cookies: dict[str, str] | None = None,
verify: bool = True,
use_proxy: bool = True,
proxy: dict[str, str] | None = None,
timeout: int = 30,
**kwargs,
) -> Response:
if not headers:
headers = get_user_agent()
        _proxy = proxy or (cls.proxy if use_proxy else None)
async with httpx.AsyncClient(proxies=_proxy, verify=verify) as client: # type: ignore
return await client.get(
url,
params=params,
headers=headers,
cookies=cookies,
timeout=timeout,
**kwargs,
)
@classmethod
async def head(
cls,
url: str,
*,
params: dict[str, Any] | None = None,
headers: dict[str, str] | None = None,
cookies: dict[str, str] | None = None,
verify: bool = True,
use_proxy: bool = True,
proxy: dict[str, str] | None = None,
timeout: int = 30,
**kwargs,
) -> Response:
"""Get
参数:
url: url
params: params
headers: 请求头
cookies: cookies
verify: verify
use_proxy: 使用默认代理
proxy: 指定代理
timeout: 超时时间
"""
if not headers:
headers = get_user_agent()
        _proxy = proxy or (cls.proxy if use_proxy else None)
async with httpx.AsyncClient(proxies=_proxy, verify=verify) as client: # type: ignore
return await client.head(
url,
params=params,
headers=headers,
cookies=cookies,
timeout=timeout,
**kwargs,
)
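    # Usage sketch (placeholder URL): a HEAD probe reads response headers without
    # downloading the body, e.g. to check a file's size before fetching it:
    #
    #     resp = await AsyncHttpx.head("https://example.com/file.bin", timeout=6)
    #     size = int(resp.headers.get("content-length", 0))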
@classmethod
async def post(
cls,
url: str,
*,
        data: dict[str, Any] | None = None,
        content: Any = None,
        files: Any = None,
        verify: bool = True,
        use_proxy: bool = True,
        proxy: dict[str, str] | None = None,
        json: dict[str, Any] | None = None,
        params: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
        cookies: dict[str, str] | None = None,
timeout: int = 30,
**kwargs,
) -> Response:
"""
说明:
Post
参数:
url: url
data: data
content: content
files: files
use_proxy: 是否默认代理
proxy: 指定代理
json: json
params: params
headers: 请求头
cookies: cookies
timeout: 超时时间
"""
if not headers:
headers = get_user_agent()
        _proxy = proxy or (cls.proxy if use_proxy else None)
async with httpx.AsyncClient(proxies=_proxy, verify=verify) as client: # type: ignore
return await client.post(
url,
content=content,
data=data,
files=files,
json=json,
params=params,
headers=headers,
cookies=cookies,
timeout=timeout,
**kwargs,
)
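    # Usage sketch (placeholder endpoint and payload): a JSON POST that goes through
    # the same proxy/header handling as above:
    #
    #     resp = await AsyncHttpx.post(
    #         "https://api.example.com/v1/items",
    #         json={"name": "demo"},
    #         timeout=15,
    #     )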
@classmethod
    async def get_content(cls, url: str, **kwargs) -> bytes | None:
        """GET ``url`` and return the response body, or None if the status is not 200."""
        res = await cls.get(url, **kwargs)
        return res.content if res and res.status_code == 200 else None
@classmethod
async def download_file(
cls,
url: str | list[str],
        path: str | Path,
        *,
        params: dict[str, str] | None = None,
        verify: bool = True,
        use_proxy: bool = True,
        proxy: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
        cookies: dict[str, str] | None = None,
        timeout: int = 30,
        stream: bool = False,
        follow_redirects: bool = True,
**kwargs,
) -> bool:
"""下载文件
参数:
url: url
path: 存储路径
params: params
verify: verify
use_proxy: 使用代理
proxy: 指定代理
headers: 请求头
cookies: cookies
timeout: 超时时间
stream: 是否使用流式下载流式写入+进度条适用于下载大文件
"""
if isinstance(path, str):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
try:
for _ in range(3):
if not isinstance(url, list):
url = [url]
for u in url:
try:
if not stream:
response = await cls.get(
u,
params=params,
headers=headers,
cookies=cookies,
use_proxy=use_proxy,
proxy=proxy,
timeout=timeout,
follow_redirects=follow_redirects,
**kwargs,
)
response.raise_for_status()
content = response.content
async with aiofiles.open(path, "wb") as wf:
await wf.write(content)
logger.info(f"下载 {u} 成功.. Path{path.absolute()}")
else:
if not headers:
headers = get_user_agent()
_proxy = proxy or (cls.proxy if use_proxy else None)
async with httpx.AsyncClient(
proxies=_proxy, # type: ignore
verify=verify,
) as client:
async with client.stream(
"GET",
u,
params=params,
headers=headers,
cookies=cookies,
timeout=timeout,
                                    follow_redirects=follow_redirects,
**kwargs,
) as response:
response.raise_for_status()
logger.info(
f"开始下载 {path.name}.. "
f"Path: {path.absolute()}"
)
async with aiofiles.open(path, "wb") as wf:
total = int(
response.headers.get("Content-Length", 0)
)
with rich.progress.Progress( # type: ignore
rich.progress.TextColumn(path.name), # type: ignore
"[progress.percentage]{task.percentage:>3.0f}%", # type: ignore
rich.progress.BarColumn(bar_width=None), # type: ignore
rich.progress.DownloadColumn(), # type: ignore
rich.progress.TransferSpeedColumn(), # type: ignore
) as progress:
download_task = progress.add_task(
"Download",
total=total or None,
)
async for chunk in response.aiter_bytes():
await wf.write(chunk)
await wf.flush()
progress.update(
download_task,
completed=response.num_bytes_downloaded,
)
logger.info(
f"下载 {u} 成功.. "
f"Path{path.absolute()}"
)
return True
except (TimeoutError, ConnectTimeout, HTTPStatusError):
logger.warning(f"下载 {u} 失败.. 尝试下一个地址..")
logger.error(f"下载 {url} 下载超时.. Path{path.absolute()}")
except Exception as e:
logger.error(f"下载 {url} 错误 Path{path.absolute()}", e=e)
return False
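    # Usage sketch (placeholder URL and path): stream a large file to disk with the
    # chunked write + progress bar path above; False is returned only after all URLs
    # have failed in each of the 3 retry rounds:
    #
    #     ok = await AsyncHttpx.download_file(
    #         "https://example.com/models/big_model.bin",
    #         Path("data/big_model.bin"),
    #         stream=True,
    #     )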
@classmethod
async def gather_download_file(
cls,
url_list: list[str] | list[list[str]],
        path_list: list[str | Path],
        *,
        limit_async_number: int | None = None,
        params: dict[str, str] | None = None,
        use_proxy: bool = True,
        proxy: dict[str, str] | None = None,
        headers: dict[str, str] | None = None,
        cookies: dict[str, str] | None = None,
timeout: int = 30,
**kwargs,
) -> list[bool]:
"""分组同时下载文件
参数:
url_list: url列表
path_list: 存储路径列表
limit_async_number: 限制同时请求数量
params: params
use_proxy: 使用代理
proxy: 指定代理
headers: 请求头
cookies: cookies
timeout: 超时时间
"""
        if (n := len(url_list)) != len(path_list):
            raise UrlPathNumberNotEqual(
                f"Url数量与Path数量不对等, Url: {len(url_list)}, Path: {len(path_list)}"
            )
if limit_async_number and n > limit_async_number:
m = float(n) / limit_async_number
x = 0
j = limit_async_number
_split_url_list = []
_split_path_list = []
for _ in range(int(m)):
_split_url_list.append(url_list[x:j])
_split_path_list.append(path_list[x:j])
x += limit_async_number
j += limit_async_number
            if int(m) < m:
                # the leftover tail (fewer than limit_async_number items) starts at x
                _split_url_list.append(url_list[x:])
                _split_path_list.append(path_list[x:])
else:
_split_url_list = [url_list]
_split_path_list = [path_list]
tasks = []
result_ = []
for x, y in zip(_split_url_list, _split_path_list):
for url, path in zip(x, y):
tasks.append(
asyncio.create_task(
cls.download_file(
url,
path,
params=params,
headers=headers,
cookies=cookies,
use_proxy=use_proxy,
timeout=timeout,
proxy=proxy,
**kwargs,
)
)
)
_x = await asyncio.gather(*tasks)
result_ = result_ + list(_x)
tasks.clear()
return result_
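    # Usage sketch (placeholder URLs and paths): download a batch of files with at
    # most 10 concurrent requests per group:
    #
    #     urls = [f"https://example.com/img/{i}.jpg" for i in range(30)]
    #     paths = [Path(f"images/{i}.jpg") for i in range(30)]
    #     results = await AsyncHttpx.gather_download_file(
    #         urls, paths, limit_async_number=10
    #     )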
@classmethod
    async def get_fastest_mirror(cls, url_list: list[str]) -> list[str]:
        """Probe each mirror with a HEAD request and return the URLs sorted by
        response time; mirrors that fail the probe are dropped."""
        assert url_list

async def head_mirror(client: type[AsyncHttpx], url: str) -> dict[str, Any]:
begin_time = time.time()
response = await client.head(url=url, timeout=6)
elapsed_time = (time.time() - begin_time) * 1000
content_length = int(response.headers.get("content-length", 0))
return {
"url": url,
"elapsed_time": elapsed_time,
"content_length": content_length,
}
logger.debug(f"开始获取最快镜像,可能需要一段时间... | URL列表{url_list}")
results = await asyncio.gather(
*(head_mirror(cls, url) for url in url_list),
return_exceptions=True,
)
_results: list[dict[str, Any]] = []
for result in results:
if isinstance(result, BaseException):
logger.warning(f"获取镜像失败,错误:{result}")
else:
logger.debug(f"获取镜像成功,结果:{result}")
_results.append(result)
_results = sorted(iter(_results), key=lambda r: r["elapsed_time"])
return [result["url"] for result in _results]
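    # Usage sketch (placeholder mirror URLs): probe the mirrors and download from
    # the fastest one that responded:
    #
    #     mirrors = await AsyncHttpx.get_fastest_mirror(
    #         ["https://mirror-a.example.com/pkg.zip", "https://mirror-b.example.com/pkg.zip"]
    #     )
    #     if mirrors:
    #         await AsyncHttpx.download_file(mirrors[0], Path("pkg.zip"))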
class AsyncPlaywright:
    """Playwright helpers for opening pages and taking element screenshots."""

@classmethod
@asynccontextmanager
async def new_page(
cls, cookies: list[dict[str, Any]] | dict[str, Any] | None = None, **kwargs
) -> AsyncGenerator[Page, None]:
"""获取一个新页面
参数:
2024-11-07 13:38:20 +08:00
cookies: cookies
2024-02-25 03:18:34 +08:00
"""
browser = await get_browser()
ctx = await browser.new_context(**kwargs)
if cookies:
if isinstance(cookies, dict):
cookies = [cookies]
await ctx.add_cookies(cookies) # type: ignore
page = await ctx.new_page()
try:
yield page
finally:
await page.close()
await ctx.close()
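    # Usage sketch (placeholder URL and selector): keyword arguments such as the
    # viewport are passed through to browser.new_context:
    #
    #     async with AsyncPlaywright.new_page(
    #         viewport={"width": 1280, "height": 720}
    #     ) as page:
    #         await page.goto("https://example.com")
    #         await page.wait_for_selector("#content")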
@classmethod
async def screenshot(
cls,
url: str,
path: Path | str,
element: str | list[str],
*,
wait_time: int | None = None,
        viewport_size: dict[str, int] | None = None,
        wait_until: (
            Literal["domcontentloaded", "load", "networkidle"] | None
        ) = "networkidle",
        timeout: float | None = None,
        type_: Literal["jpeg", "png"] | None = None,
        user_agent: str | None = None,
        cookies: list[dict[str, Any]] | dict[str, Any] | None = None,
        **kwargs,
    ) -> UniMessage | None:
"""截图,该方法仅用于简单快捷截图,复杂截图请操作 page
参数:
url: 网址
path: 存储路径
element: 元素选择
wait_time: 等待截取超时时间
viewport_size: 窗口大小
wait_until: 等待类型
timeout: 超时限制
type_: 保存类型
2024-11-07 13:38:20 +08:00
user_agent: user_agent
cookies: cookies
2024-02-25 03:18:34 +08:00
"""
        if viewport_size is None:
            viewport_size = {"width": 2560, "height": 1080}
if isinstance(path, str):
path = Path(path)
wait_time = wait_time * 1000 if wait_time else None
        element_list = [element] if isinstance(element, str) else element
        async with cls.new_page(
            cookies,
viewport=viewport_size,
user_agent=user_agent,
**kwargs,
) as page:
await page.goto(url, timeout=timeout, wait_until=wait_until)
card = page
for e in element_list:
if not card:
return None
card = await card.wait_for_selector(e, timeout=wait_time)
if card:
await card.screenshot(path=path, timeout=timeout, type=type_)
                return MessageUtils.build_message(path)
return None
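    # Usage sketch (placeholder URL, selector and path): capture one element and
    # build a sendable message; msg.send() must run inside an event handler:
    #
    #     msg = await AsyncPlaywright.screenshot(
    #         "https://example.com/stats",
    #         Path("screenshots/stats.png"),
    #         "#chart",
    #         wait_time=10,
    #     )
    #     if msg:
    #         await msg.send()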
class UrlPathNumberNotEqual(Exception):
    """Raised when the number of URLs does not match the number of target paths."""


class BrowserIsNone(Exception):
    """Raised when no browser instance is available."""