feat: 优化b站解析

This commit is contained in:
HibiKier 2024-07-29 23:31:11 +08:00
parent d4a49a47e5
commit c219264968
8 changed files with 391 additions and 197 deletions

View File

@ -1,4 +1,3 @@
import os
import random import random
import secrets import secrets
from datetime import datetime from datetime import datetime
@ -103,8 +102,13 @@ class SignManage:
new_log = ( new_log = (
await SignLog.filter(user_id=session.id1).order_by("-create_time").first() await SignLog.filter(user_id=session.id1).order_by("-create_time").first()
) )
log_time = None
if new_log:
log_time = new_log.create_time.astimezone(
pytz.timezone("Asia/Shanghai")
).date()
if not is_card_view: if not is_card_view:
if not new_log or (new_log and new_log.create_time.date() != now.date()): if not new_log or (log_time and log_time != now.date()):
return await cls._handle_sign_in(user, nickname, session) return await cls._handle_sign_in(user, nickname, session)
return await get_card( return await get_card(
user, nickname, -1, user_console.gold, "", is_card_view=is_card_view user, nickname, -1, user_console.gold, "", is_card_view=is_card_view

View File

@ -110,7 +110,7 @@ class BaHandle(BaseHandle[BaChar]):
async def _update_info(self): async def _update_info(self):
# TODO: ba获取链接失效 # TODO: ba获取链接失效
info = {} info = {}
url = "https://lonqie.github.io/SchaleDB/data/cn/students.min.json?v=49" url = "https://schale.gg/data/cn/students.min.json?v=49"
result = (await AsyncHttpx.get(url)).json() result = (await AsyncHttpx.get(url)).json()
if not result: if not result:
logger.warning(f"更新 {self.game_name_cn} 出错") logger.warning(f"更新 {self.game_name_cn} 出错")
@ -119,12 +119,14 @@ class BaHandle(BaseHandle[BaChar]):
for char in result: for char in result:
try: try:
name = char["Name"] name = char["Name"]
id = str(char["Id"])
avatar = ( avatar = (
"https://github.com/lonqie/SchaleDB/raw/main/images/student/icon/" "https://github.com/SchaleDB/SchaleDB/raw/main/images/student/icon/"
+ char["CollectionTexture"] + id
+ ".png" + ".webp"
) )
star = char["StarGrade"] star = char["StarGrade"]
star = char["StarGrade"]
except IndexError: except IndexError:
continue continue
member_dict = { member_dict = {

View File

@ -1,14 +1,22 @@
import re
import time
import ujson as json
from nonebot import on_message from nonebot import on_message
from nonebot.plugin import PluginMetadata from nonebot.plugin import PluginMetadata
from nonebot_plugin_alconna import UniMsg from nonebot_plugin_alconna import Hyper, UniMsg
from nonebot_plugin_saa import Image, MessageFactory, Text
from nonebot_plugin_session import EventSession from nonebot_plugin_session import EventSession
from zhenxun.configs.path_config import TEMP_PATH
from zhenxun.configs.utils import PluginExtraData, RegisterConfig, Task from zhenxun.configs.utils import PluginExtraData, RegisterConfig, Task
from zhenxun.models.group_console import GroupConsole from zhenxun.models.group_console import GroupConsole
from zhenxun.models.task_info import TaskInfo from zhenxun.models.task_info import TaskInfo
from zhenxun.services.log import logger from zhenxun.services.log import logger
from zhenxun.utils.http_utils import AsyncHttpx
from .data_source import Parser from .information_container import InformationContainer
from .parse_url import parse_bili_url
__plugin_meta__ = PluginMetadata( __plugin_meta__ = PluginMetadata(
name="B站转发解析", name="B站转发解析",
@ -48,10 +56,132 @@ async def _rule(session: EventSession) -> bool:
_matcher = on_message(priority=1, block=False, rule=_rule) _matcher = on_message(priority=1, block=False, rule=_rule)
_tmp = {}
@_matcher.handle() @_matcher.handle()
async def _(session: EventSession, message: UniMsg): async def _(session: EventSession, message: UniMsg):
information_container = InformationContainer()
# 判断文本消息内容是否相关
match = None
# 判断文本消息和小程序的内容是否指向一个b站链接
get_url = None
# 判断文本消息是否包含视频相关内容
vd_flag = False
# 设定时间阈值,阈值之下不会解析重复内容
repet_second = 300
# 尝试解析小程序消息
data = message[0] data = message[0]
if result := await Parser.parse(data, message.extract_plain_text().strip()): if isinstance(data, Hyper) and data.raw:
await result.send() try:
logger.info(f"b站转发解析: {result}", "BILIBILI_PARSE", session=session) data = json.loads(data.raw)
except (IndexError, KeyError):
data = None
if data:
# 获取相关数据
meta_data = data.get("meta", {})
news_value = meta_data.get("news", {})
detail_1_value = meta_data.get("detail_1", {})
qqdocurl_value = detail_1_value.get("qqdocurl", {})
jumpUrl_value = news_value.get("jumpUrl", {})
get_url = (qqdocurl_value if qqdocurl_value else jumpUrl_value).split("?")[
0
]
# 解析文本消息
elif msg := message.extract_plain_text():
# 消息中含有视频号
if "bv" in msg.lower() or "av" in msg.lower():
match = re.search(r"((?=(?:bv|av))([A-Za-z0-9]+))", msg, re.IGNORECASE)
vd_flag = True
# 消息中含有b23的链接包括视频、专栏、动态、直播
elif "https://b23.tv" in msg:
match = re.search(r"https://b23\.tv/[^?\s]+", msg, re.IGNORECASE)
# 检查消息中是否含有直播、专栏、动态链接
elif any(
keyword in msg
for keyword in [
"https://live.bilibili.com/",
"https://www.bilibili.com/read/",
"https://www.bilibili.com/opus/",
"https://t.bilibili.com/",
]
):
pattern = r"https://(live|www\.bilibili\.com/read|www\.bilibili\.com/opus|t\.bilibili\.com)/[^?\s]+"
match = re.search(pattern, msg)
# 匹配成功,则获取链接
if match:
if vd_flag:
number = match.group(1)
get_url = f"https://www.bilibili.com/video/{number}"
else:
get_url = match.group()
if get_url:
# 将链接统一发送给处理函数
vd_info, live_info, vd_url, live_url, image_info, image_url = (
await parse_bili_url(get_url, information_container)
)
if vd_info:
# 判断一定时间内是否解析重复内容,或者是第一次解析
if (
vd_url in _tmp.keys() and time.time() - _tmp[vd_url] > repet_second
) or vd_url not in _tmp.keys():
pic = vd_info.get("pic", "") # 封面
aid = vd_info.get("aid", "") # av号
title = vd_info.get("title", "") # 标题
author = vd_info.get("owner", {}).get("name", "") # UP主
reply = vd_info.get("stat", {}).get("reply", "") # 回复
favorite = vd_info.get("stat", {}).get("favorite", "") # 收藏
coin = vd_info.get("stat", {}).get("coin", "") # 投币
like = vd_info.get("stat", {}).get("like", "") # 点赞
danmuku = vd_info.get("stat", {}).get("danmaku", "") # 弹幕
ctime = vd_info["ctime"]
date = time.strftime("%Y-%m-%d", time.localtime(ctime))
logger.info(f"解析bilibili转发 {vd_url}", "b站解析", session=session)
_tmp[vd_url] = time.time()
_path = TEMP_PATH / f"{aid}.jpg"
await AsyncHttpx.download_file(pic, _path)
await MessageFactory(
[
Image(_path),
Text(
f"av{aid}\n标题:{title}\nUP{author}\n上传日期:{date}\n回复:{reply},收藏:{favorite},投币:{coin}\n点赞:{like},弹幕:{danmuku}\n{vd_url}"
),
]
).send()
elif live_info:
if (
live_url in _tmp.keys() and time.time() - _tmp[live_url] > repet_second
) or live_url not in _tmp.keys():
uid = live_info.get("uid", "") # 主播uid
title = live_info.get("title", "") # 直播间标题
description = live_info.get("description", "") # 简介,可能会出现标签
user_cover = live_info.get("user_cover", "") # 封面
keyframe = live_info.get("keyframe", "") # 关键帧画面
live_time = live_info.get("live_time", "") # 开播时间
area_name = live_info.get("area_name", "") # 分区
parent_area_name = live_info.get("parent_area_name", "") # 父分区
logger.info(f"解析bilibili转发 {live_url}", "b站解析", session=session)
_tmp[live_url] = time.time()
await MessageFactory(
[
Image(user_cover),
Text(
f"开播用户https://space.bilibili.com/{uid}\n开播时间:{live_time}\n直播分区:{parent_area_name}——>{area_name}\n标题:{title}\n简介:{description}\n直播截图:\n"
),
Image(keyframe),
Text(f"{live_url}"),
]
).send()
elif image_info:
if (
image_url in _tmp.keys()
and time.time() - _tmp[image_url] > repet_second
) or image_url not in _tmp.keys():
logger.info(f"解析bilibili转发 {image_url}", "b站解析", session=session)
_tmp[image_url] = time.time()
await image_info.send()

View File

@ -1,186 +0,0 @@
import re
import time
import uuid
from pathlib import Path
from typing import Any
import aiohttp
import ujson as json
from bilireq import video
from nonebot_plugin_alconna import Hyper
from nonebot_plugin_saa import Image, MessageFactory, Text
from zhenxun.configs.path_config import TEMP_PATH
from zhenxun.services.log import logger
from zhenxun.utils.http_utils import AsyncPlaywright
from zhenxun.utils.user_agent import get_user_agent
class Parser:
    """Parses bilibili share cards (Hyper segments) and plain-text links
    into reply messages (video info card or article screenshot)."""

    # Timestamp of the last parse per URL; used to skip the same URL
    # when it is parsed again within 30 seconds.
    time_watch: dict[str, float] = {}

    @classmethod
    async def parse(cls, data: Any, raw: str | None = None) -> MessageFactory | None:
        """Parse a message segment and/or its plain text for bilibili content.

        Args:
            data: first message segment (may be a Hyper mini-program card)
            raw: plain-text content of the message.

        Returns:
            MessageFactory | None: reply message, or None when nothing matched
        """
        if isinstance(data, Hyper) and data.raw:
            json_data = json.loads(data.raw)
            if video_info := await cls.__parse_video_share(json_data):
                return await cls.__handle_video_info(video_info)
            if path := await cls.__parse_news_share(json_data):
                return MessageFactory([Image(path)])
        if raw:
            return await cls.__search(raw)
        return None

    @classmethod
    async def __search(cls, message: str) -> MessageFactory | None:
        """Fetch video info from a BV/av id or b23.tv short link in plain text.

        Args:
            message: text content

        Returns:
            MessageFactory | None: reply message, or None when no id/link found
        """
        if "BV" in message:
            index = message.find("BV")
            # A full BV id is 12 characters: "BV" followed by 10 characters.
            if len(message[index + 2 :]) >= 10:
                msg = message[index : index + 12]
                url = f"https://www.bilibili.com/video/{msg}"
                return await cls.__handle_video_info(
                    await video.get_video_base_info(msg), url
                )
        elif "av" in message:
            index = message.find("av")
            if len(message[index + 2 :]) >= 1:
                if r := re.search(r"av(\d+)", message):
                    url = f"https://www.bilibili.com/video/av{r.group(1)}"
                    return await cls.__handle_video_info(
                        await video.get_video_base_info(f"av{r.group(1)}"), url
                    )
        elif "https://b23.tv" in message:
            # Short link: follow the redirect to obtain the canonical video URL.
            url = (
                "https://"
                + message[message.find("b23.tv") : message.find("b23.tv") + 14]
            )
            async with aiohttp.ClientSession(headers=get_user_agent()) as session:
                async with session.get(
                    url,
                    timeout=7,
                ) as response:
                    url = (str(response.url).split("?")[0]).strip("/")
                    bvid = url.split("/")[-1]
                    return await cls.__handle_video_info(
                        await video.get_video_base_info(bvid), url
                    )
        return None

    @classmethod
    async def __handle_video_info(
        cls, vd_info: dict, url: str = ""
    ) -> MessageFactory | None:
        """Build the reply message for a video.

        Args:
            vd_info: video data (bilibili API response dict)
            url: video url.

        Returns:
            MessageFactory | None: reply message, or None when the same url
            was already parsed less than 30 seconds ago
        """
        if url:
            if url in cls.time_watch.keys() and time.time() - cls.time_watch[url] < 30:
                logger.debug("b站 url 解析在30秒内重复 跳过解析...")
                return None
            cls.time_watch[url] = time.time()
        aid = vd_info["aid"]
        title = vd_info["title"]
        author = vd_info["owner"]["name"]
        reply = vd_info["stat"]["reply"]  # reply count
        favorite = vd_info["stat"]["favorite"]  # favorite count
        coin = vd_info["stat"]["coin"]  # coin count
        # like = vd_info['stat']['like']  # like count
        # danmu = vd_info['stat']['danmaku']  # danmaku count
        date = time.strftime("%Y-%m-%d", time.localtime(vd_info["ctime"]))
        return MessageFactory(
            [
                Image(vd_info["pic"]),
                Text(
                    f"\nav{aid}\n标题:{title}\nUP{author}\n上传日期:{date}\n回复:{reply},收藏:{favorite},投币:{coin}\n{url}"
                ),
            ]
        )

    @classmethod
    async def __parse_video_share(cls, data: dict) -> dict | None:
        """Parse a shared bilibili video mini-program card.

        Args:
            data: card json data

        Returns:
            dict | None: video info, or None when the card is not a video share
        """
        try:
            if data["meta"]["detail_1"]["title"] == "哔哩哔哩":
                try:
                    async with aiohttp.ClientSession(
                        headers=get_user_agent()
                    ) as session:
                        async with session.get(
                            data["meta"]["detail_1"]["qqdocurl"],
                            timeout=7,
                        ) as response:
                            url = str(response.url).split("?")[0]
                            if url[-1] == "/":
                                url = url[:-1]
                            bvid = url.split("/")[-1]
                            return await video.get_video_base_info(bvid)
                except Exception as e:
                    logger.warning("解析b站视频失败", e=e)
        except Exception as e:
            # Missing keys: not a bilibili video card; deliberately ignored.
            pass
        return None

    @classmethod
    async def __parse_news_share(cls, data: dict) -> Path | None:
        """Parse a shared bilibili article card and screenshot the page.

        Args:
            data: card json data

        Returns:
            Path | None: screenshot path, or None when not an article share
        """
        try:
            if data["meta"]["news"]["desc"] == "哔哩哔哩专栏":
                try:
                    url = data["meta"]["news"]["jumpUrl"]
                    async with AsyncPlaywright.new_page() as page:
                        await page.goto(url, wait_until="networkidle", timeout=10000)
                        await page.set_viewport_size({"width": 2560, "height": 1080})
                        try:
                            # Dismiss the login popup when it appears.
                            await page.locator("div.bili-mini-close-icon").click()
                        except Exception:
                            pass
                        if div := await page.query_selector("#app > div"):
                            path = TEMP_PATH / f"bl_share_{uuid.uuid1()}.png"
                            await div.screenshot(
                                path=path,
                                timeout=100000,
                            )
                            return path
                except Exception as e:
                    logger.warning("解析b站专栏失败", e=e)
        except Exception as e:
            # Missing keys: not an article card; deliberately ignored.
            pass
        return None

View File

@ -0,0 +1,107 @@
import os
import re
from nonebot_plugin_saa import Image
from zhenxun.configs.path_config import TEMP_PATH
from zhenxun.services.log import logger
from zhenxun.utils.http_utils import AsyncPlaywright
from zhenxun.utils.image_utils import BuildImage
from zhenxun.utils.user_agent import get_user_agent_str
async def resize(path: str):
    """Shrink the image at *path* to half its size, overwriting it in place.

    Args:
        path (str): image file path
    """
    image = BuildImage(background=path)
    await image.resize(0.5)
    await image.save(path)
async def get_image(url) -> Image | None:
    """Screenshot a bilibili article/opus/dynamic page and return it as an Image.

    Args:
        url (str): bilibili link (read/cv..., opus/..., or t.bilibili.com/...)

    Returns:
        Image | None: screenshot image, or None when the link is not
        recognised or the screenshot fails
    """
    # Strip query parameters before matching the link type.
    url = url.split("?")[0]
    cv_match = re.search(r"read/cv([A-Za-z0-9]+)", url, re.IGNORECASE)
    opus_match = re.search(r"opus/([A-Za-z0-9]+)", url, re.IGNORECASE)
    t_opus_match = re.search(r"https://t\.bilibili\.com/(\d+)", url, re.IGNORECASE)
    # Build the cache path and the CSS selector for the content element.
    screenshot_path = None
    css = None
    if cv_match:
        screenshot_path = f"{TEMP_PATH}/bilibili_cv_{cv_match.group(1)}.png"
        css = "#app > div"
    elif opus_match:
        screenshot_path = f"{TEMP_PATH}/bilibili_opus_{opus_match.group(1)}.png"
        css = "#app > div.opus-detail > div.bili-opus-view"
    elif t_opus_match:
        t_opus_number = t_opus_match.group(1)
        screenshot_path = f"{TEMP_PATH}/bilibili_opus_{t_opus_number}.png"
        css = "#app > div.opus-detail > div.bili-opus-view"
        # t.bilibili.com and www.bilibili.com/opus serve the same content;
        # normalise to the opus URL for easier maintenance.
        url = f"https://www.bilibili.com/opus/{t_opus_number}"
    if not screenshot_path:
        # Unrecognised link type.
        return None
    try:
        # Only take a new screenshot when no cached file exists.
        if not os.path.exists(screenshot_path):
            try:
                async with AsyncPlaywright.new_page() as page:
                    await page.set_viewport_size({"width": 5120, "height": 2560})
                    # Abort static image requests to speed up the page load.
                    await page.route(
                        re.compile(r"(\.png$)|(\.jpg$)"),
                        lambda route: route.abort(),
                    )
                    await page.goto(url, wait_until="networkidle", timeout=10000)
                    # Click the content element first (dismisses overlays).
                    await page.click(css)
                    div = await page.query_selector(css)
                    # Guard: the selector may not match (page layout change).
                    if div is None:
                        logger.warning("尝试解析bilibili转发失败")
                        return None
                    await div.screenshot(
                        path=screenshot_path,
                        timeout=100000,
                        animations="disabled",
                        type="png",
                    )
                    # Halve the screenshot size before sending.
                    await resize(screenshot_path)
            except Exception as e:
                logger.warning("尝试解析bilibili转发失败", e=e)
                return None
        return Image(screenshot_path)
    except Exception as e:
        logger.error("尝试解析bilibili转发失败", e=e)
        return None

View File

@ -0,0 +1,60 @@
class InformationContainer:
    """Mutable holder for the pieces of information produced while parsing
    a bilibili link: video info, live-room info, image content and their URLs.
    """

    def __init__(
        self,
        vd_info=None,
        live_info=None,
        vd_url=None,
        live_url=None,
        image_info=None,
        image_url=None,
    ):
        # Store each value in a matching private attribute (_<name>).
        for slot, value in (
            ("vd_info", vd_info),
            ("live_info", live_info),
            ("vd_url", vd_url),
            ("live_url", live_url),
            ("image_info", image_info),
            ("image_url", image_url),
        ):
            setattr(self, f"_{slot}", value)

    @property
    def vd_info(self):
        """Video info dict, if the link resolved to a video."""
        return self._vd_info

    @property
    def live_info(self):
        """Live-room info dict, if the link resolved to a live room."""
        return self._live_info

    @property
    def vd_url(self):
        """Canonical video URL."""
        return self._vd_url

    @property
    def live_url(self):
        """Canonical live-room URL."""
        return self._live_url

    @property
    def image_info(self):
        """Screenshot image for article/opus links."""
        return self._image_info

    @property
    def image_url(self):
        """Canonical article/opus URL."""
        return self._image_url

    def update(self, updates):
        """Apply several new values at once.

        Args:
            updates (dict): maps slot name (e.g. "vd_info") to its new value;
                names without a matching attribute are silently ignored.
        """
        for slot, value in updates.items():
            attr = f"_{slot}"
            if hasattr(self, attr):
                setattr(self, attr, value)

    def get_information(self):
        """Return all slots as a
        (vd_info, live_info, vd_url, live_url, image_info, image_url) tuple."""
        return (
            self._vd_info,
            self._live_info,
            self._vd_url,
            self._live_url,
            self._image_info,
            self._image_url,
        )

View File

@ -0,0 +1,65 @@
import aiohttp
from bilireq import live, video
from zhenxun.utils.user_agent import get_user_agent
from .get_image import get_image
from .information_container import InformationContainer
async def parse_bili_url(get_url: str, information_container: InformationContainer):
    """Resolve a bilibili link and collect the matching information.

    Follows redirects (e.g. b23.tv short links), then dispatches on the
    resolved URL to fetch video info, live-room info, or a page screenshot,
    storing the result in *information_container*.

    Args:
        get_url (str): bilibili link to resolve
        information_container (InformationContainer): container updated in place

    Returns:
        tuple: (vd_info, live_info, vd_url, live_url, image_info, image_url)
    """
    response_url = ""
    # Strip a trailing slash from the input link.
    if get_url[-1] == "/":
        get_url = get_url[:-1]
    # Issue the request to follow redirects and obtain the final URL.
    async with aiohttp.ClientSession(headers=get_user_agent()) as session:
        async with session.get(
            get_url,
            timeout=7,
        ) as response:
            response_url = str(response.url).split("?")[0]
    # Strip a trailing slash from the resolved link as well.
    if response_url[-1] == "/":
        response_url = response_url[:-1]
    # Dispatch on the resolved link type.
    if response_url.startswith(
        ("https://www.bilibili.com/video", "https://m.bilibili.com/video/")
    ):
        vd_url = response_url
        vid = vd_url.split("/")[-1]
        vd_info = await video.get_video_base_info(vid)
        information_container.update({"vd_info": vd_info, "vd_url": vd_url})
    elif response_url.startswith("https://live.bilibili.com"):
        live_url = response_url
        liveid = live_url.split("/")[-1]
        live_info = await live.get_room_info_by_id(liveid)
        information_container.update({"live_info": live_info, "live_url": live_url})
    elif response_url.startswith("https://www.bilibili.com/read"):
        cv_url = response_url
        image_info = await get_image(cv_url)
        information_container.update({"image_info": image_info, "image_url": cv_url})
    elif response_url.startswith(
        ("https://www.bilibili.com/opus", "https://t.bilibili.com")
    ):
        opus_url = response_url
        image_info = await get_image(opus_url)
        information_container.update({"image_info": image_info, "image_url": opus_url})
    return information_container.get_information()

View File

@ -1,6 +1,7 @@
import os import os
import random import random
import re import re
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Awaitable, Callable from typing import Awaitable, Callable
@ -408,3 +409,14 @@ async def get_download_image_hash(url: str, mark: str) -> str:
except Exception as e: except Exception as e:
logger.warning(f"下载读取图片Hash出错", e=e) logger.warning(f"下载读取图片Hash出错", e=e)
return "" return ""
def pic2bytes(image) -> bytes:
    """Serialize *image* to PNG and return the raw bytes.

    Args:
        image: object exposing a PIL-style ``save(buffer, format=...)`` method

    Returns:
        bytes: PNG-encoded image data
    """
    with BytesIO() as stream:
        image.save(stream, format="PNG")
        return stream.getvalue()