feat: 优化b站解析

This commit is contained in:
HibiKier 2024-07-29 23:31:11 +08:00
parent d4a49a47e5
commit c219264968
8 changed files with 391 additions and 197 deletions

View File

@ -1,4 +1,3 @@
import os
import random import random
import secrets import secrets
from datetime import datetime from datetime import datetime
@ -103,8 +102,13 @@ class SignManage:
new_log = ( new_log = (
await SignLog.filter(user_id=session.id1).order_by("-create_time").first() await SignLog.filter(user_id=session.id1).order_by("-create_time").first()
) )
log_time = None
if new_log:
log_time = new_log.create_time.astimezone(
pytz.timezone("Asia/Shanghai")
).date()
if not is_card_view: if not is_card_view:
if not new_log or (new_log and new_log.create_time.date() != now.date()): if not new_log or (log_time and log_time != now.date()):
return await cls._handle_sign_in(user, nickname, session) return await cls._handle_sign_in(user, nickname, session)
return await get_card( return await get_card(
user, nickname, -1, user_console.gold, "", is_card_view=is_card_view user, nickname, -1, user_console.gold, "", is_card_view=is_card_view

View File

@ -110,7 +110,7 @@ class BaHandle(BaseHandle[BaChar]):
async def _update_info(self): async def _update_info(self):
# TODO: ba获取链接失效 # TODO: ba获取链接失效
info = {} info = {}
url = "https://lonqie.github.io/SchaleDB/data/cn/students.min.json?v=49" url = "https://schale.gg/data/cn/students.min.json?v=49"
result = (await AsyncHttpx.get(url)).json() result = (await AsyncHttpx.get(url)).json()
if not result: if not result:
logger.warning(f"更新 {self.game_name_cn} 出错") logger.warning(f"更新 {self.game_name_cn} 出错")
@ -119,12 +119,14 @@ class BaHandle(BaseHandle[BaChar]):
for char in result: for char in result:
try: try:
name = char["Name"] name = char["Name"]
id = str(char["Id"])
avatar = ( avatar = (
"https://github.com/lonqie/SchaleDB/raw/main/images/student/icon/" "https://github.com/SchaleDB/SchaleDB/raw/main/images/student/icon/"
+ char["CollectionTexture"] + id
+ ".png" + ".webp"
) )
star = char["StarGrade"] star = char["StarGrade"]
star = char["StarGrade"]
except IndexError: except IndexError:
continue continue
member_dict = { member_dict = {

View File

@ -1,14 +1,22 @@
import re
import time
import ujson as json
from nonebot import on_message from nonebot import on_message
from nonebot.plugin import PluginMetadata from nonebot.plugin import PluginMetadata
from nonebot_plugin_alconna import UniMsg from nonebot_plugin_alconna import Hyper, UniMsg
from nonebot_plugin_saa import Image, MessageFactory, Text
from nonebot_plugin_session import EventSession from nonebot_plugin_session import EventSession
from zhenxun.configs.path_config import TEMP_PATH
from zhenxun.configs.utils import PluginExtraData, RegisterConfig, Task from zhenxun.configs.utils import PluginExtraData, RegisterConfig, Task
from zhenxun.models.group_console import GroupConsole from zhenxun.models.group_console import GroupConsole
from zhenxun.models.task_info import TaskInfo from zhenxun.models.task_info import TaskInfo
from zhenxun.services.log import logger from zhenxun.services.log import logger
from zhenxun.utils.http_utils import AsyncHttpx
from .data_source import Parser from .information_container import InformationContainer
from .parse_url import parse_bili_url
__plugin_meta__ = PluginMetadata( __plugin_meta__ = PluginMetadata(
name="B站转发解析", name="B站转发解析",
@ -48,10 +56,132 @@ async def _rule(session: EventSession) -> bool:
_matcher = on_message(priority=1, block=False, rule=_rule) _matcher = on_message(priority=1, block=False, rule=_rule)
_tmp = {}
@_matcher.handle() @_matcher.handle()
async def _(session: EventSession, message: UniMsg): async def _(session: EventSession, message: UniMsg):
information_container = InformationContainer()
# 判断文本消息内容是否相关
match = None
# 判断文本消息和小程序的内容是否指向一个b站链接
get_url = None
# 判断文本消息是否包含视频相关内容
vd_flag = False
# 设定时间阈值,阈值之下不会解析重复内容
repet_second = 300
# 尝试解析小程序消息
data = message[0] data = message[0]
if result := await Parser.parse(data, message.extract_plain_text().strip()): if isinstance(data, Hyper) and data.raw:
await result.send() try:
logger.info(f"b站转发解析: {result}", "BILIBILI_PARSE", session=session) data = json.loads(data.raw)
except (IndexError, KeyError):
data = None
if data:
# 获取相关数据
meta_data = data.get("meta", {})
news_value = meta_data.get("news", {})
detail_1_value = meta_data.get("detail_1", {})
qqdocurl_value = detail_1_value.get("qqdocurl", {})
jumpUrl_value = news_value.get("jumpUrl", {})
get_url = (qqdocurl_value if qqdocurl_value else jumpUrl_value).split("?")[
0
]
# 解析文本消息
elif msg := message.extract_plain_text():
# 消息中含有视频号
if "bv" in msg.lower() or "av" in msg.lower():
match = re.search(r"((?=(?:bv|av))([A-Za-z0-9]+))", msg, re.IGNORECASE)
vd_flag = True
# 消息中含有b23的链接包括视频、专栏、动态、直播
elif "https://b23.tv" in msg:
match = re.search(r"https://b23\.tv/[^?\s]+", msg, re.IGNORECASE)
# 检查消息中是否含有直播、专栏、动态链接
elif any(
keyword in msg
for keyword in [
"https://live.bilibili.com/",
"https://www.bilibili.com/read/",
"https://www.bilibili.com/opus/",
"https://t.bilibili.com/",
]
):
pattern = r"https://(live|www\.bilibili\.com/read|www\.bilibili\.com/opus|t\.bilibili\.com)/[^?\s]+"
match = re.search(pattern, msg)
# 匹配成功,则获取链接
if match:
if vd_flag:
number = match.group(1)
get_url = f"https://www.bilibili.com/video/{number}"
else:
get_url = match.group()
if get_url:
# 将链接统一发送给处理函数
vd_info, live_info, vd_url, live_url, image_info, image_url = (
await parse_bili_url(get_url, information_container)
)
if vd_info:
# 判断一定时间内是否解析重复内容,或者是第一次解析
if (
vd_url in _tmp.keys() and time.time() - _tmp[vd_url] > repet_second
) or vd_url not in _tmp.keys():
pic = vd_info.get("pic", "") # 封面
aid = vd_info.get("aid", "") # av号
title = vd_info.get("title", "") # 标题
author = vd_info.get("owner", {}).get("name", "") # UP主
reply = vd_info.get("stat", {}).get("reply", "") # 回复
favorite = vd_info.get("stat", {}).get("favorite", "") # 收藏
coin = vd_info.get("stat", {}).get("coin", "") # 投币
like = vd_info.get("stat", {}).get("like", "") # 点赞
danmuku = vd_info.get("stat", {}).get("danmaku", "") # 弹幕
ctime = vd_info["ctime"]
date = time.strftime("%Y-%m-%d", time.localtime(ctime))
logger.info(f"解析bilibili转发 {vd_url}", "b站解析", session=session)
_tmp[vd_url] = time.time()
_path = TEMP_PATH / f"{aid}.jpg"
await AsyncHttpx.download_file(pic, _path)
await MessageFactory(
[
Image(_path),
Text(
f"av{aid}\n标题:{title}\nUP{author}\n上传日期:{date}\n回复:{reply},收藏:{favorite},投币:{coin}\n点赞:{like},弹幕:{danmuku}\n{vd_url}"
),
]
).send()
elif live_info:
if (
live_url in _tmp.keys() and time.time() - _tmp[live_url] > repet_second
) or live_url not in _tmp.keys():
uid = live_info.get("uid", "") # 主播uid
title = live_info.get("title", "") # 直播间标题
description = live_info.get("description", "") # 简介,可能会出现标签
user_cover = live_info.get("user_cover", "") # 封面
keyframe = live_info.get("keyframe", "") # 关键帧画面
live_time = live_info.get("live_time", "") # 开播时间
area_name = live_info.get("area_name", "") # 分区
parent_area_name = live_info.get("parent_area_name", "") # 父分区
logger.info(f"解析bilibili转发 {live_url}", "b站解析", session=session)
_tmp[live_url] = time.time()
await MessageFactory(
[
Image(user_cover),
Text(
f"开播用户https://space.bilibili.com/{uid}\n开播时间:{live_time}\n直播分区:{parent_area_name}——>{area_name}\n标题:{title}\n简介:{description}\n直播截图:\n"
),
Image(keyframe),
Text(f"{live_url}"),
]
).send()
elif image_info:
if (
image_url in _tmp.keys()
and time.time() - _tmp[image_url] > repet_second
) or image_url not in _tmp.keys():
logger.info(f"解析bilibili转发 {image_url}", "b站解析", session=session)
_tmp[image_url] = time.time()
await image_info.send()

View File

@ -1,186 +0,0 @@
import re
import time
import uuid
from pathlib import Path
from typing import Any
import aiohttp
import ujson as json
from bilireq import video
from nonebot_plugin_alconna import Hyper
from nonebot_plugin_saa import Image, MessageFactory, Text
from zhenxun.configs.path_config import TEMP_PATH
from zhenxun.services.log import logger
from zhenxun.utils.http_utils import AsyncPlaywright
from zhenxun.utils.user_agent import get_user_agent
class Parser:
    """Parses bilibili share cards (Hyper segments) and plain-text links
    into reply messages (video info card or article screenshot)."""

    # Timestamp of the last parse per URL; used to skip the same URL
    # when it is parsed again within 30 seconds.
    time_watch: dict[str, float] = {}

    @classmethod
    async def parse(cls, data: Any, raw: str | None = None) -> MessageFactory | None:
        """Parse a message segment and/or its plain text for bilibili content.

        Args:
            data: first message segment (may be a Hyper mini-program card)
            raw: plain-text content of the message.

        Returns:
            MessageFactory | None: reply message, or None when nothing matched
        """
        if isinstance(data, Hyper) and data.raw:
            json_data = json.loads(data.raw)
            if video_info := await cls.__parse_video_share(json_data):
                return await cls.__handle_video_info(video_info)
            if path := await cls.__parse_news_share(json_data):
                return MessageFactory([Image(path)])
        if raw:
            return await cls.__search(raw)
        return None

    @classmethod
    async def __search(cls, message: str) -> MessageFactory | None:
        """Fetch video info from a BV/av id or b23.tv short link in plain text.

        Args:
            message: text content

        Returns:
            MessageFactory | None: reply message, or None when no id/link found
        """
        if "BV" in message:
            index = message.find("BV")
            # A full BV id is 12 characters: "BV" followed by 10 characters.
            if len(message[index + 2 :]) >= 10:
                msg = message[index : index + 12]
                url = f"https://www.bilibili.com/video/{msg}"
                return await cls.__handle_video_info(
                    await video.get_video_base_info(msg), url
                )
        elif "av" in message:
            index = message.find("av")
            if len(message[index + 2 :]) >= 1:
                if r := re.search(r"av(\d+)", message):
                    url = f"https://www.bilibili.com/video/av{r.group(1)}"
                    return await cls.__handle_video_info(
                        await video.get_video_base_info(f"av{r.group(1)}"), url
                    )
        elif "https://b23.tv" in message:
            # Short link: follow the redirect to obtain the canonical video URL.
            url = (
                "https://"
                + message[message.find("b23.tv") : message.find("b23.tv") + 14]
            )
            async with aiohttp.ClientSession(headers=get_user_agent()) as session:
                async with session.get(
                    url,
                    timeout=7,
                ) as response:
                    url = (str(response.url).split("?")[0]).strip("/")
                    bvid = url.split("/")[-1]
                    return await cls.__handle_video_info(
                        await video.get_video_base_info(bvid), url
                    )
        return None

    @classmethod
    async def __handle_video_info(
        cls, vd_info: dict, url: str = ""
    ) -> MessageFactory | None:
        """Build the reply message for a video.

        Args:
            vd_info: video data (bilibili API response dict)
            url: video url.

        Returns:
            MessageFactory | None: reply message, or None when the same url
            was already parsed less than 30 seconds ago
        """
        if url:
            if url in cls.time_watch.keys() and time.time() - cls.time_watch[url] < 30:
                logger.debug("b站 url 解析在30秒内重复 跳过解析...")
                return None
            cls.time_watch[url] = time.time()
        aid = vd_info["aid"]
        title = vd_info["title"]
        author = vd_info["owner"]["name"]
        reply = vd_info["stat"]["reply"]  # reply count
        favorite = vd_info["stat"]["favorite"]  # favorite count
        coin = vd_info["stat"]["coin"]  # coin count
        # like = vd_info['stat']['like']  # like count
        # danmu = vd_info['stat']['danmaku']  # danmaku count
        date = time.strftime("%Y-%m-%d", time.localtime(vd_info["ctime"]))
        return MessageFactory(
            [
                Image(vd_info["pic"]),
                Text(
                    f"\nav{aid}\n标题:{title}\nUP{author}\n上传日期:{date}\n回复:{reply},收藏:{favorite},投币:{coin}\n{url}"
                ),
            ]
        )

    @classmethod
    async def __parse_video_share(cls, data: dict) -> dict | None:
        """Parse a shared bilibili video mini-program card.

        Args:
            data: card json data

        Returns:
            dict | None: video info, or None when the card is not a video share
        """
        try:
            if data["meta"]["detail_1"]["title"] == "哔哩哔哩":
                try:
                    async with aiohttp.ClientSession(
                        headers=get_user_agent()
                    ) as session:
                        async with session.get(
                            data["meta"]["detail_1"]["qqdocurl"],
                            timeout=7,
                        ) as response:
                            url = str(response.url).split("?")[0]
                            if url[-1] == "/":
                                url = url[:-1]
                            bvid = url.split("/")[-1]
                            return await video.get_video_base_info(bvid)
                except Exception as e:
                    logger.warning("解析b站视频失败", e=e)
        except Exception as e:
            # Missing keys: not a bilibili video card; deliberately ignored.
            pass
        return None

    @classmethod
    async def __parse_news_share(cls, data: dict) -> Path | None:
        """Parse a shared bilibili article card and screenshot the page.

        Args:
            data: card json data

        Returns:
            Path | None: screenshot path, or None when not an article share
        """
        try:
            if data["meta"]["news"]["desc"] == "哔哩哔哩专栏":
                try:
                    url = data["meta"]["news"]["jumpUrl"]
                    async with AsyncPlaywright.new_page() as page:
                        await page.goto(url, wait_until="networkidle", timeout=10000)
                        await page.set_viewport_size({"width": 2560, "height": 1080})
                        try:
                            # Dismiss the login popup when it appears.
                            await page.locator("div.bili-mini-close-icon").click()
                        except Exception:
                            pass
                        if div := await page.query_selector("#app > div"):
                            path = TEMP_PATH / f"bl_share_{uuid.uuid1()}.png"
                            await div.screenshot(
                                path=path,
                                timeout=100000,
                            )
                            return path
                except Exception as e:
                    logger.warning("解析b站专栏失败", e=e)
        except Exception as e:
            # Missing keys: not an article card; deliberately ignored.
            pass
        return None

View File

@ -0,0 +1,107 @@
import os
import re
from nonebot_plugin_saa import Image
from zhenxun.configs.path_config import TEMP_PATH
from zhenxun.services.log import logger
from zhenxun.utils.http_utils import AsyncPlaywright
from zhenxun.utils.image_utils import BuildImage
from zhenxun.utils.user_agent import get_user_agent_str
async def resize(path: str):
    """Shrink the image at *path* to half its size, overwriting it in place.

    Args:
        path (str): image file path
    """
    image = BuildImage(background=path)
    await image.resize(0.5)
    await image.save(path)
async def get_image(url) -> Image | None:
    """Screenshot a bilibili article/opus/dynamic page and return it as an Image.

    Args:
        url (str): bilibili link (read/cv..., opus/..., or t.bilibili.com/...)

    Returns:
        Image | None: screenshot image, or None when the link is not
        recognised or the screenshot fails
    """
    # Strip query parameters before matching the link type.
    url = url.split("?")[0]
    cv_match = re.search(r"read/cv([A-Za-z0-9]+)", url, re.IGNORECASE)
    opus_match = re.search(r"opus/([A-Za-z0-9]+)", url, re.IGNORECASE)
    t_opus_match = re.search(r"https://t\.bilibili\.com/(\d+)", url, re.IGNORECASE)
    # Build the cache path and the CSS selector for the content element.
    screenshot_path = None
    css = None
    if cv_match:
        screenshot_path = f"{TEMP_PATH}/bilibili_cv_{cv_match.group(1)}.png"
        css = "#app > div"
    elif opus_match:
        screenshot_path = f"{TEMP_PATH}/bilibili_opus_{opus_match.group(1)}.png"
        css = "#app > div.opus-detail > div.bili-opus-view"
    elif t_opus_match:
        t_opus_number = t_opus_match.group(1)
        screenshot_path = f"{TEMP_PATH}/bilibili_opus_{t_opus_number}.png"
        css = "#app > div.opus-detail > div.bili-opus-view"
        # t.bilibili.com and www.bilibili.com/opus serve the same content;
        # normalise to the opus URL for easier maintenance.
        url = f"https://www.bilibili.com/opus/{t_opus_number}"
    if not screenshot_path:
        # Unrecognised link type.
        return None
    try:
        # Only take a new screenshot when no cached file exists.
        if not os.path.exists(screenshot_path):
            try:
                async with AsyncPlaywright.new_page() as page:
                    await page.set_viewport_size({"width": 5120, "height": 2560})
                    # Abort static image requests to speed up the page load.
                    await page.route(
                        re.compile(r"(\.png$)|(\.jpg$)"),
                        lambda route: route.abort(),
                    )
                    await page.goto(url, wait_until="networkidle", timeout=10000)
                    # Click the content element first (dismisses overlays).
                    await page.click(css)
                    div = await page.query_selector(css)
                    # Guard: the selector may not match (page layout change).
                    if div is None:
                        logger.warning("尝试解析bilibili转发失败")
                        return None
                    await div.screenshot(
                        path=screenshot_path,
                        timeout=100000,
                        animations="disabled",
                        type="png",
                    )
                    # Halve the screenshot size before sending.
                    await resize(screenshot_path)
            except Exception as e:
                logger.warning("尝试解析bilibili转发失败", e=e)
                return None
        return Image(screenshot_path)
    except Exception as e:
        logger.error("尝试解析bilibili转发失败", e=e)
        return None

View File

@ -0,0 +1,60 @@
class InformationContainer:
    """Mutable holder for the pieces of information produced while parsing
    a bilibili link: video info, live-room info, image content and their URLs.
    """

    def __init__(
        self,
        vd_info=None,
        live_info=None,
        vd_url=None,
        live_url=None,
        image_info=None,
        image_url=None,
    ):
        # Store each value in a matching private attribute (_<name>).
        for slot, value in (
            ("vd_info", vd_info),
            ("live_info", live_info),
            ("vd_url", vd_url),
            ("live_url", live_url),
            ("image_info", image_info),
            ("image_url", image_url),
        ):
            setattr(self, f"_{slot}", value)

    @property
    def vd_info(self):
        """Video info dict, if the link resolved to a video."""
        return self._vd_info

    @property
    def live_info(self):
        """Live-room info dict, if the link resolved to a live room."""
        return self._live_info

    @property
    def vd_url(self):
        """Canonical video URL."""
        return self._vd_url

    @property
    def live_url(self):
        """Canonical live-room URL."""
        return self._live_url

    @property
    def image_info(self):
        """Screenshot image for article/opus links."""
        return self._image_info

    @property
    def image_url(self):
        """Canonical article/opus URL."""
        return self._image_url

    def update(self, updates):
        """Apply several new values at once.

        Args:
            updates (dict): maps slot name (e.g. "vd_info") to its new value;
                names without a matching attribute are silently ignored.
        """
        for slot, value in updates.items():
            attr = f"_{slot}"
            if hasattr(self, attr):
                setattr(self, attr, value)

    def get_information(self):
        """Return all slots as a
        (vd_info, live_info, vd_url, live_url, image_info, image_url) tuple."""
        return (
            self._vd_info,
            self._live_info,
            self._vd_url,
            self._live_url,
            self._image_info,
            self._image_url,
        )

View File

@ -0,0 +1,65 @@
import aiohttp
from bilireq import live, video
from zhenxun.utils.user_agent import get_user_agent
from .get_image import get_image
from .information_container import InformationContainer
async def parse_bili_url(get_url: str, information_container: InformationContainer):
    """Resolve a bilibili link and collect the matching information.

    Follows redirects (e.g. b23.tv short links), then dispatches on the
    resolved URL to fetch video info, live-room info, or a page screenshot,
    storing the result in *information_container*.

    Args:
        get_url (str): bilibili link to resolve
        information_container (InformationContainer): container updated in place

    Returns:
        tuple: (vd_info, live_info, vd_url, live_url, image_info, image_url)
    """
    response_url = ""
    # Strip a trailing slash from the input link.
    if get_url[-1] == "/":
        get_url = get_url[:-1]
    # Issue the request to follow redirects and obtain the final URL.
    async with aiohttp.ClientSession(headers=get_user_agent()) as session:
        async with session.get(
            get_url,
            timeout=7,
        ) as response:
            response_url = str(response.url).split("?")[0]
    # Strip a trailing slash from the resolved link as well.
    if response_url[-1] == "/":
        response_url = response_url[:-1]
    # Dispatch on the resolved link type.
    if response_url.startswith(
        ("https://www.bilibili.com/video", "https://m.bilibili.com/video/")
    ):
        vd_url = response_url
        vid = vd_url.split("/")[-1]
        vd_info = await video.get_video_base_info(vid)
        information_container.update({"vd_info": vd_info, "vd_url": vd_url})
    elif response_url.startswith("https://live.bilibili.com"):
        live_url = response_url
        liveid = live_url.split("/")[-1]
        live_info = await live.get_room_info_by_id(liveid)
        information_container.update({"live_info": live_info, "live_url": live_url})
    elif response_url.startswith("https://www.bilibili.com/read"):
        cv_url = response_url
        image_info = await get_image(cv_url)
        information_container.update({"image_info": image_info, "image_url": cv_url})
    elif response_url.startswith(
        ("https://www.bilibili.com/opus", "https://t.bilibili.com")
    ):
        opus_url = response_url
        image_info = await get_image(opus_url)
        information_container.update({"image_info": image_info, "image_url": opus_url})
    return information_container.get_information()

View File

@ -1,6 +1,7 @@
import os import os
import random import random
import re import re
from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Awaitable, Callable from typing import Awaitable, Callable
@ -408,3 +409,14 @@ async def get_download_image_hash(url: str, mark: str) -> str:
except Exception as e: except Exception as e:
logger.warning(f"下载读取图片Hash出错", e=e) logger.warning(f"下载读取图片Hash出错", e=e)
return "" return ""
def pic2bytes(image) -> bytes:
    """Serialize *image* to PNG and return the raw bytes.

    Args:
        image: object exposing a PIL-style ``save(buffer, format=...)`` method

    Returns:
        bytes: PNG-encoded image data
    """
    with BytesIO() as stream:
        image.save(stream, format="PNG")
        return stream.getvalue()