mirror of
https://github.com/zhenxun-org/zhenxun_bot.git
synced 2025-12-15 14:22:55 +08:00
commit
89b87a38b3
@ -1,19 +1,14 @@
|
|||||||
import re
|
import re
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from typing import Tuple, Union
|
from typing import Tuple, Union
|
||||||
|
import pytz
|
||||||
try:
|
|
||||||
from zoneinfo import ZoneInfo
|
|
||||||
except ImportError:
|
|
||||||
from backports.zoneinfo import ZoneInfo # type: ignore
|
|
||||||
|
|
||||||
from nonebot import on_command, get_driver
|
from nonebot import on_command, get_driver
|
||||||
from nonebot.adapters.onebot.v11 import Message, MessageSegment
|
from nonebot.adapters.onebot.v11 import Message, MessageSegment
|
||||||
from nonebot.adapters.onebot.v11.event import GroupMessageEvent
|
from nonebot.adapters.onebot.v11.event import GroupMessageEvent
|
||||||
from nonebot.matcher import Matcher
|
from nonebot.matcher import Matcher
|
||||||
from nonebot.params import Arg, Command, CommandArg, Depends
|
from nonebot.params import Arg, Command, CommandArg, Depends
|
||||||
from nonebot.typing import T_State
|
from nonebot.typing import T_State
|
||||||
from .data_source import draw_word_cloud, get_list_msg
|
from .data_source import draw_word_cloud, get_list_msg
|
||||||
from configs.config import Config
|
from configs.config import Config
|
||||||
|
|
||||||
__zx_plugin_name__ = "词云"
|
__zx_plugin_name__ = "词云"
|
||||||
@ -191,11 +186,10 @@ async def handle_message(
|
|||||||
user_id = int(event.user_id)
|
user_id = int(event.user_id)
|
||||||
else:
|
else:
|
||||||
user_id = None
|
user_id = None
|
||||||
|
# 将时间转换到 东八 时区
|
||||||
# 排除机器人自己发的消息
|
|
||||||
# 将时间转换到 UTC 时区
|
|
||||||
messages = await get_list_msg(user_id, int(event.group_id),
|
messages = await get_list_msg(user_id, int(event.group_id),
|
||||||
days=[start.astimezone(ZoneInfo("UTC")), stop.astimezone(ZoneInfo("UTC"))])
|
days=(start.astimezone(pytz.timezone("Asia/Shanghai")),
|
||||||
|
stop.astimezone(pytz.timezone("Asia/Shanghai"))))
|
||||||
if messages:
|
if messages:
|
||||||
image_bytes = await draw_word_cloud(messages, get_driver().config)
|
image_bytes = await draw_word_cloud(messages, get_driver().config)
|
||||||
if image_bytes:
|
if image_bytes:
|
||||||
|
|||||||
@ -3,7 +3,6 @@ import os
|
|||||||
import random
|
import random
|
||||||
import jieba.analyse
|
import jieba.analyse
|
||||||
import re
|
import re
|
||||||
from collections import Counter
|
|
||||||
from typing import List
|
from typing import List
|
||||||
from PIL import Image as IMG
|
from PIL import Image as IMG
|
||||||
import jieba
|
import jieba
|
||||||
@ -12,18 +11,18 @@ from wordcloud import WordCloud, ImageColorGenerator
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from configs.path_config import IMAGE_PATH, FONT_PATH, TEXT_PATH
|
from configs.path_config import IMAGE_PATH, FONT_PATH
|
||||||
from utils.http_utils import AsyncHttpx
|
from utils.http_utils import AsyncHttpx
|
||||||
from models.chat_history import ChatHistory
|
from models.chat_history import ChatHistory
|
||||||
from configs.config import Config
|
from configs.config import Config
|
||||||
|
|
||||||
|
|
||||||
async def pre_precess(msg: List[str], wordcloud_stopwords_dir: str, config) -> str:
|
async def pre_precess(msg: List[str], config) -> str:
|
||||||
return await asyncio.get_event_loop().run_in_executor(
|
return await asyncio.get_event_loop().run_in_executor(
|
||||||
None, _pre_precess, msg, wordcloud_stopwords_dir, config)
|
None, _pre_precess, msg,config)
|
||||||
|
|
||||||
|
|
||||||
def _pre_precess(msg: List[str], wordcloud_stopwords_dir, config) -> str:
|
def _pre_precess(msg: List[str],config) -> str:
|
||||||
"""对消息进行预处理"""
|
"""对消息进行预处理"""
|
||||||
# 过滤掉命令
|
# 过滤掉命令
|
||||||
command_start = tuple([i for i in config.command_start if i])
|
command_start = tuple([i for i in config.command_start if i])
|
||||||
@ -33,27 +32,20 @@ def _pre_precess(msg: List[str], wordcloud_stopwords_dir, config) -> str:
|
|||||||
msg = re.sub(r"https?://[\w/:%#\$&\?\(\)~\.=\+\-]+", "", msg)
|
msg = re.sub(r"https?://[\w/:%#\$&\?\(\)~\.=\+\-]+", "", msg)
|
||||||
|
|
||||||
# 去除 \u200b
|
# 去除 \u200b
|
||||||
msg = re.sub(r"\u200b", "", msg)
|
msg = re.sub(r"[\u200b]", "", msg)
|
||||||
|
|
||||||
# 去除cq码
|
# 去除cq码
|
||||||
msg = re.sub(r"\[CQ:.*?]", "", msg)
|
msg = re.sub(r"\[CQ:.*?]", "", msg)
|
||||||
|
|
||||||
|
# 去除[]
|
||||||
|
msg = re.sub("[	(1|3);]", "", msg)
|
||||||
|
|
||||||
# 去除 emoji
|
# 去除 emoji
|
||||||
# https://github.com/carpedm20/emoji
|
# https://github.com/carpedm20/emoji
|
||||||
msg = replace_emoji(msg)
|
msg = replace_emoji(msg)
|
||||||
# 分词
|
|
||||||
msg = "".join(cut_message(msg, wordcloud_stopwords_dir))
|
|
||||||
return msg
|
return msg
|
||||||
|
|
||||||
|
|
||||||
def cut_message(msg: str, wordcloud_stopwords_dir) -> List[str]:
|
|
||||||
"""分词"""
|
|
||||||
with wordcloud_stopwords_dir.open("r", encoding="utf8") as f:
|
|
||||||
stopwords = [word.strip() for word in f.readlines()]
|
|
||||||
f.close()
|
|
||||||
words = jieba.lcut(msg)
|
|
||||||
return [word.strip() for word in words if word.strip() not in stopwords]
|
|
||||||
|
|
||||||
|
|
||||||
async def draw_word_cloud(messages, config):
|
async def draw_word_cloud(messages, config):
|
||||||
wordcloud_dir = IMAGE_PATH / "wordcloud"
|
wordcloud_dir = IMAGE_PATH / "wordcloud"
|
||||||
@ -61,10 +53,6 @@ async def draw_word_cloud(messages, config):
|
|||||||
# 默认用真寻图片
|
# 默认用真寻图片
|
||||||
zx_logo_path = wordcloud_dir / "default.png"
|
zx_logo_path = wordcloud_dir / "default.png"
|
||||||
wordcloud_ttf = FONT_PATH / "STKAITI.TTF"
|
wordcloud_ttf = FONT_PATH / "STKAITI.TTF"
|
||||||
|
|
||||||
wordcloud_test_dir = TEXT_PATH / "wordcloud"
|
|
||||||
wordcloud_test_dir.mkdir(exist_ok=True, parents=True)
|
|
||||||
wordcloud_stopwords_dir = wordcloud_test_dir / "stopwords.txt"
|
|
||||||
if not os.listdir(wordcloud_dir):
|
if not os.listdir(wordcloud_dir):
|
||||||
url = "https://ghproxy.com/https://raw.githubusercontent.com/HibiKier/zhenxun_bot/main/resources/image/wordcloud/default.png"
|
url = "https://ghproxy.com/https://raw.githubusercontent.com/HibiKier/zhenxun_bot/main/resources/image/wordcloud/default.png"
|
||||||
try:
|
try:
|
||||||
@ -77,15 +65,9 @@ async def draw_word_cloud(messages, config):
|
|||||||
await AsyncHttpx.download_file(ttf_url, wordcloud_ttf)
|
await AsyncHttpx.download_file(ttf_url, wordcloud_ttf)
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
if not wordcloud_stopwords_dir.exists():
|
|
||||||
stopword_url = 'https://ghproxy.com/https://raw.githubusercontent.com/HibiKier/zhenxun_bot/main/resources/text/wordcloud/stopwords.txt'
|
|
||||||
try:
|
|
||||||
await AsyncHttpx.download_file(stopword_url, wordcloud_stopwords_dir)
|
|
||||||
except:
|
|
||||||
return False
|
|
||||||
|
|
||||||
topK = min(int(len(messages)), 100000)
|
topK = min(int(len(messages)), 100000)
|
||||||
read_name = jieba.analyse.extract_tags(await pre_precess(messages, wordcloud_stopwords_dir, config), topK=topK,
|
read_name = jieba.analyse.extract_tags(await pre_precess(messages, config), topK=topK,
|
||||||
withWeight=True,
|
withWeight=True,
|
||||||
allowPOS=())
|
allowPOS=())
|
||||||
name = []
|
name = []
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user