Merge pull request #342 from yajiwa/main

fix bug
This commit is contained in:
HibiKier 2022-05-19 21:03:23 +08:00 committed by GitHub
commit 89b87a38b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 14 additions and 1700 deletions

View File

@@ -1,19 +1,14 @@
import re import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Tuple, Union from typing import Tuple, Union
import pytz
try:
from zoneinfo import ZoneInfo
except ImportError:
from backports.zoneinfo import ZoneInfo # type: ignore
from nonebot import on_command, get_driver from nonebot import on_command, get_driver
from nonebot.adapters.onebot.v11 import Message, MessageSegment from nonebot.adapters.onebot.v11 import Message, MessageSegment
from nonebot.adapters.onebot.v11.event import GroupMessageEvent from nonebot.adapters.onebot.v11.event import GroupMessageEvent
from nonebot.matcher import Matcher from nonebot.matcher import Matcher
from nonebot.params import Arg, Command, CommandArg, Depends from nonebot.params import Arg, Command, CommandArg, Depends
from nonebot.typing import T_State from nonebot.typing import T_State
from .data_source import draw_word_cloud, get_list_msg from .data_source import draw_word_cloud, get_list_msg
from configs.config import Config from configs.config import Config
__zx_plugin_name__ = "词云" __zx_plugin_name__ = "词云"
@@ -191,11 +186,10 @@ async def handle_message(
user_id = int(event.user_id) user_id = int(event.user_id)
else: else:
user_id = None user_id = None
# 将时间转换到 东八 时区
# 排除机器人自己发的消息
# 将时间转换到 UTC 时区
messages = await get_list_msg(user_id, int(event.group_id), messages = await get_list_msg(user_id, int(event.group_id),
days=[start.astimezone(ZoneInfo("UTC")), stop.astimezone(ZoneInfo("UTC"))]) days=(start.astimezone(pytz.timezone("Asia/Shanghai")),
stop.astimezone(pytz.timezone("Asia/Shanghai"))))
if messages: if messages:
image_bytes = await draw_word_cloud(messages, get_driver().config) image_bytes = await draw_word_cloud(messages, get_driver().config)
if image_bytes: if image_bytes:

View File

@@ -3,7 +3,6 @@ import os
import random import random
import jieba.analyse import jieba.analyse
import re import re
from collections import Counter
from typing import List from typing import List
from PIL import Image as IMG from PIL import Image as IMG
import jieba import jieba
@@ -12,18 +11,18 @@ from wordcloud import WordCloud, ImageColorGenerator
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from io import BytesIO from io import BytesIO
from configs.path_config import IMAGE_PATH, FONT_PATH, TEXT_PATH from configs.path_config import IMAGE_PATH, FONT_PATH
from utils.http_utils import AsyncHttpx from utils.http_utils import AsyncHttpx
from models.chat_history import ChatHistory from models.chat_history import ChatHistory
from configs.config import Config from configs.config import Config
async def pre_precess(msg: List[str], wordcloud_stopwords_dir: str, config) -> str: async def pre_precess(msg: List[str], config) -> str:
return await asyncio.get_event_loop().run_in_executor( return await asyncio.get_event_loop().run_in_executor(
None, _pre_precess, msg, wordcloud_stopwords_dir, config) None, _pre_precess, msg,config)
def _pre_precess(msg: List[str], wordcloud_stopwords_dir, config) -> str: def _pre_precess(msg: List[str],config) -> str:
"""对消息进行预处理""" """对消息进行预处理"""
# 过滤掉命令 # 过滤掉命令
command_start = tuple([i for i in config.command_start if i]) command_start = tuple([i for i in config.command_start if i])
@@ -33,27 +32,20 @@ def _pre_precess(msg: List[str], wordcloud_stopwords_dir, config) -> str:
msg = re.sub(r"https?://[\w/:%#\$&\?\(\)~\.=\+\-]+", "", msg) msg = re.sub(r"https?://[\w/:%#\$&\?\(\)~\.=\+\-]+", "", msg)
# 去除 \u200b # 去除 \u200b
msg = re.sub(r"\u200b", "", msg) msg = re.sub(r"[\u200b]", "", msg)
# 去除cq码 # 去除cq码
msg = re.sub(r"\[CQ:.*?]", "", msg) msg = re.sub(r"\[CQ:.*?]", "", msg)
# 去除&#91&#93
msg = re.sub("[&#9(1|3);]", "", msg)
# 去除 emoji # 去除 emoji
# https://github.com/carpedm20/emoji # https://github.com/carpedm20/emoji
msg = replace_emoji(msg) msg = replace_emoji(msg)
# 分词
msg = "".join(cut_message(msg, wordcloud_stopwords_dir))
return msg return msg
def cut_message(msg: str, wordcloud_stopwords_dir) -> List[str]:
"""分词"""
with wordcloud_stopwords_dir.open("r", encoding="utf8") as f:
stopwords = [word.strip() for word in f.readlines()]
f.close()
words = jieba.lcut(msg)
return [word.strip() for word in words if word.strip() not in stopwords]
async def draw_word_cloud(messages, config): async def draw_word_cloud(messages, config):
wordcloud_dir = IMAGE_PATH / "wordcloud" wordcloud_dir = IMAGE_PATH / "wordcloud"
@@ -61,10 +53,6 @@ async def draw_word_cloud(messages, config):
# 默认用真寻图片 # 默认用真寻图片
zx_logo_path = wordcloud_dir / "default.png" zx_logo_path = wordcloud_dir / "default.png"
wordcloud_ttf = FONT_PATH / "STKAITI.TTF" wordcloud_ttf = FONT_PATH / "STKAITI.TTF"
wordcloud_test_dir = TEXT_PATH / "wordcloud"
wordcloud_test_dir.mkdir(exist_ok=True, parents=True)
wordcloud_stopwords_dir = wordcloud_test_dir / "stopwords.txt"
if not os.listdir(wordcloud_dir): if not os.listdir(wordcloud_dir):
url = "https://ghproxy.com/https://raw.githubusercontent.com/HibiKier/zhenxun_bot/main/resources/image/wordcloud/default.png" url = "https://ghproxy.com/https://raw.githubusercontent.com/HibiKier/zhenxun_bot/main/resources/image/wordcloud/default.png"
try: try:
@@ -77,15 +65,9 @@ async def draw_word_cloud(messages, config):
await AsyncHttpx.download_file(ttf_url, wordcloud_ttf) await AsyncHttpx.download_file(ttf_url, wordcloud_ttf)
except: except:
return False return False
if not wordcloud_stopwords_dir.exists():
stopword_url = 'https://ghproxy.com/https://raw.githubusercontent.com/HibiKier/zhenxun_bot/main/resources/text/wordcloud/stopwords.txt'
try:
await AsyncHttpx.download_file(stopword_url, wordcloud_stopwords_dir)
except:
return False
topK = min(int(len(messages)), 100000) topK = min(int(len(messages)), 100000)
read_name = jieba.analyse.extract_tags(await pre_precess(messages, wordcloud_stopwords_dir, config), topK=topK, read_name = jieba.analyse.extract_tags(await pre_precess(messages, config), topK=topK,
withWeight=True, withWeight=True,
allowPOS=()) allowPOS=())
name = [] name = []

File diff suppressed because it is too large Load Diff