zhenxun_bot/plugins/send_setu_/update_setu/data_source.py

172 lines
7.1 KiB
Python
Raw Normal View History

2021-11-23 21:44:59 +08:00
from configs.path_config import IMAGE_PATH, TEXT_PATH, TEMP_PATH
from services.log import logger
from datetime import datetime
from utils.image_utils import compressed_image, get_img_hash
from utils.utils import get_bot
2022-01-05 22:32:59 +08:00
from PIL import UnidentifiedImageError
2022-02-19 18:20:19 +08:00
from .._model import Setu
2021-11-23 21:44:59 +08:00
from asyncpg.exceptions import UniqueViolationError
from configs.config import Config
from utils.http_utils import AsyncHttpx
from nonebot import Driver
import nonebot
import os
import ujson as json
import shutil
# NoneBot driver instance; used below to register the startup hook.
driver: Driver = nonebot.get_driver()
2022-02-19 18:20:19 +08:00
# Base image directory; the "_setu" and "_r18" folders are created beneath it.
_path = IMAGE_PATH
2021-11-23 21:44:59 +08:00
# Migrate legacy setu data; fixes the problem of local_id always being 50
@driver.on_startup
async def update_old_setu_data():
    """
    Import legacy setu JSON records into the database and remove old files.

    Registered as a driver startup hook. When ``setu_data.json`` or
    ``r18_setu_data.json`` exists under ``TEXT_PATH``, each record is passed
    to ``Setu.add_setu_data`` with a sequential index (the normal file and
    the R-18 file each have their own counter), the JSON file is deleted
    afterwards, and ``setu_url.json`` / ``setu_r18_url.json`` are removed too.
    Records rejected with ``UniqueViolationError`` are counted as failures
    and do not consume an index.

    The legacy ``setu_rar``, ``r18_rar`` and ``rar`` image directories are
    removed on every call, whether or not any JSON file was found.
    """
    path = TEXT_PATH
    setu_data_file = path / "setu_data.json"
    r18_data_file = path / "r18_setu_data.json"
    if setu_data_file.exists() or r18_data_file.exists():
        index = 0
        r18_index = 0
        count = 0
        fail_count = 0
        for file in [setu_data_file, r18_data_file]:
            if not file.exists():
                continue
            is_r18_file = file != setu_data_file
            # Use a context manager so the handle is closed before unlink().
            with open(file, "r", encoding="utf8") as f:
                data = json.load(f)
            for item in data.values():
                if is_r18_file:
                    idx = r18_index
                else:
                    idx = index
                    # Records from the normal file must not carry the R-18 tag.
                    if "R-18" in item["tags"]:
                        item["tags"].remove("R-18")
                img_url = item["img_url"].replace("i.pixiv.cat", "i.pximg.net")
                try:
                    await Setu.add_setu_data(
                        idx,
                        item["title"],
                        item["author"],
                        item["pid"],
                        item["img_hash"],
                        img_url,
                        ",".join(item["tags"]),
                    )
                except UniqueViolationError:
                    fail_count += 1
                    logger.info(
                        f'添加旧色图数据失败,色图重复 PID{item["pid"]} index{idx}....'
                    )
                else:
                    count += 1
                    # Advance the counter that produced idx. Deciding by the
                    # "R-18" tag instead would advance the wrong counter for
                    # an R-18-file record lacking that tag, and the next
                    # record would reuse the same index.
                    if is_r18_file:
                        r18_index += 1
                    else:
                        index += 1
                    logger.info(f'添加旧色图数据成功 PID{item["pid"]} index{idx}....')
            file.unlink()
        setu_url_path = path / "setu_url.json"
        setu_r18_url_path = path / "setu_r18_url.json"
        if setu_url_path.exists():
            setu_url_path.unlink()
        if setu_r18_url_path.exists():
            setu_r18_url_path.unlink()
        logger.info(f"更新旧色图数据完成,成功更新数据:{count} 条,累计失败:{fail_count}")
    # Remove the legacy setu rar directories
    shutil.rmtree(IMAGE_PATH / "setu_rar", ignore_errors=True)
    shutil.rmtree(IMAGE_PATH / "r18_rar", ignore_errors=True)
    shutil.rmtree(IMAGE_PATH / "rar", ignore_errors=True)
2021-11-23 21:44:59 +08:00
# Browser-like request headers carrying a pixiv Referer.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) "
        "Gecko/20100101 Firefox/4.0.1"
    ),
    "Referer": "https://www.pixiv.net",
}
2022-01-16 14:52:50 +08:00
async def update_setu_img(flag: bool = False):
    """
    Download every setu image that is missing locally or has no stored hash.

    For each record returned by ``Setu.get_all_setu()`` (processed in reverse
    order) the image is downloaded into ``TEMP_PATH``. Files larger than
    1.5 MiB are compressed into the target folder, smaller ones are moved
    there unchanged. The image hash is then written back through
    ``Setu.update_setu_data``. If the downloaded file is not a decodable
    image and contains a "404 Not Found" page, the record is deleted through
    ``Setu.delete_image`` and its local slot is refilled.

    When anything was updated, any error occurred, or ``flag`` is set, a
    summary is sent as a private message to the first configured superuser.

    :param flag: whether the update was triggered manually; forces the
        summary message even when nothing changed
    """
    image_list = await Setu.get_all_setu()
    image_list.reverse()
    _success = 0
    error_info = []
    error_type = []
    count = 0
    # Loop-invariant setup: the temp directory and the optional proxy host.
    TEMP_PATH.mkdir(exist_ok=True, parents=True)
    ws_url = Config.get_config("pixiv", "PIXIV_NGINX_URL")
    for image in image_list:
        count += 1
        path = _path / "_r18" if image.is_r18 else _path / "_setu"
        local_image = path / f"{image.local_id}.jpg"
        path.mkdir(exist_ok=True, parents=True)
        if local_image.exists() and image.img_hash:
            logger.info(f"更新色图 {image.local_id}.jpg 已存在")
            continue
        temp_file = TEMP_PATH / f"{image.local_id}.jpg"
        if temp_file.exists():
            temp_file.unlink()
        url_ = image.img_url
        if ws_url:
            url_ = url_.replace("i.pximg.net", ws_url).replace(
                "i.pixiv.cat", ws_url
            )
        try:
            if not await AsyncHttpx.download_file(url_, temp_file):
                continue
            try:
                if os.path.getsize(temp_file) > 1024 * 1024 * 1.5:
                    compressed_image(temp_file, local_image)
                else:
                    logger.info(
                        f"不需要压缩,移动图片{TEMP_PATH}/{image.local_id}.jpg "
                        f"--> /{path}/{image.local_id}.jpg"
                    )
                    # The source must be temp_file itself: joining TEMP_PATH
                    # with a segment that starts with "/" discards TEMP_PATH
                    # and points at the filesystem root. shutil.move also
                    # works when the temp and image directories are on
                    # different filesystems, where os.rename would fail.
                    shutil.move(str(temp_file), str(local_image))
            except FileNotFoundError:
                logger.warning(f"文件 {image.local_id}.jpg 不存在,跳过...")
                continue
            img_hash = str(get_img_hash(f"{path}/{image.local_id}.jpg"))
            await Setu.update_setu_data(image.pid, img_hash=img_hash)
            # Count the image only once it is fully stored and hashed, so
            # failures never need to be subtracted again.
            _success += 1
        except UnidentifiedImageError:
            # The image has been deleted upstream: the download is an error
            # page rather than an image. Read it as bytes (the content may
            # not be valid text) and finish reading before unlinking, since
            # an open file cannot be removed on Windows.
            if local_image.exists() and b"404 Not Found" in local_image.read_bytes():
                # NOTE(review): max_num is presumably the local_id of the
                # image that takes over this slot - confirm against
                # Setu.delete_image.
                max_num = await Setu.delete_image(image.pid)
                local_image.unlink()
                replacement = path / f"{max_num}.jpg"
                # The replacement can be missing (e.g. it was this very
                # file); skip the rename instead of raising from the handler.
                if replacement.exists():
                    os.rename(replacement, local_image)
                logger.warning(f"更新色图 PID{image.pid} 404已删除并替换")
        except Exception as e:
            logger.error(f"更新色图 {image.local_id}.jpg 错误 {type(e)}: {e}")
            # Report each exception type only once in the summary message.
            if type(e) not in error_type:
                error_type.append(type(e))
                error_info.append(f"更新色图 {image.local_id}.jpg 错误 {type(e)}: {e}")
    if _success or error_info or flag:
        bot = get_bot()
        superusers = list(bot.config.superusers) if bot else []
        if not superusers:
            # Nobody to notify; avoid an AttributeError / IndexError here.
            logger.warning("更新色图完成,但未找到可用的 bot 或超级用户,跳过通知")
            return
        await bot.send_private_msg(
            user_id=int(superusers[0]),
            message=f'{str(datetime.now()).split(".")[0]} 更新 色图 完成,本地存在 {count} 张,实际更新 {_success} 张,'
            f"以下为更新时未知错误:\n" + "\n".join(error_info),
        )