zhenxun_bot/plugins/update_setu/data_source.py

165 lines
7.4 KiB
Python
Raw Normal View History

2021-10-03 14:24:07 +08:00
from configs.path_config import IMAGE_PATH, TEXT_PATH
2021-05-20 19:25:51 +08:00
from services.log import logger
from datetime import datetime
2021-07-30 21:21:51 +08:00
from utils.image_utils import compressed_image, get_img_hash
2021-06-30 19:50:55 +08:00
from utils.utils import get_bot, get_local_proxy
2021-05-20 19:25:51 +08:00
from asyncio.exceptions import TimeoutError
2021-07-30 21:21:51 +08:00
from models.setu import Setu
from aiohttp.client_exceptions import ClientConnectorError
from asyncpg.exceptions import UniqueViolationError
from pathlib import Path
from nonebot import Driver
import nonebot
2021-05-20 19:25:51 +08:00
import aiofiles
import aiohttp
2021-07-30 21:21:51 +08:00
import os
import ujson as json
# NoneBot driver instance; used in this module to register a startup hook.
driver: Driver = nonebot.get_driver()
# Base image directory; the per-category image folders are created beneath it.
_path = Path(IMAGE_PATH)
2021-06-15 10:57:08 +08:00
2021-07-30 21:21:51 +08:00
2021-10-03 14:24:07 +08:00
# Replace the old setu data to fix the issue of local_id always being 50.
@driver.on_startup
async def update_old_setu_data():
    """Import legacy setu JSON files into the ``Setu`` table at startup.

    Looks for ``setu_data.json`` and ``r18_setu_data.json`` under
    ``TEXT_PATH``.  When neither exists the function does nothing.  Otherwise
    every entry of each existing file is inserted with
    ``Setu.add_setu_data`` using a running local id (one counter per file),
    the processed file is deleted, and the obsolete ``setu_url.json`` /
    ``setu_r18_url.json`` files are removed as well.

    Entries rejected with ``UniqueViolationError`` are counted as failures and
    do not consume a local id.
    """
    path = Path(TEXT_PATH)
    setu_data_file = path / "setu_data.json"
    r18_data_file = path / "r18_setu_data.json"
    if not (setu_data_file.exists() or r18_data_file.exists()):
        return

    index = 0
    r18_index = 0
    count = 0
    fail_count = 0
    for file in [setu_data_file, r18_data_file]:
        if not file.exists():
            continue
        is_r18_file = file != setu_data_file
        # Close the handle before the file is unlinked below.
        with open(file, "r", encoding="utf8") as f:
            data = json.load(f)
        for x in data:
            item = data[x]
            tags = item["tags"]
            if is_r18_file:
                idx = r18_index
            else:
                idx = index
                # Entries from the normal file must not carry the R-18 tag.
                if 'R-18' in tags:
                    tags.remove('R-18')
            # str.replace is a no-op when the old host is absent.
            img_url = item["img_url"].replace("i.pixiv.cat", "i.pximg.net")
            try:
                await Setu.add_setu_data(
                    idx,
                    item["title"],
                    item["author"],
                    item["pid"],
                    item["img_hash"],
                    img_url,
                    ",".join(tags),
                )
            except UniqueViolationError:
                fail_count += 1
                logger.info(f'添加旧色图数据失败,色图重复 PID{item["pid"]} index{idx}....')
            else:
                count += 1
                # Advance the counter that produced ``idx``.  Deciding by tag
                # (as before) could bump the wrong counter for an R-18 file
                # entry lacking the tag, so the next entry reused the same id.
                if is_r18_file:
                    r18_index += 1
                else:
                    index += 1
                logger.info(f'添加旧色图数据成功 PID{item["pid"]} index{idx}....')
        file.unlink()

    # These url caches belong to the legacy format and are removed with it.
    setu_url_path = path / "setu_url.json"
    setu_r18_url_path = path / "setu_r18_url.json"
    if setu_url_path.exists():
        setu_url_path.unlink()
    if setu_r18_url_path.exists():
        setu_r18_url_path.unlink()
    logger.info(f"更新旧色图数据完成,成功更新数据:{count} 条,累计失败:{fail_count}")
2021-07-30 21:21:51 +08:00
# Request headers handed to the aiohttp session that downloads the images.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) "
        "Gecko/20100101 Firefox/4.0.1"
    ),
    "Referer": "https://www.pixiv.net",
}
2021-05-20 19:25:51 +08:00
async def update_setu_img():
    """Download missing setu images, store their hashes and report the result.

    Every record returned by ``Setu.get_all_setu()`` is visited in reverse
    order.  An image is downloaded when its local file is missing or its
    record has no ``img_hash``; each download is attempted up to three times.

    A download is first written to a staging folder (``rar`` / ``r18_rar``).
    Files larger than 1.5 MiB are handed to ``compressed_image``, smaller
    ones are moved unchanged into the final folder (``_setu`` / ``_r18``).
    The hash of the final file is then saved with ``Setu.update_setu_data``.

    When finished, a summary (records seen, images updated, first error of
    each exception type) is sent as a private message to the first
    configured superuser.
    """
    image_list = await Setu.get_all_setu()
    image_list.reverse()
    _success = 0
    error_info = []
    error_type = []
    count = 0
    async with aiohttp.ClientSession(headers=headers) as session:
        for image in image_list:
            count += 1
            path = _path / "_r18" if image.is_r18 else _path / "_setu"
            rar_path = _path / "r18_rar" if image.is_r18 else _path / "rar"
            file_name = f"{image.local_id}.jpg"
            local_image = path / file_name
            staged_image = rar_path / file_name
            path.mkdir(exist_ok=True, parents=True)
            rar_path.mkdir(exist_ok=True, parents=True)
            if local_image.exists() and image.img_hash:
                logger.info(f'更新色图 {image.local_id}.jpg 已存在')
                continue
            for _ in range(3):
                try:
                    async with session.get(
                        image.img_url, proxy=get_local_proxy(), timeout=30
                    ) as response:
                        if response.status != 200:
                            # Non-200 answers consume one of the attempts.
                            continue
                        async with aiofiles.open(staged_image, "wb") as f:
                            await f.write(await response.read())
                    try:
                        # Only files above 1.5 MiB are compressed.
                        if os.path.getsize(staged_image) > 1024 * 1024 * 1.5:
                            compressed_image(staged_image, local_image)
                        else:
                            logger.info(
                                f"不需要压缩,移动图片{rar_path}/{image.local_id}.jpg "
                                f"--> /{path}/{image.local_id}.jpg"
                            )
                            # os.replace overwrites an existing destination on
                            # every platform; os.rename raises on Windows when
                            # the file exists but its hash was missing.
                            os.replace(staged_image, local_image)
                    except FileNotFoundError:
                        logger.warning(f"文件 {image.local_id}.jpg 不存在,跳过...")
                        continue
                    img_hash = str(
                        get_img_hash(f"{path}/{image.local_id}.jpg")
                    )
                    await Setu.update_setu_data(image.pid, img_hash=img_hash)
                    # Count an image only once it is fully processed, so that
                    # failed post-processing plus a retry is not counted twice.
                    _success += 1
                    break
                except (TimeoutError, ClientConnectorError) as e:
                    logger.warning(f"{image.local_id}.jpg 更新失败 ..{type(e)}{e}")
                except Exception as e:
                    logger.error(f"更新色图 {image.local_id}.jpg 错误 {type(e)}: {e}")
                    # Report only the first occurrence of each exception type.
                    if type(e) not in error_type:
                        error_type.append(type(e))
                        error_info.append(
                            f"更新色图 {image.local_id}.jpg 错误 {type(e)}: {e}"
                        )
    if not error_info:
        error_info = ['无报错..']
    bot = get_bot()
    # NOTE(review): assumes get_bot() may return None when no bot is
    # connected — confirm against utils.utils.get_bot.
    superusers = list(bot.config.superusers) if bot is not None else []
    if not superusers:
        # Nobody to notify; do not crash after all the work has been done.
        logger.warning("更新 色图 完成,但没有可用的 bot 或超级用户,无法发送更新报告")
        return
    await bot.send_private_msg(
        user_id=int(superusers[0]),
        message=f'{str(datetime.now()).split(".")[0]} 更新 色图 完成,本地存在 {count} 张,实际更新 {_success} 张,以下为更新时未知错误:\n'
        + "\n".join(error_info),
    )