# zhenxun_bot/plugins/pix_gallery/_data_source.py

from asyncpg.exceptions import UniqueViolationError
from ._model.omega_pixiv_illusts import OmegaPixivIllusts
from asyncio.locks import Semaphore
from asyncio.exceptions import TimeoutError
from ._model.pixiv import Pixiv
from pathlib import Path
from typing import List, Optional, Tuple
from utils.utils import change_pixiv_image_links
from utils.image_utils import BuildImage
from utils.http_utils import AsyncHttpx
from services.log import logger
from configs.config import Config
from configs.path_config import TEMP_PATH
import aiofiles
import platform
import asyncio
import math

try:
    import ujson as json
except ModuleNotFoundError:
    import json

if str(platform.system()).lower() == "windows":
    policy = asyncio.WindowsSelectorEventLoopPolicy()
    asyncio.set_event_loop_policy(policy)

headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6;"
    " rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Referer": "https://www.pixiv.net",
}

HIBIAPI = Config.get_config("hibiapi", "HIBIAPI")
if not HIBIAPI:
    HIBIAPI = "https://api.obfs.dev"
HIBIAPI = HIBIAPI[:-1] if HIBIAPI[-1] == "/" else HIBIAPI
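
# HIBIAPI is normalized above: a trailing "/" is stripped so that the
# f-string endpoints below (e.g. f"{HIBIAPI}/api/pixiv/search") never
# produce a double slash.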

async def start_update_image_url(
    current_keyword: List[str], black_pid: List[str]
) -> Tuple[int, int]:
    """
    Start updating image urls
    :param current_keyword: keywords
    :param black_pid: blacklisted pids
    :return: number of pids and number of images
    """
    global HIBIAPI
    pid_count = 0
    pic_count = 0
    tasks = []
    semaphore = asyncio.Semaphore(10)
    for keyword in current_keyword:
        for page in range(1, 110):
            if keyword.startswith("uid:"):
                url = f"{HIBIAPI}/api/pixiv/member_illust"
                params = {"id": keyword[4:], "page": page}
                if page == 30:
                    break
            elif keyword.startswith("pid:"):
                url = f"{HIBIAPI}/api/pixiv/illust"
                params = {"id": keyword[4:]}
            else:
                url = f"{HIBIAPI}/api/pixiv/search"
                params = {"word": keyword, "page": page}
            tasks.append(
                asyncio.ensure_future(
                    search_image(
                        url, keyword, params, semaphore, page, black_pid
                    )
                )
            )
            if keyword.startswith("pid:"):
                break
    result = await asyncio.gather(*tasks)
    for x in result:
        pid_count += x[0]
        pic_count += x[1]
    return pid_count, pic_count
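
# Hypothetical usage sketch (the values below are illustrative, not from the
# original plugin): keywords may be plain tags, "uid:xxx" (capped at 30 pages)
# or "pid:xxx" (a single illust), while black_pid lists pids to skip.
#
#     pid_count, pic_count = await start_update_image_url(
#         ["初音ミク", "uid:123456", "pid:987654"], ["11111111"]
#     )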

async def search_image(
    url: str,
    keyword: str,
    params: dict,
    semaphore: Semaphore,
    page: int = 1,
    black: Optional[List[str]] = None,
) -> Tuple[int, int]:
    """
    Search for images
    :param url: search url
    :param keyword: keyword
    :param params: request params
    :param semaphore: semaphore
    :param page: page
    :param black: pid blacklist
    :return: number of pids and number of images
    """
    tmp_pid = []
    pic_count = 0
    pid_count = 0
    async with semaphore:
        try:
            data = (await AsyncHttpx.get(url, params=params)).json()
            if (
                not data
                or data.get("error")
                or (not data.get("illusts") and not data.get("illust"))
            ):
                return 0, 0
            if url != f"{HIBIAPI}/api/pixiv/illust":
                logger.info(f'{keyword}: 获取数据成功...数据总量:{len(data["illusts"])}')
                data = data["illusts"]
            else:
                logger.info(f'获取数据成功...PID{params.get("id")}')
                data = [data["illust"]]
            img_data = {}
            for x in data:
                pid = x["id"]
                title = x["title"]
                width = x["width"]
                height = x["height"]
                view = x["total_view"]
                bookmarks = x["total_bookmarks"]
                uid = x["user"]["id"]
                author = x["user"]["name"]
                tags = []
                for tag in x["tags"]:
                    for i in tag:
                        if tag[i]:
                            tags.append(tag[i])
                img_urls = []
                if x["page_count"] == 1:
                    img_urls.append(x["meta_single_page"]["original_image_url"])
                else:
                    for urls in x["meta_pages"]:
                        img_urls.append(urls["image_urls"]["original"])
                if (
                    (
                        bookmarks
                        >= Config.get_config("pix", "SEARCH_HIBIAPI_BOOKMARKS")
                        or (
                            url == f"{HIBIAPI}/api/pixiv/member_illust"
                            and bookmarks >= 1500
                        )
                        or (url == f"{HIBIAPI}/api/pixiv/illust")
                    )
                    and len(img_urls) < 10
                    and _check_black(img_urls, black)
                ):
                    img_data[pid] = {
                        "pid": pid,
                        "title": title,
                        "width": width,
                        "height": height,
                        "view": view,
                        "bookmarks": bookmarks,
                        "img_urls": img_urls,
                        "uid": uid,
                        "author": author,
                        "tags": tags,
                    }
                else:
                    continue
            for x in img_data.keys():
                data = img_data[x]
                for img_url in data["img_urls"]:
                    img_p = img_url[img_url.rfind("_") + 1 : img_url.rfind(".")]
                    try:
                        if await Pixiv.add_image_data(
                            data["pid"],
                            data["title"],
                            data["width"],
                            data["height"],
                            data["view"],
                            data["bookmarks"],
                            img_url,
                            img_p,
                            data["uid"],
                            data["author"],
                            ",".join(data["tags"]),
                        ):
                            if data["pid"] not in tmp_pid:
                                pid_count += 1
                                tmp_pid.append(data["pid"])
                            pic_count += 1
                            logger.info(f'存储图片PID{data["pid"]} IMG_P{img_p}')
                    except UniqueViolationError:
                        logger.warning(f'{data["pid"]} | {img_url} 已存在...')
        except Exception as e:
            logger.warning(f"PIX在线搜索图片错误已再次调用 {type(e)}{e}")
            await search_image(url, keyword, params, semaphore, page, black)
    return pid_count, pic_count
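
# Note: on an unexpected exception, search_image logs a warning and awaits
# itself once more with the same arguments; the counts from that retry are
# discarded and the outer pid_count/pic_count are returned instead.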

async def get_image(img_url: str, user_id: int) -> Optional[Path]:
    """
    Download an image
    :param img_url: image url or pixiv artworks url
    :param user_id: user id (used in the temp file name)
    :return: path of the downloaded image file
    """
    if "https://www.pixiv.net/artworks" in img_url:
        pid = img_url.rsplit("/", maxsplit=1)[-1]
        params = {"id": pid}
        for _ in range(3):
            try:
                response = await AsyncHttpx.get(
                    f"{HIBIAPI}/api/pixiv/illust", params=params
                )
                if response.status_code == 200:
                    data = response.json()
                    if data.get("illust"):
                        if data["illust"]["page_count"] == 1:
                            img_url = data["illust"]["meta_single_page"][
                                "original_image_url"
                            ]
                        else:
                            img_url = data["illust"]["meta_pages"][0][
                                "image_urls"
                            ]["original"]
                        break
            except TimeoutError:
                pass
    old_img_url = img_url
    img_url = change_pixiv_image_links(
        img_url,
        Config.get_config("pix", "PIX_IMAGE_SIZE"),
        Config.get_config("pixiv", "PIXIV_NGINX_URL"),
    )
    old_img_url = change_pixiv_image_links(
        old_img_url, None, Config.get_config("pixiv", "PIXIV_NGINX_URL")
    )
    for _ in range(3):
        try:
            response = await AsyncHttpx.get(
                img_url,
                headers=headers,
                timeout=Config.get_config("pix", "TIMEOUT"),
            )
            if response.status_code == 404:
                img_url = old_img_url
                continue
            async with aiofiles.open(
                TEMP_PATH / f"pix_{user_id}_{img_url.split('/')[-1][:-4]}.jpg", "wb"
            ) as f:
                await f.write(response.content)
            return TEMP_PATH / f"pix_{user_id}_{img_url.split('/')[-1][:-4]}.jpg"
        except TimeoutError:
            logger.warning(f"PIX{img_url} 图片下载超时...")
    return None
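
# Hypothetical usage sketch (ids and file name are illustrative): get_image
# accepts either a direct image url or a www.pixiv.net/artworks url and
# returns the path of the downloaded temp file, or None on failure.
#
#     file = await get_image("https://www.pixiv.net/artworks/12345678", 114514)
#     if file:
#         ...  # e.g. send the image at TEMP_PATH / "pix_114514_xxx.jpg"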

async def uid_pid_exists(id_: str) -> bool:
    """
    Check whether a pid/uid is valid
    :param id_: pid/uid, prefixed with "pid:" or "uid:"
    """
    if id_.startswith("uid:"):
        url = f"{HIBIAPI}/api/pixiv/member"
    elif id_.startswith("pid:"):
        url = f"{HIBIAPI}/api/pixiv/illust"
    else:
        return False
    params = {"id": int(id_[4:])}
    data = (await AsyncHttpx.get(url, params=params)).json()
    if data.get("error"):
        return False
    return True
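
# Hypothetical usage sketch: the id must keep its "uid:"/"pid:" prefix,
# anything else returns False without querying HiBiAPI.
#
#     ok = await uid_pid_exists("pid:87654321")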

async def get_keyword_num(keyword: str) -> Tuple[int, int, int, int, int]:
    """
    Get the number of images stored for a keyword/tag
    :param keyword: keyword/tag
    """
    count, r18_count = await Pixiv.get_keyword_num(keyword.split())
    count_, setu_count, r18_count_ = await OmegaPixivIllusts.get_keyword_num(
        keyword.split()
    )
    return count, r18_count, count_, setu_count, r18_count_

async def remove_image(pid: int, img_p: str) -> bool:
    """
    Delete an image
    :param pid: pid
    :param img_p: image page, e.g. p0 / p1
    """
    if img_p:
        if "p" not in img_p:
            img_p = f"p{img_p}"
    return await Pixiv.remove_image_data(pid, img_p)
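
# Hypothetical usage sketch: a bare page number is normalized to the stored
# "pX" form before deletion.
#
#     deleted = await remove_image(87654321, "0")  # deletes img_p "p0"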

def gen_keyword_pic(
    _pass_keyword: List[str], not_pass_keyword: List[str], is_superuser: bool
):
    """
    Build an image listing all approved / pending keywords, uids and pids
    :param _pass_keyword: approved list
    :param not_pass_keyword: pending list
    :param is_superuser: whether the caller is a superuser
    """
    _keyword = [
        x
        for x in _pass_keyword
        if not x.startswith("uid:")
        and not x.startswith("pid:")
        and not x.startswith("black:")
    ]
    _uid = [x for x in _pass_keyword if x.startswith("uid:")]
    _pid = [x for x in _pass_keyword if x.startswith("pid:")]
    _n_keyword = [
        x
        for x in not_pass_keyword
        if not x.startswith("uid:")
        and not x.startswith("pid:")
        and not x.startswith("black:")
    ]
    _n_uid = [
        x
        for x in not_pass_keyword
        if x.startswith("uid:") and not x.startswith("black:")
    ]
    _n_pid = [
        x
        for x in not_pass_keyword
        if x.startswith("pid:") and not x.startswith("black:")
    ]
    img_width = 0
    img_data = {
        "_keyword": {"width": 0, "data": _keyword},
        "_uid": {"width": 0, "data": _uid},
        "_pid": {"width": 0, "data": _pid},
        "_n_keyword": {"width": 0, "data": _n_keyword},
        "_n_uid": {"width": 0, "data": _n_uid},
        "_n_pid": {"width": 0, "data": _n_pid},
    }
    for x in list(img_data.keys()):
        img_data[x]["width"] = math.ceil(len(img_data[x]["data"]) / 40)
        img_width += img_data[x]["width"] * 200
    if not is_superuser:
        img_width = (
            img_width
            - (
                img_data["_n_keyword"]["width"]
                + img_data["_n_uid"]["width"]
                + img_data["_n_pid"]["width"]
            )
            * 200
        )
        del img_data["_n_keyword"]
        del img_data["_n_pid"]
        del img_data["_n_uid"]
    current_width = 0
    A = BuildImage(img_width, 1100)
    for x in list(img_data.keys()):
        if img_data[x]["data"]:
            img = BuildImage(
                img_data[x]["width"] * 200, 1100, 200, 1100, font_size=40
            )
            start_index = 0
            end_index = 40
            total_index = img_data[x]["width"] * 40
            for _ in range(img_data[x]["width"]):
                tmp = BuildImage(198, 1100, font_size=20)
                text_img = BuildImage(198, 100, font_size=50)
                key_str = "\n".join(
                    [key for key in img_data[x]["data"][start_index:end_index]]
                )
                tmp.text((10, 100), key_str)
                if x.find("_n") == -1:
                    text_img.text((24, 24), "已收录")
                else:
                    text_img.text((24, 24), "待收录")
                tmp.paste(text_img, (0, 0))
                start_index += 40
                end_index = (
                    end_index + 40 if end_index + 40 <= total_index else total_index
                )
                background_img = BuildImage(200, 1100, color="#FFE4C4")
                background_img.paste(tmp, (1, 1))
                img.paste(background_img)
            A.paste(img, (current_width, 0))
            current_width += img_data[x]["width"] * 200
    return A.pic2bs4()
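
# Layout note: every category is rendered as columns of up to 40 entries,
# 200 px wide each, on a 1100 px tall canvas; non-superusers only see the
# approved ("已收录") columns because the "_n_*" entries are dropped above.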

def _check_black(img_urls: List[str], black: List[str]) -> bool:
    """
    Check whether a pid is blacklisted
    :param img_urls: image url list
    :param black: blacklist
    :return: True if no url matches the blacklist
    """
    # guard against black being None (search_image defaults it to None)
    for b in black or []:
        for img_url in img_urls:
            if b in img_url:
                return False
    return True
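
# Hypothetical usage sketch (the url is illustrative): blacklist entries are
# matched as substrings of each image url, so a bare pid blocks every page
# of that work.
#
#     _check_black(
#         ["https://i.pximg.net/img-original/img/2022/01/01/00/00/00/11111111_p0.png"],
#         ["11111111"],
#     )  # -> False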