zhenxun_bot/plugins/pixiv/data_source.py


import platform
from asyncio.exceptions import TimeoutError
from typing import List, Tuple

import aiofiles
import aiohttp
import feedparser
from aiohttp.client_exceptions import ClientConnectorError
from bs4 import BeautifulSoup

from configs.config import RSSHUBAPP
from configs.path_config import IMAGE_PATH
from utils.message_builder import image
from utils.user_agent import get_user_agent
from utils.utils import get_local_proxy

if platform.system() == "Windows":
    import asyncio

    # aiohttp can misbehave on the proactor event loop that newer Pythons
    # default to on Windows, so fall back to the selector-based loop.
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())


async def get_pixiv_urls(
    mode: str, num: int = 5, date: str = ""
) -> Tuple[List[str], List[List[str]], int]:
    url = f"{RSSHUBAPP}/pixiv/ranking/{mode}"
    if date:
        url += f"/{date}"
    try:
        return await parser_data(url, num)
    except ClientConnectorError:
        # The RSSHub instance refused the connection; retry. Note this retry
        # is unbounded and will keep recursing while the instance is down.
        return await get_pixiv_urls(mode, num, date)
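
# Minimal usage sketch (an illustration, not part of the plugin): assumes a
# reachable RSSHub instance at RSSHUBAPP and an already-running event loop,
# e.g. inside a bot handler:
#
#     texts, img_urls, code = await get_pixiv_urls("day", num=3)
#     if code == 200:
#         for text, urls in zip(texts, img_urls):
#             print(text, urls)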


async def download_pixiv_imgs(urls: list, user_id: int) -> str:
    result = ""
    index = 0
    for img in urls:
        async with aiohttp.ClientSession(headers=get_user_agent()) as session:
            for _ in range(3):
                try:
                    async with session.get(
                        img, proxy=get_local_proxy(), timeout=2
                    ) as response:
                        async with aiofiles.open(
                            IMAGE_PATH + f"temp/{user_id}_{index}_pixiv.jpg", "wb"
                        ) as f:
                            await f.write(await response.read())
                    result += image(f"{user_id}_{index}_pixiv.jpg", "temp")
                    index += 1
                    break
                except TimeoutError:
                    # The request or the read timed out; retry (3 attempts).
                    pass
            else:
                result += "\nThis image failed to download..\n"
    return result
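
# Sketch of chaining the two helpers (hypothetical user_id value; `image`
# appends an image message segment for each saved file, so the return value
# can be sent as part of one bot message):
#
#     texts, img_urls, code = await get_pixiv_urls("week", num=1)
#     if code == 200 and img_urls:
#         msg = texts[0] + await download_pixiv_imgs(img_urls[0], user_id=123456)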


async def search_pixiv_urls(
    keyword: str, num: int, order: str, r18: int
) -> Tuple[List[str], List[List[str]], int]:
    url = f"{RSSHUBAPP}/pixiv/search/{keyword}/{order}/{r18}"
    return await parser_data(url, num)
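
# Usage sketch: `order` and `r18` are interpolated straight into the RSSHub
# pixiv search route, so their accepted values follow that route's parameters
# (e.g. a sort mode and an R18 filter flag):
#
#     texts, img_urls, code = await search_pixiv_urls("初音ミク", 5, "popular", 0)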


async def parser_data(url: str, num: int) -> Tuple[List[str], List[List[str]], int]:
    text_list = []
    urls = []
    async with aiohttp.ClientSession() as session:
        for _ in range(3):
            try:
                async with session.get(
                    url, proxy=get_local_proxy(), timeout=2
                ) as response:
                    data = feedparser.parse(await response.text())["entries"]
                break
            except TimeoutError:
                pass
        else:
            return ["The network is a bit flaky, it may recover in a while"], [], 998
    try:
        if len(data) == 0:
            return ["Nothing was found"], [], 997
        if num > len(data):
            num = len(data)
        data = data[:num]
        for entry in data:
            soup = BeautifulSoup(entry["summary"], "lxml")
            title = "Title: " + entry["title"]
            pl = soup.find_all("p")
            # The first <p> holds "author - ..."; the following ones each
            # wrap a single <img> whose src attribute is the picture URL.
            author = pl[0].text.split("-")[0].strip()
            imgs = []
            text_list.append(f"{title}\n{author}\n")
            for p in pl[1:]:
                imgs.append(p.find("img").get("src"))
            urls.append(imgs)
    except (ValueError, AttributeError, IndexError):
        # Malformed entries (missing <p>/<img> tags) count as a broken site.
        return ["The site seems to be broken, it may recover in a while"], [], 999
    return text_list, urls, 200
# asyncio.get_event_loop().run_until_complete(get_pixiv_urls('day'))
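
if __name__ == "__main__":
    # Standalone smoke test, a sketch only: mirrors the commented-out call
    # above and assumes RSSHUBAPP points at a reachable RSSHub instance.
    import asyncio

    async def _demo():
        texts, img_urls, code = await get_pixiv_urls("day")
        print(code, texts)

    asyncio.run(_demo())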