zhenxun_bot/plugins/search_anime/data_source.py

"""Data source for the search_anime plugin: query share.dmhy.org's RSS search
feed and scrape the magnet link from each result page."""

import time
from typing import List, Union
from urllib import parse

import feedparser
from lxml import etree

from services.log import logger
from utils.http_utils import AsyncHttpx


async def from_anime_get_info(key_word: str, max_: int) -> Union[str, List[str]]:
    """Search dmhy.org for ``key_word`` and return up to ``max_`` formatted
    result lines, or an error message string if the search fails."""
    s_time = time.time()
    url = "https://share.dmhy.org/topics/rss/rss.xml?keyword=" + parse.quote(key_word)
    try:
        repass = await get_repass(url, max_)
    except Exception as e:
        logger.error(f"Something went wrong: {type(e)}: {e}")
        return "Something went wrong!"
    repass.insert(0, f"Results for {key_word} (took {int(time.time() - s_time)} s):\n")
    return repass


async def get_repass(url: str, max_: int) -> List[str]:
    """Fetch the RSS feed at ``url`` and scrape up to ``max_`` result pages."""
    put_line = []
    text = (await AsyncHttpx.get(url)).text
    d = feedparser.parse(text)
    # Cap the result count at the number of entries in the feed.
    max_ = min(max_, len(d.entries))
    url_list = [e.link for e in d.entries][:max_]
    for u in url_list:
        try:
            text = (await AsyncHttpx.get(u)).text
            html = etree.HTML(text)
            magnet = html.xpath('.//a[@id="a_magnet"]/text()')[0]
            title = html.xpath(".//h3/text()")[0]
            item = html.xpath('//div[@class="info resource-info right"]/ul/li')
            # Drop the leading field label (first five characters) and
            # normalize whitespace in the category and size fields.
            class_a = (
                item[0]
                .xpath("string(.)")[5:]
                .strip()
                .replace("\xa0", "")
                .replace("\t", "")
            )
            size = item[3].xpath("string(.)")[5:].strip()
            put_line.append(
                "【{}】| {}\n【{}】| {}".format(class_a, title, size, magnet)
            )
        except Exception as e:
            logger.error(f"Anime search error: {type(e)}: {e}")
    return put_line
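

if __name__ == "__main__":
    # Usage sketch, not part of the original plugin: the bot normally calls
    # from_anime_get_info from a command handler. The keyword and max_ value
    # below are placeholders for illustration only.
    import asyncio

    async def _demo() -> None:
        result = await from_anime_get_info("charlotte", max_=3)
        if isinstance(result, str):
            print(result)  # error message
        else:
            print("\n".join(result))

    asyncio.run(_demo())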