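"""Reimu blog search helper: query https://blog.reimu.net/ for resource posts
and format the matching download links as a text reply."""
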
import time
from asyncio.exceptions import TimeoutError

import aiohttp
from lxml import etree

from configs.config import MAXINFO_REIMU
from services.log import logger
from utils.user_agent import get_user_agent
from utils.utils import get_local_proxy


async def from_reimu_get_info(key_word: str, page: int) -> str:
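    """Search blog.reimu.net for ``key_word`` (results page ``page``).

    Returns a formatted text summary of the matching posts, or an empty
    string when the keyword is filtered; the fetching logic below is
    currently commented out.
    """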
    if "miku" in key_word.lower():
        logger.warning("Taboo words are being triggered")
        return ""
    repass = ""
    url = f"https://blog.reimu.net/search/{key_word}/page/{page}"
    url_s = "https://blog.reimu.net/"
    # try:
    #     if key_word == "最近的存档":  # i.e. "the latest archive"
    #         logger.debug("Now starting get the {}".format(url_s))
    #         repass = await get_repass(url_s)
    #     else:
    #         logger.debug("Now starting get the {}".format(url))
    #         repass = await get_repass(url)
    # except TimeoutError as e:
    #     logger.error("Timeout! {}".format(e))
    return repass


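# The helpers below are kept for reference: get_repass scrapes the search
# result page and get_son_html_info extracts the download links from each
# matching post.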
# async def get_repass(url: str) -> str:
#     repass = ""
#     info = "[Note]注意大部分资源解压密码为⑨\n"  # "[Note] most archives unzip with the password ⑨"
#     fund = None
#     print(url)
#     async with aiohttp.ClientSession(headers=get_user_agent()) as session:
#         async with session.get(url, proxy=get_local_proxy(), timeout=15) as response:
#             html = etree.HTML(await response.text())
#
#     fund_l = html.xpath('//h1[@class="page-title"]/text()')
#     if fund_l:
#         fund = fund_l[0]
#         if fund == "未找到":  # page title reads "not found"
#             return "老司机也找不到路了……"  # "not even the old driver could find the way..." (no results)
#
#     headers = html.xpath("//article/header/h2/a/text()")
#     urls = html.xpath("//article/header/h2/a/@href")
#     logger.debug("Now get {} post from search page".format(len(headers)))
#
#     headers_d = []
#     urls_d = []
#     for i, header in enumerate(headers):
#         if check_need_list(header):
#             headers_d.append(headers[i])
#             urls_d.append(urls[i])
#         else:
#             logger.debug(
#                 "This title {} does not meet the requirements".format(header)
#             )
#
#     header_len = len(headers_d)
#     logger.debug("Get {} post after processing".format(header_len))
#     if header_len > MAXINFO_REIMU:
#         headers_d = headers_d[:MAXINFO_REIMU]
#         urls_d = urls_d[:MAXINFO_REIMU]
#
#     for h_s, url_s in zip(headers_d, urls_d):
#         if h_s != "审核结果存档":  # skip the "review results archive" post
#             time.sleep(1.5)
#             putline = await get_son_html_info(h_s, url_s)
#             if putline:
#                 if repass:
#                     repass = "\n\n- - - - - - - - \n".join([repass, putline])
#                 else:
#                     repass = putline
#         else:
#             logger.info("审核归档页面已跳过")  # "skipped the review archive page"
#
#     if repass:
#         repass = info + repass
#     return repass
#
#
# async def get_son_html_info(h_s, url_s) -> str:
#     repass = ""
#     logger.debug("Now starting get the {}".format(url_s))
#     async with aiohttp.ClientSession(headers=get_user_agent()) as session:
#         async with session.get(url_s, proxy=get_local_proxy(), timeout=15) as response:
#             html = etree.HTML(await response.text())
#
#     pres = html.xpath('//div[@class="entry-content"]/pre/text()')
#     a_texts = html.xpath('//div[@class="entry-content"]/pre//a/text()')
#     a_hrefs = html.xpath('//div[@class="entry-content"]/pre//a/@href')
#
#     if pres and a_texts and a_hrefs:
#         while "" in pres:
#             pres.remove("")
#
#         repass = "【资源名称】 {}\n\n{}".format(h_s, pres[0].strip())  # "【资源名称】" = "[resource name]"
#         for i, (a_t_s, a_h_s) in enumerate(zip(a_texts, a_hrefs)):
#             a = "\n {} {} {} ".format(a_t_s, a_h_s, pres[i + 1].strip())
#             repass += a
#     else:
#         logger.warning("Not get putline from {}".format(url_s))
#
#     return repass
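

# A minimal usage sketch (hypothetical, outside the bot's own handlers; the
# keyword "东方" is only an example):
#
#     import asyncio
#     result = asyncio.run(from_reimu_get_info("东方", 1))
#     print(result or "nothing found")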