# zhenxun_bot/plugins/reimu/data_source.py
import asyncio
from asyncio.exceptions import TimeoutError

import aiohttp
from lxml import etree

from configs.config import MAXINFO_REIMU
from services.log import logger
from utils.user_agent import get_user_agent
from utils.utils import get_local_proxy
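
# NOTE: check_need_list() is called by get_repass() below, but its definition is
# not part of this excerpt of the file. The stub here is an assumption kept only
# so the module stays importable; the real implementation filters post titles
# (e.g. against configured keywords) and should be used instead.
def check_need_list(title: str) -> bool:
    # Placeholder behaviour: accept every title. Swap in the plugin's real filter.
    return True
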

async def from_reimu_get_info(key_word: str, page: int) -> str:
    # Refuse blacklisted keywords before any request is made.
    if "miku" in key_word.lower():
        logger.warning("Taboo words are being triggered")
        return ""
    repass = ""
    url = f"https://blog.reimu.net/search/{key_word}/page/{page}"
    url_s = "https://blog.reimu.net/"
    try:
        # "最近的存档" ("latest archive") fetches the blog front page instead of a search.
        if key_word == "最近的存档":
            logger.debug("Now starting get the {}".format(url_s))
            repass = await get_repass(url_s)
        else:
            logger.debug("Now starting get the {}".format(url))
            repass = await get_repass(url)
    except TimeoutError as e:
        logger.error("Timeout! {}".format(e))
    return repass


async def get_repass(url: str) -> str:
    repass = ""
    # Fixed note prepended to every reply: most archives use "⑨" as the unpack password.
    info = "[Note]注意大部分资源解压密码为⑨\n"
    fund = None
    async with aiohttp.ClientSession(headers=get_user_agent()) as session:
        async with session.get(url, proxy=get_local_proxy(), timeout=15) as response:
            html = etree.HTML(await response.text())

            # "未找到" ("not found") is the page title shown for an empty search result.
            fund_l = html.xpath('//h1[@class="page-title"]/text()')
            if fund_l:
                fund = fund_l[0]
            if fund == "未找到":
                return "老司机也找不到路了……"

            headers = html.xpath("//article/header/h2/a/text()")
            urls = html.xpath("//article/header/h2/a/@href")
            logger.debug("Now get {} post from search page".format(len(headers)))

            # Keep only titles that pass the plugin's filter, then cap the result
            # count at MAXINFO_REIMU from the bot configuration.
            headers_d = []
            urls_d = []
            for i, header in enumerate(headers):
                if check_need_list(header):
                    headers_d.append(headers[i])
                    urls_d.append(urls[i])
                else:
                    logger.debug(
                        "This title {} does not meet the requirements".format(header)
                    )

            header_len = len(headers_d)
            logger.debug("Get {} post after processing".format(header_len))
            if header_len > MAXINFO_REIMU:
                headers_d = headers_d[:MAXINFO_REIMU]
                urls_d = urls_d[:MAXINFO_REIMU]

            for h_s, url_s in zip(headers_d, urls_d):
                # "审核结果存档" is the recurring review-archive post; skip it.
                if h_s != "审核结果存档":
                    # Throttle between detail-page requests without blocking the
                    # event loop (the original used time.sleep here).
                    await asyncio.sleep(1.5)
                    putline = await get_son_html_info(h_s, url_s)
                    if putline:
                        if repass:
                            repass = "\n\n- - - - - - - - \n".join([repass, putline])
                        else:
                            repass = putline
                else:
                    logger.info("Skipped the review archive page")

    if repass:
        repass = info + repass
    return repass


async def get_son_html_info(h_s, url_s) -> str:
    repass = ""
    logger.debug("Now starting get the {}".format(url_s))
    async with aiohttp.ClientSession(headers=get_user_agent()) as session:
        async with session.get(url_s, proxy=get_local_proxy(), timeout=15) as response:
            html = etree.HTML(await response.text())
            # The post body is a <pre> block: a plain-text description first,
            # then download links (<a>), each followed by its own text chunk.
            pres = html.xpath('//div[@class="entry-content"]/pre/text()')
            a_texts = html.xpath('//div[@class="entry-content"]/pre//a/text()')
            a_hrefs = html.xpath('//div[@class="entry-content"]/pre//a/@href')

            if pres and a_texts and a_hrefs:
                # Remove empty text nodes before pairing them with the links.
                while "" in pres:
                    pres.remove("")

                # "【资源名称】" labels the resource name in the reply.
                repass = "【资源名称】 {}\n\n{}".format(h_s, pres[0].strip())
                for i, (a_t_s, a_h_s) in enumerate(zip(a_texts, a_hrefs)):
                    a = "\n {} {} {} ".format(a_t_s, a_h_s, pres[i + 1].strip())
                    repass += a
            else:
                logger.warning("Not get putline from {}".format(url_s))

    return repass
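

# Illustrative usage only: not part of the original plugin, which awaits
# from_reimu_get_info() from a bot command handler. The keyword and page
# number below are arbitrary example values.
if __name__ == "__main__":
    result = asyncio.run(from_reimu_get_info("东方", 1))
    print(result if result else "no results")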