zhenxun_bot/plugins/bt/data_source.py

from utils.user_agent import get_user_agent
import aiohttp
from configs.config import Config
from bs4 import BeautifulSoup
from utils.utils import get_local_proxy
import platform

if platform.system() == "Windows":
    import asyncio

    # Python 3.8+ defaults to the proactor event loop on Windows, which
    # aiohttp does not fully support, so fall back to the selector loop.
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

url = "http://www.eclzz.world"


async def get_bt_info(keyword: str, page: str):
    """Search the BT site for `keyword` and yield parsed result entries."""
    async with aiohttp.ClientSession(headers=get_user_agent()) as session:
        async with session.get(
            f"{url}/s/{keyword}_rel_{page}.html", proxy=get_local_proxy(), timeout=5
        ) as response:
            text = await response.text()
            # "大约0条结果" is the site's "about 0 results" marker: no hits.
            if text.find("大约0条结果") != -1:
                return
            soup = BeautifulSoup(text, "lxml")
            item_lst = soup.find_all("div", {"class": "search-item"})
            # Cap the number of yielded results at the configured BT_MAX_NUM.
            bt_max_num = min(Config.get_config("bt", "BT_MAX_NUM"), len(item_lst))
            for item in item_lst[:bt_max_num]:
                divs = item.find_all("div")
                # Strip the <em> tags the site uses to highlight the keyword.
                title = (
                    str(divs[0].find("a").text)
                    .replace("<em>", "")
                    .replace("</em>", "")
                    .strip()
                )
                spans = divs[2].find_all("span")
                itype = spans[0].text
                create_time = spans[1].find("b").text
                file_size = spans[2].find("b").text
                # Resolve the actual download link from the result's detail page.
                link = await get_download_link(divs[0].find("a")["href"], session)
                yield title, itype, create_time, file_size, link


async def get_download_link(_url: str, session: aiohttp.ClientSession) -> str:
    """Fetch a result's detail page and return its download link."""
    async with session.get(
        f"{url}{_url}", proxy=get_local_proxy(), timeout=30
    ) as response:
        soup = BeautifulSoup(await response.text(), "lxml")
        # The download link lives in the detail page's <a id="down-url"> anchor.
        return soup.find("a", {"id": "down-url"})["href"]