mirror of
https://github.com/zhenxun-org/zhenxun_bot.git
synced 2025-12-15 14:22:55 +08:00
Update data_source.py
This commit is contained in:
parent
98fb742d4d
commit
5eb0319ce5
@ -17,109 +17,111 @@ except ModuleNotFoundError:
|
|||||||
|
|
||||||
|
|
||||||
async def _download_image(session, url, dest):
    """Fetch ``url`` through the local proxy and store the body at ``dest``.

    Returns True only when the server answered 200 and the file was written;
    returns False for any other status. Network errors propagate to the caller.
    """
    async with session.get(url, proxy=get_local_proxy(), timeout=15) as response:
        if response.status != 200:
            return False
        # Read the body before touching the disk so a failed read does not
        # leave an empty file behind.
        payload = await response.read()
    async with aiofiles.open(dest, 'wb') as f:
        await f.write(payload)
    return True


async def update_setu_img():
    """Download the images queued in the url lists and register them.

    For each of ``setu_url.json`` and ``setu_r18_url.json`` (read from
    ``TXT_PATH``) the function:

    1. loads the matching data file (``setu_data.json`` /
       ``r18_setu_data.json``), treating a missing file as empty;
    2. downloads every queued image whose url is not already recorded,
       trying the "original" url first and the queued url as a fallback,
       with up to three attempts per image;
    3. compresses images larger than 1.5 MiB via ``rar_imgs`` or moves
       smaller ones straight into the image directory;
    4. drops images whose hash is already recorded, otherwise stores their
       metadata under the image index;
    5. writes the data file back, empties the url list and reports the
       totals to the log and to the first configured superuser.

    NOTE(review): the nesting of this function was re-derived from a view in
    which indentation was lost - confirm against the repository before merging.
    """
    async with aiohttp.ClientSession(headers=get_user_agent()) as session:
        for file_name in ['setu_url.json', 'setu_r18_url.json']:
            if file_name == 'setu_url.json':
                json_name = 'setu_data.json'
                path = '_setu/'
                rar_path = 'setu_rar/'
            else:
                json_name = 'r18_setu_data.json'
                path = '_r18/'
                rar_path = 'r18_rar/'
            os.makedirs(IMAGE_PATH + path, exist_ok=True)
            os.makedirs(IMAGE_PATH + rar_path, exist_ok=True)

            # Existing metadata; a missing or empty file starts from scratch.
            try:
                with open(TXT_PATH + json_name, encoding='utf8') as f:
                    data = json.load(f)
                if not data:
                    data = {}
            except (FileNotFoundError, TypeError):
                data = {}

            _success = 0
            _similar = 0

            # Queue of images to fetch. The file is truncated to zero bytes at
            # the end of a run, which surfaces here as a ValueError.
            try:
                with open(TXT_PATH + file_name, 'r', encoding='utf8') as f:
                    txt_data = json.load(f)
                if not txt_data:
                    continue
            except (FileNotFoundError, ValueError):
                continue

            total = len(txt_data)
            urls = [data[x]['img_url'] for x in data]
            for pid in txt_data:
                # The next index is the number of files already stored.
                index = str(len(os.listdir(IMAGE_PATH + path)))
                url = txt_data[pid]["img_url"].replace('img-master', 'img-original').replace('_master1200', '')
                if url in urls or txt_data[pid]["img_url"] in urls:
                    continue
                tmp_file = IMAGE_PATH + rar_path + index + ".jpg"
                final_file = IMAGE_PATH + path + index + ".jpg"
                logger.info(f'开始更新 index:{index} --> {url}')
                for _ in range(3):
                    try:
                        if not await _download_image(session, url, tmp_file):
                            logger.info(f'{url} 不存在,使用更新原url')
                            url = txt_data[pid]["img_url"]
                            await _download_image(session, url, tmp_file)
                        try:
                            if os.path.getsize(tmp_file) > 1024 * 1024 * 1.5:
                                rar_imgs(
                                    rar_path,
                                    path,
                                    in_file_name=index,
                                    out_file_name=index
                                )
                            else:
                                logger.info('不需要压缩,移动图片 ' + IMAGE_PATH + rar_path + index + ".jpg --> "
                                            + IMAGE_PATH + path + index + ".jpg")
                                os.rename(tmp_file, final_file)
                        except FileNotFoundError:
                            # Nothing was downloaded for this attempt.
                            logger.warning(f'文件 {index}.jpg 不存在,跳过...')
                            continue
                        img_hash = str(get_img_hash(f'{IMAGE_PATH}{path}{index}.jpg'))
                        known_hashes = [data[x]['img_hash'] for x in data]
                        if img_hash in known_hashes:
                            logger.info(f'index:{index} 与 '
                                        f'{known_hashes.index(img_hash)} 存在重复,删除')
                            os.remove(final_file)
                            _similar += 1
                        else:
                            data[index] = {
                                'title': txt_data[pid]['title'],
                                'author': txt_data[pid]['author'],
                                'pid': txt_data[pid]['pid'],
                                'img_hash': img_hash,
                                'img_url': url,
                                'tags': txt_data[pid]['tags'],
                            }
                        # Count the image only once it has been fully handled,
                        # so failed attempts never push the counter below the
                        # number of images actually processed.
                        _success += 1
                        break
                    except (TimeoutError, ClientConnectorError) as e:
                        logger.warning(f'{url} 更新失败 ..{type(e)}:{e}')
                        continue
                    except Exception as e:
                        # Log first so the error is recorded even if the
                        # notification itself fails.
                        logger.error(f'更新色图 {index}.jpg 错误 {type(e)}: {e}')
                        await get_bot().send_private_msg(
                            user_id=int(list(get_bot().config.superusers)[0]),
                            message=f'更新 {index}.jpg 色图错误 {type(e)}: {e}'
                        )
                        continue

            with open(TXT_PATH + json_name, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4, ensure_ascii=False)
            # Truncate the queue file so the same urls are not fetched again.
            with open(TXT_PATH + file_name, 'w'):
                pass
            logger.info(
                f'{str(datetime.now()).split(".")[0]} 更新 {file_name.split(".")[0]}完成,预计更新 {total} 张,'
                f'实际更新 {_success} 张,相似 {_similar} 张,实际存入 {_success - _similar} 张')
            await get_bot().send_private_msg(
                user_id=int(list(get_bot().config.superusers)[0]),
                message=f'{str(datetime.now()).split(".")[0]} 更新{file_name.split(".")[0]}完成,预计更新 {total} 张,'
                        f'实际更新 {_success} 张,相似 {_similar} 张,实际存入 {_success - _similar} 张'
            )
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user