Update data_source.py

This commit is contained in:
HibiKier 2021-06-17 10:21:32 +08:00 committed by GitHub
parent 98fb742d4d
commit 5eb0319ce5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -17,109 +17,111 @@ except ModuleNotFoundError:
async def _download_setu_img(session, url, file_path):
    """Fetch ``url`` and write the response body to ``file_path``.

    Returns True when the server answered with HTTP 200 and the body was
    written, False for any other status (nothing is written in that case).
    Network errors raised by the session propagate to the caller.
    """
    async with session.get(url, proxy=get_local_proxy(), timeout=15) as response:
        if response.status != 200:
            return False
        async with aiofiles.open(file_path, 'wb') as f:
            await f.write(await response.read())
    return True


async def update_setu_img():
    """Download the images queued in the pending-url JSON files.

    For each of ``setu_url.json`` and ``setu_r18_url.json`` (under TXT_PATH):

    * load the existing metadata file (``setu_data.json`` /
      ``r18_setu_data.json``) and the pending entries;
    * for every pending entry whose url is not already recorded, download the
      image into the matching ``*_rar/`` directory (up to 3 attempts, trying
      the "img-original" url first and falling back to the url as listed),
      then either pass it to ``rar_imgs`` when it is larger than 1.5 MiB or
      move it to the image directory;
    * drop the image when its hash is already recorded, otherwise record its
      metadata under the new index;
    * write the metadata back, truncate the pending file, and report the
      totals to the log and to the first configured superuser.

    ``_success`` counts images that were downloaded and processed completely
    (including those later dropped as duplicates); ``_similar`` counts the
    duplicates. A failed attempt never changes either counter.
    """
    async with aiohttp.ClientSession(headers=get_user_agent()) as session:
        for file_name in ['setu_url.json', 'setu_r18_url.json']:
            if file_name == 'setu_url.json':
                json_name = 'setu_data.json'
                path = '_setu/'
                rar_path = 'setu_rar/'
            else:
                json_name = 'r18_setu_data.json'
                path = '_r18/'
                rar_path = 'r18_rar/'
            if not os.path.exists(IMAGE_PATH + path):
                os.mkdir(IMAGE_PATH + path)
            if not os.path.exists(IMAGE_PATH + rar_path):
                os.mkdir(IMAGE_PATH + rar_path)

            # Existing metadata; a missing or empty file starts from scratch.
            try:
                with open(TXT_PATH + json_name, encoding='utf8') as f:
                    data = json.load(f) or {}
            except (FileNotFoundError, TypeError):
                data = {}

            _success = 0
            _similar = 0

            # Pending entries; nothing to do when the file is absent,
            # unparsable or empty.
            try:
                with open(TXT_PATH + file_name, 'r', encoding='utf8') as f:
                    txt_data = json.load(f)
            except (FileNotFoundError, ValueError):
                continue
            if not txt_data:
                continue

            total = len(txt_data)
            # Only used for membership tests, so a set is enough.
            urls = {data[x]['img_url'] for x in data}

            for pid in txt_data:
                # The next index is the current number of stored images.
                index = str(len(os.listdir(IMAGE_PATH + path)))
                listed_url = txt_data[pid]["img_url"]
                url = listed_url.replace('img-master', 'img-original').replace('_master1200', '')
                if url in urls or listed_url in urls:
                    continue
                logger.info(f'开始更新 index:{index} --> {url}')

                rar_file = IMAGE_PATH + rar_path + index + ".jpg"
                img_file = IMAGE_PATH + path + index + ".jpg"

                for _ in range(3):
                    try:
                        if not await _download_setu_img(session, url, rar_file):
                            logger.info(f'{url} 不存在使用更新原url')
                            # Keep the fallback url for later attempts and
                            # for the stored metadata.
                            url = listed_url
                            await _download_setu_img(session, url, rar_file)

                        try:
                            if os.path.getsize(rar_file) > 1024 * 1024 * 1.5:
                                rar_imgs(
                                    rar_path,
                                    path,
                                    in_file_name=index,
                                    out_file_name=index
                                )
                            else:
                                logger.info('不需要压缩,移动图片 ' + rar_file + " --> " + img_file)
                                os.rename(rar_file, img_file)
                        except FileNotFoundError:
                            # Nothing was downloaded (or the file vanished);
                            # try again without touching the counters.
                            logger.warning(f'文件 {index}.jpg 不存在,跳过...')
                            continue

                        img_hash = str(get_img_hash(f'{IMAGE_PATH}{path}{index}.jpg'))
                        known_hashes = [data[x]['img_hash'] for x in data]
                        if img_hash in known_hashes:
                            logger.info(f'index:{index}'
                                        f'{known_hashes.index(img_hash)} 存在重复,删除')
                            os.remove(img_file)
                            _similar += 1
                        else:
                            data[index] = {
                                'title': txt_data[pid]['title'],
                                'author': txt_data[pid]['author'],
                                'pid': txt_data[pid]['pid'],
                                'img_hash': img_hash,
                                'img_url': url,
                                'tags': txt_data[pid]['tags'],
                            }
                        # Count the image only once it has been fully
                        # processed, so failed attempts cannot skew the total.
                        _success += 1
                        break
                    except (TimeoutError, ClientConnectorError) as e:
                        logger.warning(f'{url} 更新失败 ..{type(e)}{e}')
                    except Exception as e:
                        # Log first so the error is recorded even if the
                        # notification itself fails.
                        logger.error(f'更新色图 {index}.jpg 错误 {type(e)}: {e}')
                        await get_bot().send_private_msg(
                            user_id=int(list(get_bot().config.superusers)[0]),
                            message=f'更新 {index}.jpg 色图错误 {type(e)}: {e}'
                        )

            with open(TXT_PATH + json_name, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4, ensure_ascii=False)
            # Opening in 'w' mode truncates the pending list.
            with open(TXT_PATH + file_name, 'w'):
                pass

            logger.info(
                f'{str(datetime.now()).split(".")[0]} 更新 {file_name.split(".")[0]}完成,预计更新 {total} 张,'
                f'实际更新 {_success} 张,相似 {_similar} 张,实际存入 {_success - _similar}')
            await get_bot().send_private_msg(
                user_id=int(list(get_bot().config.superusers)[0]),
                message=f'{str(datetime.now()).split(".")[0]} 更新{file_name.split(".")[0]}完成,预计更新 {total} 张,'
                        f'实际更新 {_success} 张,相似 {_similar} 张,实际存入 {_success - _similar}'
            )