mirror of
https://github.com/zhenxun-org/zhenxun_bot.git
synced 2025-12-15 14:22:55 +08:00
Delete util directory
This commit is contained in:
parent
dca1686851
commit
8e57ee0759
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,140 +0,0 @@
|
||||
import requests
|
||||
from util.user_agent import get_user_agent
|
||||
from bs4 import BeautifulSoup
|
||||
from time import sleep
|
||||
import threading
|
||||
import os
|
||||
from configs.path_config import IMAGE_PATH
|
||||
|
||||
lock = threading.Lock()
|
||||
|
||||
url = "https://search.bilibili.com/article"
|
||||
# path = IMAGE_PATH + "setu/"
|
||||
|
||||
index = 1
|
||||
THREAD_SUM_REMAINDER = 2 # 越小线程越多
|
||||
|
||||
|
||||
class bilibiliThread (threading.Thread):
    """Worker thread that crawls the search-result pages in *url_list*."""

    def __init__(self, threadId, url_list, path, nolist):
        super().__init__()
        self.threadId = threadId    # identifier used only in log output
        self.url_list = url_list    # result pages this worker will crawl
        self.path = path            # directory downloaded images go into
        self.nolist = nolist        # article hrefs to skip (may be None)

    def run(self):
        # Delegate the actual crawling to the module-level worker function.
        print("开始线程<><><><><><><><><> " + self.threadId)
        thread_get_url(self.threadId, self.url_list, self.path, self.nolist)
|
||||
|
||||
|
||||
def get_bilibili_img(name, path, nolist=None):
    """Search bilibili articles for *name* and download every image found.

    name:   search keyword.
    path:   directory images are stored in; file numbering continues from
            the count of files already present there.
    nolist: optional collection of article hrefs to skip.
    """
    global index
    index = get_dirfile_len(path)
    print("index===", index)
    threadId = 1
    params = {
        'keyword': name,
        'page': '1'
    }
    res = requests.get(url, headers=get_user_agent(), params=params)
    sleep(8)  # throttle before parsing to stay polite to the server
    soup = BeautifulSoup(res.text, 'html.parser')
    # The last pagination button holds the total page count; its CSS class
    # differs between page layouts, hence the two attempts. An empty
    # find_all() result raises IndexError (was a bare except before).
    try:
        total_page = soup.find_all('button', {'class': 'pagination-btn'})[-1].text.strip()
    except IndexError:
        try:
            total_page = soup.find_all('button', {'class': 'pagination-btn num-btn'})[-1].text.strip()
        except IndexError:
            total_page = 1  # single result page, no pagination widget
    print(total_page)
    url_list = []
    for page in range(1, int(total_page) + 1):
        url_list.append(
            "https://search.bilibili.com/article?keyword=" + name + "&page=" + str(page)
        )
        # Hand off a batch to a worker thread every THREAD_SUM_REMAINDER pages.
        if page % THREAD_SUM_REMAINDER == 0:
            print('-----> ' + str(page) + " =======>", url_list)
            bilibiliThread(str(threadId), url_list, path, nolist).start()
            threadId += 1
            sleep(0.5)
            url_list = []
    if url_list:
        # Leftover pages that did not fill a whole batch.
        print("=========================最后一个线程启动========================= url数量: ", len(url_list))
        bilibiliThread(str(threadId), url_list, path, nolist).start()
|
||||
|
||||
|
||||
def thread_get_url(threadId, url_list, path, nolist):
    """Crawl each search-result page in *url_list*, collect article image
    URLs and save the downloaded images under *path*.

    Articles whose href appears in *nolist* are skipped.
    """
    for url in url_list:
        res = requests.get(url, headers=get_user_agent())
        sleep(2)  # throttle between requests
        soup = BeautifulSoup(res.text, 'lxml')
        alist = soup.find_all('a', {'class': 'poster'})
        # Collect article links, turning protocol-relative "//..." hrefs
        # into absolute https URLs.  (`nolist != None` -> `is not None`.)
        img_content_page = []
        for a in alist:
            if nolist is not None and a.get('href') in nolist:
                continue
            img_content_page.append("https://" + a.get('href')[2:])
        pic_url = []
        for img_content in img_content_page:
            print("开始获取---------->", img_content)
            res = requests.get(img_content, headers=get_user_agent())
            sleep(2)
            soup = BeautifulSoup(res.text, 'lxml')
            figure_ls = soup.body.find_all('figure')
            for figure in figure_ls:
                # EAFP: content images carry no CSS class and lazy-load
                # through data-src; chrome images have a class attribute.
                try:
                    _ = figure.img.attrs['class']
                except (KeyError, AttributeError):
                    data_src = figure.img.attrs['data-src']
                    pic_url.append('https:' + data_src)
        print("线程 " + threadId + " 获取完毕------> 开始存储")
        # `pic` instead of re-using (and shadowing) the loop variable `url`.
        for pic in pic_url:
            print("线程 " + threadId + "正在存储---------------->", pic)
            res = requests.get(pic, headers=get_user_agent())
            save_img(res.content, path, threadId)
    print("线程 " + threadId + " ---------------->执行完毕")
|
||||
|
||||
|
||||
def save_img(img, path, threadId):
    """Write *img* bytes to the next sequentially numbered .jpg in *path*.

    The module-level counter ``index`` names the files; it is read and
    incremented inside one critical section so concurrent threads never
    reuse a number.
    """
    global index
    # Reserve the file number atomically. The original read the number and
    # incremented it in two separate critical sections, which let two
    # threads pick the same name; worse, its second try/finally released
    # the lock even when open() failed before acquire(), raising
    # "release unlocked lock".
    with lock:
        img_index = index
        index += 1
    try:
        with open(path + str(img_index) + ".jpg", 'wb') as f:
            f.write(img)
    except OSError:
        # Best-effort: a failed write just logs (the number is skipped).
        print("线程 " + threadId + "存储失败-------->" + str(img_index) + ".jpg")
|
||||
|
||||
|
||||
def get_dirfile_len(path):
    """Return how many entries the directory *path* contains."""
    entries = os.listdir(path)
    return len(entries)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual entry point: crawl a fixed wallpaper-collection search and
    # store the images under IMAGE_PATH/bizhi/. The commented block below
    # was a scratch test against a hard-coded search URL.
    # url = "https://search.bilibili.com" \
    # "/article?keyword=%23%E4%BB%8A%E6%97%A5%E4%BB%BD%E7%9A%84%E5%8F%AF%E7%88%B1%" \
    # "E5%B0%8F%E8%90%9D%E8%8E%89%EF%BC%8C%E8%BF%9B%E6%9D%A5%E7%9C%8B%E7%9C%8B%EF%BC%8C%E" \
    # "6%8F%90%E7%A5%9E%E9%86%92%E8%84%91%EF%BC%81"
    # res = requests.get(url, headers=get_user_agent())
    # sleep(2)
    # soup = BeautifulSoup(res.text, 'lxml')
    # alist = soup.find_all('button', {'class': 'pagination-btn num-btn'})
    # total_page = soup.find_all('button', {'class': 'pagination-btn num-btn'})[-1].text.strip()
    # print(total_page)
    get_bilibili_img("精选动漫壁纸手机电脑壁纸&动漫游戏专题", IMAGE_PATH + "bizhi/")
|
||||
@ -1,296 +0,0 @@
|
||||
import os
|
||||
from configs.path_config import IMAGE_PATH, TXT_PATH, TTF_PATH
|
||||
from PIL import Image, ImageFile, ImageDraw, ImageFont
|
||||
import cv2
|
||||
import imagehash
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
|
||||
# Scan an image directory for gaps in the 0..N-1 numbering.
def scan_img(path):
    """Fill gaps in the "<i>.jpg" numbering under IMAGE_PATH/<path> by
    renaming the current highest-numbered file into each hole."""
    path = IMAGE_PATH + path
    nolist = []  # indices already consumed by a rename; skip them
    length = len(os.listdir(path))
    print(length)
    for i in range(length):
        if i in nolist:
            continue
        img_path = path + "{}.jpg".format(i)
        if not os.path.exists(img_path):
            # Move the last file down into the hole at index i.
            print("不存在=== " + str(length) + ".jpg -------> " + str(i) + ".jpg")
            os.rename(path + "{}.jpg".format(length - 1), img_path)
            # NOTE(review): records the pre-decrement `length`, while the
            # file actually moved was (length - 1).jpg — looks off by one;
            # confirm intended behaviour.
            nolist.append(length)
            length -= 1
|
||||
|
||||
|
||||
# Compare two image files by perceptual hash.
def compare_image_with_hash(image_file1, image_file2, max_dif=1.5):
    """Return True when the average-hashes of the two image files differ
    by at most *max_dif* (0 = exact match; smaller = stricter). Preferred
    comparison helper (推荐使用)."""
    # Tolerate truncated image files instead of raising.
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    # (Dropped the dead `hash_1 = None` / `hash_2 = None` pre-assignments.)
    hash_1 = get_img_hash(image_file1)
    hash_2 = get_img_hash(image_file2)
    # imagehash subtraction yields a non-negative Hamming distance; abs()
    # keeps this safe regardless.
    return abs(hash_1 - hash_2) <= max_dif
|
||||
|
||||
|
||||
# Compare an image file against a precomputed hash.
def compare_one_img_hash(image_file, hash_2, max_dif=1.5):
    """Return True when *image_file*'s average-hash is within *max_dif*
    of the precomputed hash *hash_2*."""
    hash_1 = get_img_hash(image_file)
    # Replaces the manual negate + if/else-True/False with a single
    # boolean expression; behaviour unchanged.
    return abs(hash_1 - hash_2) <= max_dif
|
||||
|
||||
|
||||
def get_img_hash(image_file):
    """Compute and return the average-hash of the image at *image_file*."""
    with open(image_file, 'rb') as fp:
        opened = Image.open(fp)
        hash_value = imagehash.average_hash(opened)
    return hash_value
|
||||
|
||||
|
||||
# Batch image compression.
def rar_imgs(inpath, outpath, ratio=0.9, start=0, end=0, lens=0, maxsize=0.0, in_file_name='', out_file_name='',
             itype='jpg'):
    """Shrink images under IMAGE_PATH/<inpath> into IMAGE_PATH/<outpath>.

    Either compress one named file (in_file_name/out_file_name with
    extension *itype*) or the numbered files start..end-1; when *maxsize*
    is non-zero only files larger than it are touched. *ratio* scales both
    dimensions. (The resize logic was triplicated and an always-empty
    skip list `l` was dead code; both folded into _shrink.)
    """
    in_path = IMAGE_PATH + inpath + '/'
    out_path = IMAGE_PATH + outpath + '/'
    if in_file_name != '' and out_file_name != '':
        _shrink(in_path + in_file_name + "." + itype,
                out_path + out_file_name + "." + itype, ratio)
        return
    if lens == 0:
        lens = len(os.listdir(in_path))
    if end == 0:
        end = lens
    for i in range(start, end):
        # Honour the size threshold when one was given.
        if maxsize != 0 and os.path.getsize(in_path + str(i) + ".jpg") <= maxsize:
            continue
        print("压缩----->", i, ".jpg")
        _shrink(in_path + str(i) + ".jpg", out_path + str(i) + ".jpg", ratio)


def _shrink(filein, fileout, ratio):
    """Resize one image file by *ratio* (helper for rar_imgs)."""
    h, w, d = cv2.imread(filein).shape
    ResizeImage(filein, fileout, int(w * ratio), int(h * ratio))
||||
|
||||
|
||||
# Resize helper.
def ResizeImage(filein, fileout, width, height):
    """Resize the image at *filein* to width x height, writing *fileout*."""
    src = cv2.imread(filein)
    resized = cv2.resize(src, (int(width), int(height)))
    cv2.imwrite(fileout, resized)
|
||||
|
||||
|
||||
# Record hashes of (possibly pre-compressed) images.
def save_img_hash(path, name):
    """Record the hash of every image under IMAGE_PATH/<path> into the
    <name>.txt hash list; files over 1.5 MB are hashed from their
    compressed copy in IMAGE_PATH/rar/ instead."""
    threshold = 1024 * 1024 * 1.5
    for file in os.listdir(IMAGE_PATH + path):
        full = IMAGE_PATH + path + file
        if os.path.getsize(full) > threshold:
            compare_img_hash_in_txt(IMAGE_PATH + 'rar/' + file, name)
        else:
            compare_img_hash_in_txt(full, name)
|
||||
|
||||
|
||||
# Check (and optionally record) an image hash against a stored list.
def compare_img_hash_in_txt(file, name, mode=1):
    """Return True when *file*'s average-hash is already recorded in
    TXT_PATH/<name>.txt (a comma-separated hash list).

    mode == 1 additionally appends a previously unseen hash to the list.
    """
    with open(TXT_PATH + name + ".txt", 'a+') as txtfile:
        txtfile.seek(0)  # 'a+' opens positioned at EOF; rewind to read
        hash_list = txtfile.read()[:-1].strip(",")
        # NOTE(review): seek(2) looks like it was meant as seek(0, 2)
        # (seek to end); in append mode writes go to EOF regardless, so
        # this is harmless — confirm intent.
        txtfile.seek(2)
        with open(file, 'rb') as fp:
            img_hash = str(imagehash.average_hash(Image.open(fp)))
        if img_hash not in hash_list:
            if mode == 1:
                txtfile.write(img_hash + ",")
            return False
        return True
|
||||
|
||||
|
||||
# Transparent background -> white.
def alphabg2white_PIL(img):
    """Replace fully transparent pixels of *img* with opaque white and
    return the resulting RGBA image."""
    img = img.convert('RGBA')
    width, height = img.size
    for row in range(height):
        for col in range(width):
            pixel = img.getpixel((col, row))
            if pixel[3] == 0:  # alpha == 0 -> fully transparent
                img.putpixel((col, row), (255, 255, 255, 255))
    return img
|
||||
|
||||
|
||||
def pic2b64(pic: Image) -> str:
    """Encode *pic* as PNG and return it as a 'base64://...' string."""
    buffer = BytesIO()
    pic.save(buffer, format='PNG')
    encoded = base64.b64encode(buffer.getvalue())
    return 'base64://' + encoded.decode()
|
||||
|
||||
|
||||
def fig2b64(plt: plt) -> str:
    """Render the given matplotlib figure/pyplot object to PNG at 100 dpi
    and return it as a 'base64://...' string."""
    buffer = BytesIO()
    plt.savefig(buffer, format='PNG', dpi=100)
    encoded = base64.b64encode(buffer.getvalue())
    return 'base64://' + encoded.decode()
|
||||
|
||||
|
||||
class CreateImg:
    """Thin wrapper around a PIL image plus its draw context: build a
    canvas (blank colour or from a background file), then paste images,
    draw text, resize, and export it.
    """

    def __init__(self,
                 w,
                 h,
                 img_w=0,
                 img_h=0,
                 color='white',
                 image_type='RGBA',
                 font_size=10,
                 background='',
                 ttf='yz.ttf',
                 divisor=1):
        # w, h:         canvas size; pass 0, 0 together with `background`
        #               to adopt (or scale by `divisor`) the background's
        #               own size.
        # img_w, img_h: cell size used by paste()'s automatic grid layout.
        # background:   optional path of an image file to start from.
        # ttf:          font file name under TTF_PATH.
        self.w = int(w)
        self.h = int(h)
        self.img_w = int(img_w)
        self.img_h = int(img_h)
        # Cursor for paste()'s auto-layout.
        self.current_w = 0
        self.current_h = 0
        self.ttfont = ImageFont.truetype(TTF_PATH + ttf, int(font_size))
        if not background:
            self.markImg = Image.new(image_type, (self.w, self.h), color)
        else:
            if w == 0 and h == 0:
                # Adopt the background's own size, optionally scaled.
                self.markImg = Image.open(background)
                w, h = self.markImg.size
                if divisor:
                    self.w = int(divisor * w)
                    self.h = int(divisor * h)
                    # NOTE(review): Image.ANTIALIAS was removed in recent
                    # Pillow releases (use Image.LANCZOS) — confirm the
                    # pinned Pillow version.
                    self.markImg = self.markImg.resize((self.w, self.h), Image.ANTIALIAS)
                else:
                    self.w = w
                    self.h = h
            else:
                self.markImg = Image.open(background).resize((self.w, self.h), Image.ANTIALIAS)
        self.draw = ImageDraw.Draw(self.markImg)
        self.size = self.w, self.h

    # Paste an image.
    def paste(self, img, pos=None, alpha=False):
        """Paste *img* (PIL image or CreateImg) at *pos*, or at the next
        img_w x img_h grid cell when *pos* is None."""
        if isinstance(img, CreateImg):
            img = img.markImg
        if self.current_w == self.w:
            # Row full: wrap to the start of the next row.
            self.current_w = 0
            self.current_h += self.img_h
        if not pos:
            pos = (self.current_w, self.current_h)
        if alpha:
            try:
                self.markImg.paste(img, pos, img)
            except ValueError:
                # Source had no usable alpha band; convert and retry.
                img = img.convert("RGBA")
                self.markImg.paste(img, pos, img)
        else:
            self.markImg.paste(img, pos)
        self.current_w += self.img_w
        return self.markImg

    # Measure text.
    def getsize(self, msg):
        """Return (width, height) of *msg* rendered in the current font."""
        return self.ttfont.getsize(msg)

    # Draw text.
    def text(self, pos, text, fill=(0, 0, 0)):
        """Draw *text* at *pos* with the configured font; returns image."""
        self.draw.text(pos, text, fill=fill, font=self.ttfont)
        return self.markImg

    # Pie slice (demo).
    def pieslice(self):
        # Fixed demo slice; coordinates and colours are hard-coded.
        self.draw.pieslice((350, 50, 500, 200), -150, -30, 'pink', 'crimson')
        return self.markImg

    # Save.
    def save(self, path):
        """Write the image to *path*."""
        self.markImg.save(path)

    # Show.
    def show(self):
        """Open the image in the system viewer."""
        self.markImg.show(self.markImg)

    # Scale down/up.
    def resize(self, ratio):
        """Scale the canvas by *ratio* and refresh the draw context."""
        self.markImg = self.markImg.resize((int(self.w * ratio), int(self.h * ratio)), Image.ANTIALIAS)
        self.w, self.h = self.markImg.size
        self.size = self.w, self.h
        self.draw = ImageDraw.Draw(self.markImg)

    # Check whether a word overflows the canvas width.
    def check_font_size(self, word):
        """True when *word* rendered in the current font exceeds width."""
        return self.ttfont.getsize(word)[0] > self.w

    # Make semi-transparent.
    def transparent(self, n=0):
        """Set every pixel inside an n-pixel border to alpha 100."""
        self.markImg = self.markImg.convert('RGBA')  # ensure alpha channel
        x, y = self.markImg.size  # width, height
        # Rewrite the alpha of each pixel in the interior region.
        for i in range(n, x - n):
            for k in range(n, y - n):
                color = self.markImg.getpixel((i, k))
                color = color[:-1] + (100, )
                self.markImg.putpixel((i, k), color)
        return self.markImg

    # Export as base64.
    def pic2bs4(self):
        """Return the image PNG-encoded as a 'base64://...' string."""
        buf = BytesIO()
        self.markImg.save(buf, format='PNG')
        base64_str = base64.b64encode(buf.getvalue()).decode()
        return 'base64://' + base64_str

    # Convert colour mode.
    def convert(self, itype):
        """Convert the underlying image to mode *itype* in place."""
        self.markImg = self.markImg.convert(itype)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,86 +0,0 @@
|
||||
from configs.path_config import IMAGE_PATH, VOICE_PATH
|
||||
from nonebot.adapters.cqhttp.message import MessageSegment
|
||||
import os
|
||||
from services.log import logger
|
||||
|
||||
|
||||
def image(img_name: str = None, path: str = '', abspath: str = None, b64: str = None):
    """Build an image MessageSegment from, in priority order: an absolute
    path, a base64 payload, an http URL, or a file under IMAGE_PATH/<path>.
    Returns '' when the referenced local file does not exist."""
    if abspath:
        if not os.path.exists(abspath):
            return ''
        return MessageSegment.image("file:///" + abspath)
    if b64:
        payload = b64 if 'base64://' in b64 else 'base64://' + b64
        return MessageSegment.image(payload)
    img_name = str(img_name)
    if 'http' in img_name:
        # Remote URL: pass through untouched.
        return MessageSegment.image(img_name)
    if len(img_name.split('.')) == 1:
        img_name += '.jpg'  # default extension when none was given
    local = IMAGE_PATH + path + '/' + img_name
    if os.path.exists(local):
        return MessageSegment.image("file:///" + local)
    logger.warning(f"图片 {path}/{img_name}缺失.")
    return ''
|
||||
|
||||
|
||||
def at(qq):
    """Build an @-mention MessageSegment for the given QQ id."""
    return MessageSegment.at(qq)
|
||||
|
||||
|
||||
def record(voice_name='', path=''):
    """Build a voice MessageSegment for *voice_name* (.mp3 assumed when no
    extension) under VOICE_PATH[/path]; returns '' when the local file is
    missing. An http URL is passed through untouched."""
    if len(voice_name.split('.')) == 1:
        voice_name += '.mp3'
    if path == "":
        # BUG FIX: was VOICE_PATH + "{}.".format(voice_name), which
        # appended a stray '.' so the default-path file was never found.
        name = VOICE_PATH + voice_name
    else:
        name = VOICE_PATH + "{}/{}".format(path, voice_name)
    if voice_name.find('http') == -1:
        if os.path.exists(name):
            result = MessageSegment.record("file:///" + name)
            return result
        logger.warning(f"语音{path}/{voice_name}缺失...")
        return ""
    return MessageSegment.record(voice_name)
|
||||
|
||||
|
||||
def text(msg):
    """Wrap *msg* as a plain-text MessageSegment."""
    return MessageSegment.text(msg)
|
||||
|
||||
|
||||
def contact_user(qq):
    """Build a contact-recommendation MessageSegment for a QQ user."""
    return MessageSegment.contact_user(qq)
|
||||
|
||||
|
||||
def share(url, title, content='', image_url=''):
    """Build a link-share MessageSegment (optional summary and cover)."""
    return MessageSegment.share(url, title, content, image_url)
|
||||
|
||||
|
||||
def xml(data):
    """Wrap raw XML card data as a MessageSegment."""
    return MessageSegment.xml(data)
|
||||
|
||||
|
||||
def json(data):
    """Wrap raw JSON card data as a MessageSegment.

    NOTE: the name shadows the stdlib ``json`` module within this module.
    """
    return MessageSegment.json(data)
|
||||
|
||||
|
||||
def face(id_):
    """Build a QQ emoticon MessageSegment for face id *id_*."""
    return MessageSegment.face(id_)
|
||||
|
||||
|
||||
def poke(qq):
    """Build a 'poke' MessageSegment targeting *qq* (constructed manually
    since the adapter exposes no dedicated helper)."""
    return MessageSegment('poke', {"qq": qq})
|
||||
|
||||
|
||||
def forward():
    """Build a forward MessageSegment."""
    return MessageSegment.forward()
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# print(get_record_result("dadada", "", type="amr"))
|
||||
274
util/langconv.py
274
util/langconv.py
@ -1,274 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from copy import deepcopy
|
||||
import re
|
||||
|
||||
try:
|
||||
import psyco
|
||||
psyco.full()
|
||||
except:
|
||||
pass
|
||||
|
||||
from .zh_wiki import zh2Hant, zh2Hans
|
||||
|
||||
import sys
|
||||
py3k = sys.version_info >= (3, 0, 0)
|
||||
|
||||
if py3k:
|
||||
UEMPTY = ''
|
||||
else:
|
||||
_zh2Hant, _zh2Hans = {}, {}
|
||||
for old, new in ((zh2Hant, _zh2Hant), (zh2Hans, _zh2Hans)):
|
||||
for k, v in old.items():
|
||||
new[k.decode('utf8')] = v.decode('utf8')
|
||||
zh2Hant = _zh2Hant
|
||||
zh2Hans = _zh2Hans
|
||||
UEMPTY = ''.decode('utf8')
|
||||
|
||||
# states
|
||||
(START, END, FAIL, WAIT_TAIL) = list(range(4))
|
||||
# conditions
|
||||
(TAIL, ERROR, MATCHED_SWITCH, UNMATCHED_SWITCH, CONNECTOR) = list(range(5))
|
||||
|
||||
MAPS = {}
|
||||
|
||||
class Node(object):
    """One entry of the conversion trie: maps *from_word* to *to_word*,
    recording whether it terminates a mapping (is_tail) and whether
    longer keys extend it (have_child)."""

    def __init__(self, from_word, to_word=None, is_tail=True,
            have_child=False):
        self.from_word = from_word
        # No explicit target means an identity ("original") mapping.
        self.is_original = to_word is None
        if self.is_original:
            self.to_word = from_word
        else:
            self.to_word = to_word or from_word
        self.data = (is_tail, have_child,
                     from_word if to_word is None else to_word)
        self.is_tail = is_tail
        self.have_child = have_child

    def is_original_long_word(self):
        """True for an identity mapping longer than one character."""
        return self.is_original and len(self.from_word) > 1

    def is_follow(self, chars):
        """True unless *chars* equals this word minus its last character."""
        return chars != self.from_word[:-1]

    def __str__(self):
        return '<Node, %s, %s, %s, %s>' % (repr(self.from_word),
                repr(self.to_word), self.is_tail, self.have_child)

    __repr__ = __str__
|
||||
|
||||
class ConvertMap(object):
    """Lookup table for one conversion direction: maps every key and every
    proper key prefix to a (is_tail, have_child, to_word) triple, consumed
    through Node by the state machines."""

    def __init__(self, name, mapping=None):
        self.name = name
        self._map = {}
        if mapping:
            self.set_convert_map(mapping)

    def set_convert_map(self, mapping):
        """Index *mapping*, marking each proper prefix of a key as an
        interior node of the implicit trie."""
        trie = {}
        interior = {}
        longest = 0
        for key in sorted(mapping.keys()):
            if len(key) > 1:
                # Every proper prefix has at least one longer extension.
                for cut in range(1, len(key)):
                    interior[key[:cut]] = True
            interior[key] = False
            longest = max(longest, len(key))
        for key in sorted(interior.keys()):
            trie[key] = (key in mapping, interior[key],
                         mapping.get(key, UEMPTY))
        self._map = trie
        self.max_key_length = longest

    def __getitem__(self, k):
        # Unknown keys fall back to an identity Node.
        try:
            is_tail, have_child, to_word = self._map[k]
            return Node(k, to_word, is_tail, have_child)
        except:
            return Node(k)

    def __contains__(self, k):
        return k in self._map

    def __len__(self):
        return len(self._map)
|
||||
|
||||
class StatesMachineException(Exception): pass
|
||||
|
||||
class StatesMachine(object):
    """One conversion attempt: feeds characters against the mapping trie,
    accumulating converted output in ``final``.

    ``state`` is one of the module-level START/END/FAIL/WAIT_TAIL
    constants; ``pool`` buffers the characters of a partially matched
    multi-character key.
    """

    def __init__(self):
        self.state = START
        self.final = UEMPTY   # converted output so far
        self.len = 0          # number of source words consumed
        self.pool = UEMPTY    # pending partial match

    def clone(self, pool):
        """Fork this machine to explore an alternative (longer) match."""
        new = deepcopy(self)
        new.state = WAIT_TAIL
        new.pool = pool
        return new

    def feed(self, char, map):
        """Consume one character against *map*; may return a cloned
        machine when the trie allows a competing longer match."""
        node = map[self.pool+char]

        # Classify the trie position reached by pool + char.
        if node.have_child:
            if node.is_tail:
                if node.is_original:
                    cond = UNMATCHED_SWITCH
                else:
                    cond = MATCHED_SWITCH
            else:
                cond = CONNECTOR
        else:
            if node.is_tail:
                cond = TAIL
            else:
                cond = ERROR

        new = None
        if cond == ERROR:
            self.state = FAIL
        elif cond == TAIL:
            if self.state == WAIT_TAIL and node.is_original_long_word():
                self.state = FAIL
            else:
                # Definite match: emit and reset the pool.
                self.final += node.to_word
                self.len += 1
                self.pool = UEMPTY
                self.state = END
        elif self.state == START or self.state == WAIT_TAIL:
            if cond == MATCHED_SWITCH:
                # Emit now, but fork a clone to also try the longer match.
                new = self.clone(node.from_word)
                self.final += node.to_word
                self.len += 1
                self.state = END
                self.pool = UEMPTY
            elif cond == UNMATCHED_SWITCH or cond == CONNECTOR:
                if self.state == START:
                    new = self.clone(node.from_word)
                    self.final += node.to_word
                    self.len += 1
                    self.state = END
                else:
                    if node.is_follow(self.pool):
                        self.state = FAIL
                    else:
                        self.pool = node.from_word
        elif self.state == END:
            # END is a new START
            self.state = START
            new = self.feed(char, map)
        elif self.state == FAIL:
            raise StatesMachineException('Translate States Machine '
                    'have error with input data %s' % node)
        return new

    def __len__(self):
        # Word count + 1; used by Converter._clean to prefer the machine
        # that split the input into the fewest words.
        return self.len + 1

    def __str__(self):
        return '<StatesMachine %s, pool: "%s", state: %s, final: %s>' % (
            id(self), self.pool, self.state, self.final)
    __repr__ = __str__
|
||||
|
||||
class Converter(object):
    """Drives a population of StatesMachines over an input string and
    collects the converted text for one registered target encoding
    ('zh-hant' / 'zh-hans')."""

    def __init__(self, to_encoding):
        self.to_encoding = to_encoding
        self.map = MAPS[to_encoding]  # ConvertMap registered via registery()
        self.start()

    def feed(self, char):
        """Feed one character to every live machine; machines fork on
        ambiguous matches and are culled once they FAIL."""
        branches = []
        for fsm in self.machines:
            new = fsm.feed(char, self.map)
            if new:
                branches.append(new)
        if branches:
            self.machines.extend(branches)
        self.machines = [fsm for fsm in self.machines if fsm.state != FAIL]
        all_ok = True
        for fsm in self.machines:
            if fsm.state != END:
                all_ok = False
        if all_ok:
            # Every surviving branch agrees on a word boundary: commit.
            self._clean()
        return self.get_result()

    def _clean(self):
        """Commit the best (fewest-words) machine's output and reset the
        population to a single fresh machine."""
        if len(self.machines):
            self.machines.sort(key=lambda x: len(x))
            # self.machines.sort(cmp=lambda x,y: cmp(len(x), len(y)))
            self.final += self.machines[0].final
        self.machines = [StatesMachine()]

    def start(self):
        """Reset all conversion state."""
        self.machines = [StatesMachine()]
        self.final = UEMPTY

    def end(self):
        """Flush: drop half-finished machines, commit what remains."""
        self.machines = [fsm for fsm in self.machines
                if fsm.state == FAIL or fsm.state == END]
        self._clean()

    def convert(self, string):
        """Convert *string* and return the converted text."""
        self.start()
        for char in string:
            self.feed(char)
        self.end()
        return self.get_result()

    def get_result(self):
        return self.final
|
||||
|
||||
|
||||
def registery(name, mapping):
    """Register *mapping* under *name* in the module-level MAPS table.
    (Spelling of the name kept as-is for backward compatibility.)"""
    global MAPS
    registered = ConvertMap(name, mapping)
    MAPS[name] = registered
|
||||
|
||||
registery('zh-hant', zh2Hant)
|
||||
registery('zh-hans', zh2Hans)
|
||||
del zh2Hant, zh2Hans
|
||||
|
||||
|
||||
def run():
    """Command-line driver: convert a file (or stdin) line by line.

    -e encoding   target mapping name ('zh-hant' / 'zh-hans'), required
    -f file_in    input path, '-' for stdin (default stdin)
    -t file_out   output path, '-' for stdout (default stdout)

    NOTE(review): the rstrip + .decode('utf8') at the bottom assumes
    Python 2 byte strings; under Python 3 iterating a text file yields
    str, which has no .decode — confirm this entry point is still used.
    """
    import sys
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-e', type='string', dest='encoding',
            help='encoding')
    parser.add_option('-f', type='string', dest='file_in',
            help='input file (- for stdin)')
    parser.add_option('-t', type='string', dest='file_out',
            help='output file')
    (options, args) = parser.parse_args()
    if not options.encoding:
        parser.error('encoding must be set')
    # Resolve the input stream.
    if options.file_in:
        if options.file_in == '-':
            file_in = sys.stdin
        else:
            file_in = open(options.file_in)
    else:
        file_in = sys.stdin
    # Resolve the output stream.
    if options.file_out:
        if options.file_out == '-':
            file_out = sys.stdout
        else:
            file_out = open(options.file_out, 'wb')
    else:
        file_out = sys.stdout

    c = Converter(options.encoding)
    for line in file_in:
        # print >> file_out, c.convert(line.rstrip('\n').decode(
        file_out.write(c.convert(line.rstrip('\n').decode(
            'utf8')).encode('utf8'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
@ -1,47 +0,0 @@
|
||||
import random
|
||||
|
||||
|
||||
user_agent = [
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
|
||||
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
|
||||
"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
|
||||
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
|
||||
"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
|
||||
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
||||
"Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
||||
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
||||
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
||||
"MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
||||
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
|
||||
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
|
||||
"Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
|
||||
"Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
|
||||
"Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
|
||||
"Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
|
||||
"UCWEB7.0.2.37/28/999",
|
||||
"NOKIA5700/ UCWEB7.0.2.37/28/999",
|
||||
"Openwave/ UCWEB7.0.2.37/28/999",
|
||||
"Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
|
||||
# iPhone 6:
|
||||
"Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25"
|
||||
]
|
||||
|
||||
|
||||
def get_user_agent():
    """Return a requests-style headers dict carrying a User-Agent drawn
    at random from the module-level ``user_agent`` pool."""
    chosen = random.choice(user_agent)
    return {'User-Agent': chosen}
|
||||
263
util/utils.py
263
util/utils.py
@ -1,263 +0,0 @@
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from collections import defaultdict
|
||||
from nonebot import require
|
||||
import nonebot
|
||||
import json
|
||||
import pytz
|
||||
from configs.path_config import TXT_PATH
|
||||
from configs.config import system_proxy
|
||||
import pypinyin
|
||||
|
||||
|
||||
scheduler = require('nonebot_plugin_apscheduler').scheduler
|
||||
|
||||
|
||||
# Occurrence counting.
class CountLimiter:
    """Counts events per key; check() reports (and resets) a key once it
    has reached the configured maximum."""

    def __init__(self, max):
        self.count = defaultdict(int)  # key -> occurrences so far
        self.max = max

    def add(self, key):
        """Record one occurrence for *key*."""
        self.count[key] = self.count[key] + 1

    def check(self, key) -> bool:
        """True (and the counter resets) once *key* hit the maximum."""
        if self.count[key] < self.max:
            return False
        self.count[key] = 0
        return True
|
||||
|
||||
|
||||
# "User currently running this command" tracking.
class UserExistLimiter:
    """Tracks whether a key (user) is currently inside a command, with a
    30-second lazy expiry."""

    def __init__(self):
        self.mbool = defaultdict(bool)  # key -> "busy" flag
        # NOTE(review): a single shared timestamp, not per-key — any
        # set_True refreshes the expiry window for every key; confirm
        # this is intended.
        self.time = time.time()

    def set_True(self, key):
        # Mark *key* busy and refresh the (shared) timestamp.
        self.time = time.time()
        self.mbool[key] = True

    def set_False(self, key):
        # Mark *key* idle.
        self.mbool[key] = False

    def check(self, key):
        # Busy flag with 30 s expiry: a stale flag is cleared lazily.
        if time.time() - self.time > 30:
            self.set_False(key)
            return False
        return self.mbool[key]
|
||||
|
||||
|
||||
# Command cooldown.
class FreqLimiter:
    """Per-key cooldown: check() is True once a key's cooldown expired."""

    def __init__(self, default_cd_seconds):
        self.next_time = defaultdict(float)  # key -> earliest allowed time
        self.default_cd = default_cd_seconds

    def check(self, key) -> bool:
        """True when *key* is off cooldown."""
        return not time.time() < self.next_time[key]

    def start_cd(self, key, cd_time=0):
        """Put *key* on cooldown for *cd_time* seconds (default_cd if 0)."""
        duration = cd_time if cd_time > 0 else self.default_cd
        self.next_time[key] = time.time() + duration

    def left_time(self, key) -> float:
        """Seconds of cooldown remaining (negative once expired)."""
        return self.next_time[key] - time.time()
|
||||
|
||||
|
||||
static_flmt = FreqLimiter(15)
|
||||
|
||||
|
||||
# Malicious command-spam detection.
class BanCheckLimiter:
    """check() turns True when a key triggered default_count times inside
    a default_check_time-second window."""

    def __init__(self, default_check_time: float = 5, default_count: int = 4):
        self.mint = defaultdict(int)      # key -> trigger count
        self.mtime = defaultdict(float)   # key -> window start time
        self.default_check_time = default_check_time
        self.default_count = default_count

    def add(self, key):
        """Record a trigger; the window clock starts at the second one."""
        if self.mint[key] == 1:
            self.mtime[key] = time.time()
        self.mint[key] = self.mint[key] + 1

    def check(self, key) -> bool:
        """True when the key exceeded its trigger budget inside the
        window; the counter resets either way once checked/expired."""
        now = time.time()
        elapsed = now - self.mtime[key]
        if elapsed > self.default_check_time:
            # Window passed quietly: start over.
            self.mtime[key] = now
            self.mint[key] = 0
            return False
        if self.mint[key] >= self.default_count and elapsed < self.default_check_time:
            self.mtime[key] = now
            self.mint[key] = 0
            return True
        return False
|
||||
|
||||
|
||||
# Daily usage quota.
class DailyNumberLimiter:
    """Per-key daily quota; the day rolls over at 05:00 Asia/Shanghai."""

    tz = pytz.timezone('Asia/Shanghai')

    def __init__(self, max_num):
        self.today = -1                 # day-of-month of the current window
        self.count = defaultdict(int)   # key -> uses today
        self.max = max_num

    def check(self, key) -> bool:
        """True while *key* still has quota left today."""
        now = datetime.now(self.tz)
        day = (now - timedelta(hours=5)).day  # 5 AM day boundary
        if day != self.today:
            # New day: forget every count.
            self.today = day
            self.count.clear()
        return bool(self.count[key] < self.max)

    def get_num(self, key):
        """Current usage count for *key*."""
        return self.count[key]

    def increase(self, key, num=1):
        """Consume *num* units of quota."""
        self.count[key] = self.count[key] + num

    def reset(self, key):
        """Clear *key*'s usage."""
        self.count[key] = 0
|
||||
|
||||
|
||||
def is_number(s) -> bool:
    """True when *s* parses as a float or is a single numeric character
    (CJK numerals handled via unicodedata)."""
    try:
        float(s)
    except ValueError:
        pass
    else:
        return True
    try:
        import unicodedata
        unicodedata.numeric(s)
    except (TypeError, ValueError):
        return False
    return True
|
||||
|
||||
|
||||
def get_lines(path: str, start: int = 0, end: int = 0) -> list:
    """Read *path* and return its non-blank lines, each sliced to
    [start:] (or [start:end] when end != 0). Decode errors ignored."""
    collected = []
    with open(path, 'r', errors='ignore', encoding="UTF-8") as f:
        for line in f.readlines():
            if line == "\n" or line == "":
                continue
            collected.append(line[start:] if end == 0 else line[start:end])
    return collected
|
||||
|
||||
|
||||
# First connected bot.
def get_bot():
    """Return the first connected bot instance (IndexError when none)."""
    bots = nonebot.get_bots()
    return list(bots.values())[0]
|
||||
|
||||
|
||||
def get_message_at(data: str) -> list:
    """Extract the QQ ids of all at-segments from a raw event JSON
    string; [] on any parse problem after JSON decoding."""
    data = json.loads(data)
    try:
        return [int(seg['data']['qq'])
                for seg in data['message'] if seg['type'] == 'at']
    except Exception:
        return []
|
||||
|
||||
|
||||
def get_message_imgs(data: str) -> list:
    """Collect the URLs of every image segment of a raw event JSON
    string; [] on any parse problem after JSON decoding."""
    data = json.loads(data)
    try:
        return [seg['data']['url']
                for seg in data['message'] if seg['type'] == 'image']
    except Exception:
        return []
|
||||
|
||||
|
||||
def get_message_text(data: str) -> str:
    """Join the stripped text of all text segments with single spaces;
    '' on any parse problem after JSON decoding."""
    data = json.loads(data)
    try:
        pieces = [seg['data']['text'].strip()
                  for seg in data['message'] if seg['type'] == 'text']
        return ' '.join(pieces).strip()
    except Exception:
        return ''
|
||||
|
||||
|
||||
def get_message_type(data: str) -> str:
    """Return the message_type field of a raw event JSON string."""
    payload = json.loads(data)
    return payload['message_type']
|
||||
|
||||
|
||||
def get_message_record(data: str) -> str:
    """URL of the first voice-record segment, or '' when there is none
    or the payload is malformed (after JSON decoding)."""
    data = json.loads(data)
    try:
        for seg in data['message']:
            if seg['type'] == 'record':
                return seg['data']['url']
    except Exception:
        pass
    return ''
|
||||
|
||||
|
||||
def get_message_json(data: str) -> dict:
    """Data payload of the first json segment, or {} when there is none
    or the payload is malformed (after JSON decoding)."""
    data = json.loads(data)
    try:
        for seg in data['message']:
            if seg['type'] == 'json':
                return seg['data']
    except Exception:
        pass
    return {}
|
||||
|
||||
|
||||
def add_to_16(value):
    """Pad *value* with NUL characters up to a multiple of 16 and return
    it encoded to bytes (block padding for 16-byte ciphers such as AES).
    The original appended one character per loop iteration; this computes
    the pad length in O(1)."""
    padding = (-len(value)) % 16
    return str.encode(value + '\0' * padding)
|
||||
|
||||
|
||||
# Stored cookie text.
def get_cookie_text(cookie_name: str) -> str:
    """Read and return TXT_PATH/cookie/<cookie_name>.txt in full."""
    cookie_file = TXT_PATH + "cookie/" + cookie_name + ".txt"
    with open(cookie_file, 'r') as f:
        return f.read()
|
||||
|
||||
|
||||
# Local HTTP proxy.
def get_local_proxy():
    """Return the configured proxy string (configs.config.system_proxy),
    or None when no proxy is configured."""
    # Earlier revision auto-detected the system proxy (kept for reference):
    # from urllib.request import getproxies
    # import platform
    # proxy = getproxies()['http']
    # if platform.system() != 'Windows':
    # proxy = 'http://' + proxy
    return system_proxy if system_proxy else None
|
||||
|
||||
|
||||
# Chinese-text detection.
def is_Chinese(word):
    """True when *word* contains at least one CJK unified ideograph."""
    return any('\u4e00' <= ch <= '\u9fff' for ch in word)
|
||||
|
||||
|
||||
def user_avatar(qq):
    """URL of the 160x160 QQ avatar for account *qq*."""
    return 'http://q1.qlogo.cn/g?b=qq&nk={}&s=160'.format(qq)
|
||||
|
||||
|
||||
def group_avatar(group_id):
    """URL of the 640px avatar for QQ group *group_id*."""
    return 'http://p.qlogo.cn/gh/{0}/{0}/640/'.format(group_id)
|
||||
|
||||
|
||||
def cn2py(word) -> str:
    """Transliterate *word* to toneless pinyin, concatenated with no
    separators."""
    syllables = pypinyin.pinyin(word, style=pypinyin.NORMAL)
    return ''.join(''.join(s) for s in syllables)
|
||||
|
||||
8275
util/zh_wiki.py
8275
util/zh_wiki.py
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user