mirror of
https://github.com/zhenxun-org/zhenxun_bot.git
synced 2025-12-15 06:12:53 +08:00
Delete util directory
This commit is contained in:
parent
5db2cb656a
commit
dfa5643a05
@ -1,140 +0,0 @@
|
||||
import requests
|
||||
from util.user_agent import get_user_agent
|
||||
from bs4 import BeautifulSoup
|
||||
from time import sleep
|
||||
import threading
|
||||
import os
|
||||
from configs.path_config import IMAGE_PATH
|
||||
|
||||
lock = threading.Lock()
|
||||
|
||||
url = "https://search.bilibili.com/article"
|
||||
# path = IMAGE_PATH + "setu/"
|
||||
|
||||
index = 1
|
||||
THREAD_SUM_REMAINDER = 2 # 越小线程越多
|
||||
|
||||
|
||||
class bilibiliThread(threading.Thread):
    """Worker thread that downloads the images behind a batch of
    bilibili article-search result pages."""

    def __init__(self, threadId, url_list, path, nolist):
        super().__init__()
        self.threadId = threadId
        self.url_list = url_list
        self.path = path
        self.nolist = nolist

    def run(self):
        print("开始线程<><><><><><><><><> " + self.threadId)
        thread_get_url(self.threadId, self.url_list, self.path, self.nolist)
|
||||
|
||||
|
||||
def get_bilibili_img(name, path, nolist=None):
    """Search bilibili articles for *name* and download every image found.

    Spawns one bilibiliThread per THREAD_SUM_REMAINDER result pages; each
    worker scrapes the article pages and saves images under *path*.

    Args:
        name: keyword fed to the bilibili article search.
        path: destination directory for the downloaded ``.jpg`` files.
        nolist: optional collection of article hrefs to skip
            (forwarded to thread_get_url).
    """
    global index
    # Resume file numbering after whatever is already on disk.
    index = get_dirfile_len(path)
    print("index===", index)
    threadId = 1
    params = {
        'keyword': name,
        'page': '1'
    }
    res = requests.get(url, headers=get_user_agent(), params=params)
    sleep(8)
    soup = BeautifulSoup(res.text, 'html.parser')
    # The site has served (at least) two pagination markups; try both
    # selectors before falling back to a single page.
    try:
        total_page = soup.find_all('button', {'class': 'pagination-btn'})[-1].text.strip()
        print("1 try")
    except:
        try:
            total_page = soup.find_all('button', {'class': 'pagination-btn num-btn'})[-1].text.strip()
            print("2 try")
        except:
            total_page = 1
            print("3 except")
    print(total_page)
    url_list = []
    for page in range(1, int(total_page)+1):
        url_r = "https://search.bilibili.com/article?keyword=" + name + "&page=" + str(page)
        url_list.append(url_r)
        # Hand a batch to a worker every THREAD_SUM_REMAINDER pages.
        if page % THREAD_SUM_REMAINDER == 0:
            print('-----> ' + str(page) + " =======>", url_list)
            # _thread.start_new_thread(thread_get_url, (url_list, path,))
            bilibiliThread(str(threadId), url_list, path, nolist).start()
            threadId += 1
            sleep(0.5)
            url_list = []
    # Leftover pages that did not fill a whole batch get a final worker.
    if url_list:
        print("=========================最后一个线程启动========================= url数量: ", len(url_list))
        bilibiliThread(str(threadId), url_list, path, nolist).start()
|
||||
|
||||
|
||||
def thread_get_url(threadId, url_list, path, nolist):
    """Worker body: scrape each search-result page in *url_list*, collect
    the article image URLs and store the images under *path*.

    Args:
        threadId: worker id string, used only in log output.
        url_list: search-result page URLs for this worker.
        path: destination directory for saved images.
        nolist: optional collection of article hrefs to skip.
    """
    # NOTE: the loop variable shadows the module-level ``url`` constant;
    # harmless here but worth knowing when reading the module.
    for url in url_list:
        res = requests.get(url, headers=get_user_agent())
        sleep(2)
        soup = BeautifulSoup(res.text, 'lxml')
        alist = soup.find_all('a', {'class': 'poster'})
        img_content_page = []
        # print(alist)
        for a in alist:
            # hrefs look protocol-relative ("//..."): [2:] strips the
            # leading slashes before prefixing https://.
            if nolist != None:
                if a.get('href') not in nolist:
                    img_content_page.append("https://" + a.get('href')[2:])
            else:
                img_content_page.append("https://" + a.get('href')[2:])
        pic_url = []
        for img_content in img_content_page:
            print("开始获取---------->", img_content)
            res = requests.get(img_content, headers=get_user_agent())
            sleep(2)
            soup = BeautifulSoup(res.text, 'lxml')
            figure_ls = soup.body.find_all('figure')
            # print(figure_ls)
            for figure in figure_ls:
                # Keep only <img> tags WITHOUT a class attribute — the
                # classed ones are presumably page furniture, not content
                # images (TODO confirm against the live markup).
                try:
                    _ = figure.img.attrs['class']
                except:
                    data_src = figure.img.attrs['data-src']
                    pic_url.append('https:' + data_src)
        print("线程 " + threadId + " 获取完毕------> 开始存储")
        for url in pic_url:
            print("线程 " + threadId + "正在存储---------------->", url)
            res = requests.get(url, headers=get_user_agent())
            save_img(res.content, path, threadId)
        pic_url = []
    print("线程 " + threadId + " ---------------->执行完毕")
|
||||
|
||||
|
||||
def save_img(img, path, threadId):
    """Write one image's raw bytes to ``<path><n>.jpg``.

    The file number is reserved from the shared module-level ``index``
    under ``lock`` so concurrent bilibiliThread workers never pick the
    same slot.

    Fixes two defects in the original:
    * the second try/finally released ``lock`` even when ``lock.acquire()``
      (placed after the write) had never run, raising
      "release unlocked lock" on any open/write failure;
    * two workers could read the same ``index`` before either incremented
      it, silently overwriting each other's file.  The slot is now
      reserved atomically; a failed write simply leaves a gap (which
      scan_img exists to compact).

    Args:
        img: raw image bytes (``response.content``).
        path: destination directory, including trailing separator.
        threadId: worker id string, used only in log output.
    """
    global index
    # Reserve a unique file number atomically.
    with lock:
        img_index = index
        index = img_index + 1
    try:
        with open(path + str(img_index) + ".jpg", 'wb') as f:
            f.write(img)
    except Exception:
        # Best effort, as before: log and keep the worker alive.
        print("线程 " + threadId + "存储失败-------->" + str(img_index) + ".jpg")
|
||||
|
||||
|
||||
def get_dirfile_len(path):
    """Return how many entries the directory at *path* contains."""
    entries = os.listdir(path)
    return len(entries)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual entry point: crawl a curated wallpaper article search and
    # save the images under IMAGE_PATH/bizhi/.  Network-heavy; run
    # standalone only.
    get_bilibili_img("精选动漫壁纸手机电脑壁纸&动漫游戏专题", IMAGE_PATH + "bizhi/")
|
||||
@ -1,296 +0,0 @@
|
||||
import os
|
||||
from configs.path_config import IMAGE_PATH, TXT_PATH, TTF_PATH
|
||||
from PIL import Image, ImageFile, ImageDraw, ImageFont
|
||||
import cv2
|
||||
import imagehash
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
|
||||
# 扫描图库id是否连贯
|
||||
def scan_img(path):
    """Compact a directory of sequentially numbered ``<n>.jpg`` files.

    Walks ids 0..len-1; whenever ``i.jpg`` is missing, the current
    highest-numbered file is renamed into the gap so ids stay contiguous.

    NOTE(review): after renaming ``(length-1).jpg`` the code appends
    ``length`` (not ``length - 1``) to ``nolist``, so the skip check at
    the top of the loop never matches the slot that was just vacated —
    looks like an off-by-one; confirm against real usage before changing.
    """
    path = IMAGE_PATH + path
    nolist = []
    length = len(os.listdir(path))
    print(length)
    for i in range(length):
        if i in nolist:
            continue
        img_path = path + "{}.jpg".format(i)
        if not os.path.exists(img_path):
            print("不存在=== " + str(length) + ".jpg -------> " + str(i) + ".jpg")
            os.rename(path + "{}.jpg".format(length - 1), img_path)
            nolist.append(length)
            length -= 1
|
||||
|
||||
|
||||
# 比较hash值
|
||||
def compare_image_with_hash(image_file1, image_file2, max_dif=1.5):
    """Return True when the two image files are perceptually similar.

    Args:
        image_file1: path of the first image.
        image_file2: path of the second image.
        max_dif: largest allowed average-hash distance; smaller is
            stricter, 0 means an exact hash match.

    Returns:
        bool: True if the absolute hash difference is within *max_dif*.
    """
    # Tolerate truncated image files, as the original did.
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    # Cleanup: dropped the two dead ``hash_x = None`` pre-assignments and
    # collapsed the manual negation + if/else-over-booleans into abs()
    # and a direct comparison.
    return abs(get_img_hash(image_file1) - get_img_hash(image_file2)) <= max_dif
|
||||
|
||||
|
||||
# 比较图片与hash值
|
||||
def compare_one_img_hash(image_file, hash_2, max_dif=1.5):
    """Return True when *image_file*'s average hash is within *max_dif*
    of the precomputed hash *hash_2*.

    Cleanup: the manual sign flip and the if/else returning booleans were
    replaced with abs() and a direct comparison — behaviour unchanged.
    """
    return abs(get_img_hash(image_file) - hash_2) <= max_dif
|
||||
|
||||
|
||||
def get_img_hash(image_file):
    """Compute and return the perceptual average hash of the image at
    *image_file*."""
    with open(image_file, 'rb') as fp:
        opened = Image.open(fp)
        hash_value = imagehash.average_hash(opened)
    return hash_value
|
||||
|
||||
|
||||
# 压缩图片
|
||||
def rar_imgs(inpath, outpath, ratio=0.9, start=0, end=0, lens=0, maxsize=0.0, in_file_name='', out_file_name='',
             itype='jpg'):
    """Batch- or single-compress images by resizing them to *ratio*.

    Two modes:
    * single file — when both *in_file_name* and *out_file_name* are
      given, compress exactly IMAGE_PATH/inpath/<in_file_name>.<itype>.
    * batch — otherwise, compress the numbered files start..end-1 under
      IMAGE_PATH/inpath/; when *maxsize* is non-zero only files larger
      than that byte count are touched.

    Args:
        inpath / outpath: source and destination directories (relative to
            IMAGE_PATH, without trailing slash).
        ratio: linear scale factor applied to width and height.
        start, end, lens: batch id range; end==0 means "through the last
            file", lens==0 means "count the directory".
        maxsize: minimum byte size for a file to be compressed (0 = all).
        in_file_name / out_file_name: single-file mode names (no extension).
        itype: extension used in single-file mode only; the batch branch
            hard-codes ".jpg".
    """
    in_path = IMAGE_PATH + inpath + '/'
    out_path = IMAGE_PATH + outpath + '/'
    # scan_img(inpath)
    l = []
    if in_file_name != '' and out_file_name != '':
        filein = in_path + in_file_name + "." + itype
        fileout = out_path + out_file_name + "." + itype
        # cv2.imread returns (height, width, depth).
        h, w, d = cv2.imread(filein).shape
        width = int(w * ratio)
        height = int(h * ratio)
        ResizeImage(filein, fileout, width, height)
    else:
        if lens == 0:
            lens = len(os.listdir(in_path))
        if end == 0:
            end = lens
        for i in range(start, end):
            # ``l`` is always empty here; the skip list is vestigial.
            if i in l:
                continue
            if maxsize != 0:
                if os.path.getsize(in_path + str(i) + ".jpg") > maxsize:
                    print("压缩----->", i, ".jpg")
                    filein = in_path + str(i) + ".jpg"
                    fileout = out_path + str(i) + ".jpg"
                    h, w, d = cv2.imread(filein).shape
                    width = int(w * ratio)
                    height = int(h * ratio)
                    ResizeImage(filein, fileout, width, height)
                else:
                    continue
            else:
                print("压缩----->", i, ".jpg")
                filein = in_path + str(i) + ".jpg"
                fileout = out_path + str(i) + ".jpg"
                h, w, d = cv2.imread(filein).shape
                width = int(w * ratio)
                height = int(h * ratio)
                ResizeImage(filein, fileout, width, height)
|
||||
|
||||
|
||||
# 压缩
|
||||
def ResizeImage(filein, fileout, width, height):
    """Resize the image at *filein* to (width, height) pixels and write
    the result to *fileout*."""
    source = cv2.imread(filein)
    resized = cv2.resize(source, (int(width), int(height)))
    cv2.imwrite(fileout, resized)
|
||||
|
||||
|
||||
# 保存图片压缩后的hash值
|
||||
def save_img_hash(path, name):
    """Record the perceptual hash of every image under IMAGE_PATH/*path*
    into the ``name`` hash list via compare_img_hash_in_txt.

    Files larger than 1.5 MB are hashed from IMAGE_PATH/rar/<file>
    instead — presumably a pre-compressed copy produced by rar_imgs;
    TODO confirm the rar/ copy exists before relying on this branch.
    """
    for file in os.listdir(IMAGE_PATH + path):
        if os.path.getsize(IMAGE_PATH + path + file) > 1024 * 1024 * 1.5:
            compare_img_hash_in_txt(IMAGE_PATH + 'rar/' + file, name)
        else:
            compare_img_hash_in_txt(IMAGE_PATH + path + file, name)
|
||||
|
||||
|
||||
# 比较色图hash值
|
||||
def compare_img_hash_in_txt(file, name, mode=1):
    """Check (and, in mode 1, record) *file*'s perceptual hash against the
    comma-separated hash list TXT_PATH/<name>.txt.

    Args:
        file: image path to hash.
        name: basename of the hash-list text file.
        mode: 1 appends unseen hashes to the list; any other value only
            tests membership.

    Returns:
        bool: True when the hash was already present (duplicate image),
        False when it was new.
    """
    # 'a+' creates the list file on first use; seek(0) rewinds so the
    # whole existing list can be read.
    with open(TXT_PATH + name + ".txt", 'a+') as txtfile:
        txtfile.seek(0)
        hash_list = txtfile.read()[:-1].strip(",")
        # NOTE(review): seek(2) positions 2 bytes from the START of the
        # file, yet writes in 'a+' mode always append at the end anyway —
        # this looks like a no-op (or a mistyped seek(0, 2)); confirm
        # before changing.
        txtfile.seek(2)
        with open(file, 'rb') as fp:
            img_hash = str(imagehash.average_hash(Image.open(fp)))
        if img_hash not in hash_list:
            if mode == 1:
                txtfile.write(img_hash + ",")
            return False
        return True
|
||||
|
||||
|
||||
# 透明背景 -> 白色
|
||||
def alphabg2white_PIL(img):
    """Return *img* with every fully transparent pixel replaced by opaque
    white.

    Works pixel-by-pixel through getpixel/putpixel, i.e. O(w*h) Python
    calls — fine for avatars, slow for large images.
    """
    img = img.convert('RGBA')
    sp = img.size
    width = sp[0]
    height = sp[1]
    for yh in range(height):
        for xw in range(width):
            dot = (xw, yh)
            color_d = img.getpixel(dot)
            # Only alpha == 0 pixels are rewritten; partially transparent
            # pixels keep their original colour.
            if color_d[3] == 0:
                color_d = (255, 255, 255, 255)
                img.putpixel(dot, color_d)
    return img
|
||||
|
||||
|
||||
def pic2b64(pic: Image) -> str:
    """Encode *pic* as a PNG and return it as a ``base64://`` data string."""
    buffer = BytesIO()
    pic.save(buffer, format='PNG')
    encoded = base64.b64encode(buffer.getvalue())
    return 'base64://' + encoded.decode()
|
||||
|
||||
|
||||
def fig2b64(plt: plt) -> str:
    """Render the current matplotlib figure as a 100-dpi PNG and return
    it as a ``base64://`` data string."""
    buffer = BytesIO()
    plt.savefig(buffer, format='PNG', dpi=100)
    encoded = base64.b64encode(buffer.getvalue())
    return 'base64://' + encoded.decode()
|
||||
|
||||
|
||||
class CreateImg:
    """Mutable drawing canvas wrapping a PIL image plus its ImageDraw
    handle.

    Supports grid-style pasting of sub-images (img_w/img_h step the paste
    cursor), text rendering with a TTF font, resizing, transparency
    effects and base64 export.
    """

    def __init__(self,
                 w,
                 h,
                 img_w=0,
                 img_h=0,
                 color='white',
                 image_type='RGBA',
                 font_size=10,
                 background='',
                 ttf='yz.ttf',
                 divisor=1):
        """Create the canvas.

        Args:
            w, h: canvas size; with a *background* file, 0/0 means "use
                (and optionally scale) the background's own size".
            img_w, img_h: cell size used by paste() to advance the cursor.
            color: fill colour for a fresh canvas (no background given).
            image_type: PIL mode for a fresh canvas.
            font_size: point size for the TTF font.
            background: optional background image path.
            ttf: font file name, resolved under TTF_PATH.
            divisor: scale factor applied to a background-derived size
                (falsy keeps the background's native size).
        """
        self.w = int(w)
        self.h = int(h)
        self.img_w = int(img_w)
        self.img_h = int(img_h)
        # Paste cursor: top-left of the next grid cell.
        self.current_w = 0
        self.current_h = 0
        self.ttfont = ImageFont.truetype(TTF_PATH + ttf, int(font_size))
        if not background:
            self.markImg = Image.new(image_type, (self.w, self.h), color)
        else:
            if w == 0 and h == 0:
                self.markImg = Image.open(background)
                w, h = self.markImg.size
                if divisor:
                    self.w = int(divisor * w)
                    self.h = int(divisor * h)
                    self.markImg = self.markImg.resize((self.w, self.h), Image.ANTIALIAS)
                else:
                    self.w = w
                    self.h = h
            else:
                self.markImg = Image.open(background).resize((self.w, self.h), Image.ANTIALIAS)
        self.draw = ImageDraw.Draw(self.markImg)
        self.size = self.w, self.h

    # Paste a sub-image (or another CreateImg) at *pos*, or at the current
    # grid cursor when pos is falsy; alpha=True pastes with transparency.
    def paste(self, img, pos=None, alpha=False):
        if isinstance(img, CreateImg):
            img = img.markImg
        # Wrap the cursor to the next row when the current one is full.
        if self.current_w == self.w:
            self.current_w = 0
            self.current_h += self.img_h
        if not pos:
            pos = (self.current_w, self.current_h)
        if alpha:
            try:
                self.markImg.paste(img, pos, img)
            except ValueError:
                # Mask pasting needs an alpha channel; convert and retry.
                img = img.convert("RGBA")
                self.markImg.paste(img, pos, img)
        else:
            self.markImg.paste(img, pos)
        self.current_w += self.img_w
        return self.markImg

    # Pixel size (w, h) of *msg* rendered in the canvas font.
    def getsize(self, msg):
        return self.ttfont.getsize(msg)

    # Draw *text* at *pos* with the canvas font.
    def text(self, pos, text, fill=(0, 0, 0)):
        self.draw.text(pos, text, fill=fill, font=self.ttfont)
        return self.markImg

    # Draw a fixed demo pie slice (hard-coded geometry and colours).
    def pieslice(self):
        self.draw.pieslice((350, 50, 500, 200), -150, -30, 'pink', 'crimson')
        return self.markImg

    # Save the canvas to *path*.
    def save(self, path):
        self.markImg.save(path)

    # Show the canvas in the default image viewer.
    def show(self):
        self.markImg.show(self.markImg)

    # Scale the whole canvas by *ratio* and refresh the draw handle.
    def resize(self, ratio):
        self.markImg = self.markImg.resize((int(self.w * ratio), int(self.h * ratio)), Image.ANTIALIAS)
        self.w, self.h = self.markImg.size
        self.size = self.w, self.h
        self.draw = ImageDraw.Draw(self.markImg)

    # True when *word* rendered in the canvas font is wider than the canvas.
    def check_font_size(self, word):
        return self.ttfont.getsize(word)[0] > self.w

    # Set alpha to 100 on every pixel except an n-pixel border.
    def transparent(self, n=0):
        self.markImg = self.markImg.convert('RGBA')
        x, y = self.markImg.size
        for i in range(n, x - n):
            for k in range(n, y - n):
                color = self.markImg.getpixel((i, k))
                color = color[:-1] + (100, )
                self.markImg.putpixel((i, k), color)
        return self.markImg

    # Export the canvas as a base64:// PNG string.
    def pic2bs4(self):
        buf = BytesIO()
        self.markImg.save(buf, format='PNG')
        base64_str = base64.b64encode(buf.getvalue()).decode()
        return 'base64://' + base64_str

    # Convert the underlying image to PIL mode *itype*.
    def convert(self, itype):
        self.markImg = self.markImg.convert(itype)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # No standalone behaviour; this module is import-only.
    pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,86 +0,0 @@
|
||||
from configs.path_config import IMAGE_PATH, VOICE_PATH
|
||||
from nonebot.adapters.cqhttp.message import MessageSegment
|
||||
import os
|
||||
from services.log import logger
|
||||
|
||||
|
||||
def image(img_name: str = None, path: str = '', abspath: str = None, b64: str = None):
    """Build a CQHTTP image MessageSegment from one of several sources.

    Exactly one source is used, in priority order: *abspath* (absolute
    file path), then *b64* (base64 payload with or without the
    ``base64://`` prefix), otherwise *img_name* resolved under
    IMAGE_PATH/*path* (a bare name gets ``.jpg`` appended; http URLs are
    passed through unchanged).

    Returns:
        MessageSegment on success, or '' when the referenced file is
        missing.
    """
    if abspath:
        if os.path.exists(abspath):
            return MessageSegment.image("file:///" + abspath)
        else:
            return ''
    elif b64:
        if b64.find('base64://') != -1:
            return MessageSegment.image(b64)
        else:
            return MessageSegment.image('base64://' + b64)
    else:
        img_name = str(img_name)
        if img_name.find('http') == -1:
            # Bare names get a default .jpg extension.
            if len(img_name.split('.')) == 1:
                img_name += '.jpg'
            if os.path.exists(IMAGE_PATH + path + '/' + img_name):
                return MessageSegment.image("file:///" + IMAGE_PATH + path + '/' + img_name)
            else:
                logger.warning(f"图片 {path}/{img_name}缺失.")
                return ''
        else:
            return MessageSegment.image(img_name)
|
||||
|
||||
|
||||
def at(qq):
    """Return an @-mention MessageSegment targeting the given QQ number."""
    return MessageSegment.at(qq)
|
||||
|
||||
|
||||
def record(voice_name='', path=''):
    """Build a CQHTTP voice MessageSegment for VOICE_PATH/[path/]voice_name.

    A bare name gets a default ``.mp3`` extension; http(s) URLs are
    passed straight through.  Returns '' (with a warning) when the local
    file is missing.

    Bug fixed: the no-subdirectory branch formatted the path with "{}."
    which produced names like ``<VOICE_PATH>x.mp3.`` (stray trailing
    dot), so local voices without a *path* could never be found.
    """
    if len(voice_name.split('.')) == 1:
        voice_name += '.mp3'
    if path == "":
        name = VOICE_PATH + "{}".format(voice_name)
    else:
        name = VOICE_PATH + "{}/{}".format(path, voice_name)
    if voice_name.find('http') == -1:
        if os.path.exists(name):
            result = MessageSegment.record("file:///" + name)
            return result
        else:
            logger.warning(f"语音{path}/{voice_name}缺失...")
            return ""
    else:
        return MessageSegment.record(voice_name)
|
||||
|
||||
|
||||
def text(msg):
    """Plain-text MessageSegment."""
    return MessageSegment.text(msg)


def contact_user(qq):
    """Recommend-friend (contact) MessageSegment for *qq*."""
    return MessageSegment.contact_user(qq)


def share(url, title, content='', image_url=''):
    """Link-share card MessageSegment."""
    return MessageSegment.share(url, title, content, image_url)


def xml(data):
    """Raw XML message MessageSegment."""
    return MessageSegment.xml(data)


def json(data):
    """Raw JSON message MessageSegment.

    NOTE: this shadows any ``json`` module import for code importing
    ``*`` from this module.
    """
    return MessageSegment.json(data)


def face(id_):
    """Built-in QQ face (emoji) MessageSegment by numeric id."""
    return MessageSegment.face(id_)


def poke(qq):
    """'Poke' MessageSegment targeting *qq*, built as a raw segment."""
    return MessageSegment('poke', {"qq": qq})


def forward():
    """Forward-node MessageSegment (called with no arguments here)."""
    return MessageSegment.forward()
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# print(get_record_result("dadada", "", type="amr"))
|
||||
274
util/langconv.py
274
util/langconv.py
@ -1,274 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from copy import deepcopy
|
||||
import re
|
||||
|
||||
try:
|
||||
import psyco
|
||||
psyco.full()
|
||||
except:
|
||||
pass
|
||||
|
||||
from .zh_wiki import zh2Hant, zh2Hans
|
||||
|
||||
import sys
|
||||
py3k = sys.version_info >= (3, 0, 0)
|
||||
|
||||
if py3k:
|
||||
UEMPTY = ''
|
||||
else:
|
||||
_zh2Hant, _zh2Hans = {}, {}
|
||||
for old, new in ((zh2Hant, _zh2Hant), (zh2Hans, _zh2Hans)):
|
||||
for k, v in old.items():
|
||||
new[k.decode('utf8')] = v.decode('utf8')
|
||||
zh2Hant = _zh2Hant
|
||||
zh2Hans = _zh2Hans
|
||||
UEMPTY = ''.decode('utf8')
|
||||
|
||||
# states
|
||||
(START, END, FAIL, WAIT_TAIL) = list(range(4))
|
||||
# conditions
|
||||
(TAIL, ERROR, MATCHED_SWITCH, UNMATCHED_SWITCH, CONNECTOR) = list(range(5))
|
||||
|
||||
MAPS = {}
|
||||
|
||||
class Node(object):
    """One lookup result from a ConvertMap.

    Wraps a source word plus its translation and the two trie flags the
    state machine drives on: *is_tail* (the word is a complete mapping
    key) and *have_child* (longer keys share this prefix).
    """

    def __init__(self, from_word, to_word=None, is_tail=True,
                 have_child=False):
        self.from_word = from_word
        if to_word is None:
            # No mapping known: the word converts to itself.
            self.to_word = from_word
            self.data = (is_tail, have_child, from_word)
            self.is_original = True
        else:
            self.to_word = to_word or from_word
            self.data = (is_tail, have_child, to_word)
            self.is_original = False
        self.is_tail = is_tail
        self.have_child = have_child

    def is_original_long_word(self):
        # An unmapped multi-character word: the machine must not accept
        # it as a whole while waiting for a tail.
        return self.is_original and len(self.from_word)>1

    def is_follow(self, chars):
        # True when *chars* is NOT the prefix of this word (note the !=).
        return chars != self.from_word[:-1]

    def __str__(self):
        return '<Node, %s, %s, %s, %s>' % (repr(self.from_word),
                repr(self.to_word), self.is_tail, self.have_child)

    __repr__ = __str__
|
||||
|
||||
class ConvertMap(object):
    """Prefix-aware lookup table built from a {source: target} dict.

    Each key (and every proper prefix of every key) maps to a tuple
    (is_tail, have_child, to_word) so the state machine can walk
    multi-character mappings one character at a time.
    """

    def __init__(self, name, mapping=None):
        self.name = name
        self._map = {}
        if mapping:
            self.set_convert_map(mapping)

    def set_convert_map(self, mapping):
        """(Re)build the internal prefix table from *mapping*."""
        convert_map = {}
        have_child = {}
        max_key_length = 0
        for key in sorted(mapping.keys()):
            if len(key)>1:
                # Every proper prefix of a multi-character key gains
                # children.  (Lexicographic order guarantees a prefix is
                # visited before its extensions, so the False below is
                # never clobbered the wrong way.)
                for i in range(1, len(key)):
                    parent_key = key[:i]
                    have_child[parent_key] = True
            have_child[key] = False
            max_key_length = max(max_key_length, len(key))
        for key in sorted(have_child.keys()):
            convert_map[key] = (key in mapping, have_child[key],
                    mapping.get(key, UEMPTY))
        self._map = convert_map
        self.max_key_length = max_key_length

    def __getitem__(self, k):
        """Return a Node for *k*; unknown words yield an identity Node."""
        try:
            is_tail, have_child, to_word = self._map[k]
            return Node(k, to_word, is_tail, have_child)
        except:
            return Node(k)

    def __contains__(self, k):
        return k in self._map

    def __len__(self):
        return len(self._map)
|
||||
|
||||
class StatesMachineException(Exception): pass  # raised when feed() is called on a machine already in FAIL state
|
||||
|
||||
class StatesMachine(object):
    """One branch of the conversion automaton.

    Consumes characters via feed(); accumulates converted output in
    *final* and buffers an in-progress multi-character match in *pool*.
    States: START/END (between words), WAIT_TAIL (cloned branch waiting
    for the rest of a long word), FAIL (dead branch, pruned by Converter).
    """

    def __init__(self):
        self.state = START
        self.final = UEMPTY
        self.len = 0
        self.pool = UEMPTY

    def clone(self, pool):
        """Fork this machine into a WAIT_TAIL branch holding *pool*."""
        new = deepcopy(self)
        new.state = WAIT_TAIL
        new.pool = pool
        return new

    def feed(self, char, map):
        """Advance on *char* using ConvertMap *map*.

        Returns a newly cloned branch when the input is ambiguous (a
        word is both a complete match and the prefix of a longer one),
        otherwise None.
        """
        node = map[self.pool+char]

        # Classify the lookup into one of the five transition conditions.
        if node.have_child:
            if node.is_tail:
                if node.is_original:
                    cond = UNMATCHED_SWITCH
                else:
                    cond = MATCHED_SWITCH
            else:
                cond = CONNECTOR
        else:
            if node.is_tail:
                cond = TAIL
            else:
                cond = ERROR

        new = None
        if cond == ERROR:
            self.state = FAIL
        elif cond == TAIL:
            if self.state == WAIT_TAIL and node.is_original_long_word():
                self.state = FAIL
            else:
                self.final += node.to_word
                self.len += 1
                self.pool = UEMPTY
                self.state = END
        elif self.state == START or self.state == WAIT_TAIL:
            if cond == MATCHED_SWITCH:
                # Word matched but longer matches exist: emit now and
                # fork a branch that keeps waiting for the longer one.
                new = self.clone(node.from_word)
                self.final += node.to_word
                self.len += 1
                self.state = END
                self.pool = UEMPTY
            elif cond == UNMATCHED_SWITCH or cond == CONNECTOR:
                if self.state == START:
                    new = self.clone(node.from_word)
                    self.final += node.to_word
                    self.len += 1
                    self.state = END
                else:
                    if node.is_follow(self.pool):
                        self.state = FAIL
                    else:
                        self.pool = node.from_word
        elif self.state == END:
            # END is a new START
            self.state = START
            new = self.feed(char, map)
        elif self.state == FAIL:
            raise StatesMachineException('Translate States Machine '
                    'have error with input data %s' % node)
        return new

    def __len__(self):
        # Number of emitted words + 1; used by Converter._clean to pick
        # the branch with the fewest (i.e. longest) matches.
        return self.len + 1

    def __str__(self):
        return '<StatesMachine %s, pool: "%s", state: %s, final: %s>' % (
                id(self), self.pool, self.state, self.final)
    __repr__ = __str__
|
||||
|
||||
class Converter(object):
    """Drives a set of StatesMachine branches over an input string."""

    def __init__(self, to_encoding):
        # *to_encoding* is a registered map name, e.g. 'zh-hant'/'zh-hans'.
        self.to_encoding = to_encoding
        self.map = MAPS[to_encoding]
        self.start()

    def feed(self, char):
        """Feed one character to every live branch, pruning FAILed ones."""
        branches = []
        for fsm in self.machines:
            new = fsm.feed(char, self.map)
            if new:
                branches.append(new)
        if branches:
            self.machines.extend(branches)
        self.machines = [fsm for fsm in self.machines if fsm.state != FAIL]
        all_ok = True
        for fsm in self.machines:
            if fsm.state != END:
                all_ok = False
        if all_ok:
            # Every branch is at a word boundary: commit the best one.
            self._clean()
        return self.get_result()

    def _clean(self):
        """Commit the output of the branch with the fewest words, then
        restart with a single fresh machine."""
        if len(self.machines):
            self.machines.sort(key=lambda x: len(x))
            # self.machines.sort(cmp=lambda x,y: cmp(len(x), len(y)))
            self.final += self.machines[0].final
        self.machines = [StatesMachine()]

    def start(self):
        """Reset all conversion state."""
        self.machines = [StatesMachine()]
        self.final = UEMPTY

    def end(self):
        """Flush: drop branches stuck mid-word, commit the rest."""
        self.machines = [fsm for fsm in self.machines
                if fsm.state == FAIL or fsm.state == END]
        self._clean()

    def convert(self, string):
        """Convert *string* and return the accumulated result."""
        self.start()
        for char in string:
            self.feed(char)
        self.end()
        return self.get_result()

    def get_result(self):
        return self.final
|
||||
|
||||
|
||||
def registery(name, mapping):
    """Register *mapping* under *name* in the module-level MAPS table."""
    # Item assignment mutates the existing dict, so no ``global`` is
    # needed for this to take effect module-wide.
    MAPS[name] = ConvertMap(name, mapping)
|
||||
|
||||
# Build both conversion tables at import time, then drop the raw source
# dicts to free memory.
registery('zh-hant', zh2Hant)
registery('zh-hans', zh2Hans)
del zh2Hant, zh2Hans
|
||||
|
||||
|
||||
def run():
    """CLI driver: convert a file or stdin between zh-hant and zh-hans.

    Options: -e map name (required), -f input file ('-' = stdin),
    -t output file ('-' = stdout).

    NOTE(review): the decode('utf8')/encode('utf8') round-trip at the
    bottom is Python-2 era — under Python 3 ``line`` is already ``str``
    and has no ``.decode``, so this entry point only works on Python 2;
    confirm before reusing.
    """
    import sys
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-e', type='string', dest='encoding',
            help='encoding')
    parser.add_option('-f', type='string', dest='file_in',
            help='input file (- for stdin)')
    parser.add_option('-t', type='string', dest='file_out',
            help='output file')
    (options, args) = parser.parse_args()
    if not options.encoding:
        parser.error('encoding must be set')
    if options.file_in:
        if options.file_in == '-':
            file_in = sys.stdin
        else:
            file_in = open(options.file_in)
    else:
        file_in = sys.stdin
    if options.file_out:
        if options.file_out == '-':
            file_out = sys.stdout
        else:
            file_out = open(options.file_out, 'wb')
    else:
        file_out = sys.stdout

    c = Converter(options.encoding)
    for line in file_in:
        # print >> file_out, c.convert(line.rstrip('\n').decode(
        file_out.write(c.convert(line.rstrip('\n').decode(
            'utf8')).encode('utf8'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Standalone CLI entry point (see run() for options).
    run()
|
||||
|
||||
@ -1,47 +0,0 @@
|
||||
import random
|
||||
|
||||
|
||||
user_agent = [
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
|
||||
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
|
||||
"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
|
||||
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
|
||||
"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
|
||||
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
|
||||
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
||||
"Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
||||
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
||||
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
||||
"MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
||||
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
|
||||
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
|
||||
"Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
|
||||
"Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
|
||||
"Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
|
||||
"Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
|
||||
"UCWEB7.0.2.37/28/999",
|
||||
"NOKIA5700/ UCWEB7.0.2.37/28/999",
|
||||
"Openwave/ UCWEB7.0.2.37/28/999",
|
||||
"Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
|
||||
# iPhone 6:
|
||||
"Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25"
|
||||
]
|
||||
|
||||
|
||||
def get_user_agent():
    """Pick a random User-Agent from the pool and wrap it as a
    requests-style headers dict."""
    chosen = random.choice(user_agent)
    return {'User-Agent': chosen}
|
||||
263
util/utils.py
263
util/utils.py
@ -1,263 +0,0 @@
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from collections import defaultdict
|
||||
from nonebot import require
|
||||
import nonebot
|
||||
import json
|
||||
import pytz
|
||||
from configs.path_config import TXT_PATH
|
||||
from configs.config import system_proxy
|
||||
import pypinyin
|
||||
|
||||
|
||||
scheduler = require('nonebot_plugin_apscheduler').scheduler
|
||||
|
||||
|
||||
# 次数检测
|
||||
class CountLimiter:
    """Counts uses per key; check() reports (and resets) when a key has
    reached the configured maximum."""

    def __init__(self, max):
        self.count = defaultdict(int)
        self.max = max

    def add(self, key):
        """Record one use of *key*."""
        self.count[key] = self.count[key] + 1

    def check(self, key) -> bool:
        """True (and reset the counter) once *key* hit the maximum."""
        if self.count[key] < self.max:
            return False
        self.count[key] = 0
        return True
|
||||
|
||||
|
||||
# 用户正在执行此命令
|
||||
class UserExistLimiter:
    """Tracks whether a key currently has a command in progress, with a
    30-second staleness cutoff shared by all keys (single timestamp)."""

    def __init__(self):
        self.mbool = defaultdict(bool)
        self.time = time.time()

    def set_True(self, key):
        """Mark *key* as busy and refresh the shared timestamp."""
        self.time = time.time()
        self.mbool[key] = True

    def set_False(self, key):
        """Mark *key* as idle."""
        self.mbool[key] = False

    def check(self, key):
        """Return whether *key* is busy; stale state auto-clears."""
        stale = time.time() - self.time > 30
        if stale:
            self.set_False(key)
            return False
        return self.mbool[key]
|
||||
|
||||
|
||||
# 命令cd
|
||||
class FreqLimiter:
    """Per-key cooldown: check() is allowed again once the stored
    next-allowed timestamp has passed."""

    def __init__(self, default_cd_seconds):
        self.next_time = defaultdict(float)
        self.default_cd = default_cd_seconds

    def check(self, key) -> bool:
        """True when *key* is off cooldown."""
        now = time.time()
        return now >= self.next_time[key]

    def start_cd(self, key, cd_time=0):
        """Begin a cooldown of *cd_time* seconds (default when <= 0)."""
        duration = cd_time if cd_time > 0 else self.default_cd
        self.next_time[key] = time.time() + duration

    def left_time(self, key) -> float:
        """Seconds of cooldown remaining (negative when expired)."""
        return self.next_time[key] - time.time()
|
||||
|
||||
|
||||
static_flmt = FreqLimiter(15)
|
||||
|
||||
|
||||
# 恶意触发命令检测
|
||||
class BanCheckLimiter:
    """Detects command spamming: *default_count* triggers within
    *default_check_time* seconds flips check() to True (ban signal)."""

    def __init__(self, default_check_time: float = 5, default_count: int = 4):
        # mint: triggers seen per key; mtime: start of each key's window.
        self.mint = defaultdict(int)
        self.mtime = defaultdict(float)
        self.default_check_time = default_check_time
        self.default_count = default_count

    def add(self, key):
        """Record one trigger for *key*.

        NOTE(review): the window start is (re)stamped only when the count
        is exactly 1, i.e. on the SECOND add after a reset — confirm this
        off-by-one is intentional before changing it.
        """
        if self.mint[key] == 1:
            self.mtime[key] = time.time()
        self.mint[key] += 1

    def check(self, key) -> bool:
        """True when *key* exceeded the allowed rate; either outcome
        resets the window and counter."""
        # print(self.mint[key])
        # print(time.time() - self.mtime[key])
        if time.time() - self.mtime[key] > self.default_check_time:
            self.mtime[key] = time.time()
            self.mint[key] = 0
            return False
        if self.mint[key] >= self.default_count and time.time() - self.mtime[key] < self.default_check_time:
            self.mtime[key] = time.time()
            self.mint[key] = 0
            return True
        return False
|
||||
|
||||
|
||||
# 每日次数
|
||||
class DailyNumberLimiter:
    """Per-key daily usage counter; all counts reset lazily when the day
    rolls over (day boundary shifted to 05:00 Asia/Shanghai)."""

    tz = pytz.timezone('Asia/Shanghai')

    def __init__(self, max_num):
        self.today = -1
        self.count = defaultdict(int)
        self.max = max_num

    def check(self, key) -> bool:
        """True while *key* is still under today's quota."""
        now = datetime.now(self.tz)
        day = (now - timedelta(hours=5)).day
        rolled_over = day != self.today
        if rolled_over:
            self.today = day
            self.count.clear()
        return bool(self.count[key] < self.max)

    def get_num(self, key):
        """Current count for *key*."""
        return self.count[key]

    def increase(self, key, num=1):
        """Add *num* (default 1) uses to *key*."""
        self.count[key] = self.count[key] + num

    def reset(self, key):
        """Zero *key*'s count."""
        self.count[key] = 0
|
||||
|
||||
|
||||
def is_number(s) -> bool:
    """Return True if *s* can be read as a number — either parseable by
    float() or a single numeric Unicode character (e.g. '½', '三')."""
    try:
        float(s)
    except ValueError:
        pass
    else:
        return True
    try:
        import unicodedata
        unicodedata.numeric(s)
    except (TypeError, ValueError):
        pass
    else:
        return True
    return False
|
||||
|
||||
|
||||
def get_lines(path: str, start: int = 0, end: int = 0) -> list:
    """Read *path* and return its non-blank lines, each sliced to
    ``[start:]`` (or ``[start:end]`` when *end* is non-zero)."""
    result = []
    with open(path, 'r', errors='ignore', encoding="UTF-8") as f:
        for line in f.readlines():
            if line in ("\n", ""):
                continue
            result.append(line[start:] if end == 0 else line[start: end])
    return result
|
||||
|
||||
|
||||
# Get the running bot instance.
def get_bot():
    """Return the first connected bot (assumes at least one is online;
    raises IndexError otherwise)."""
    return list(nonebot.get_bots().values())[0]
|
||||
|
||||
|
||||
def get_message_at(data: str) -> list:
    """Extract the QQ numbers of every at-segment in a raw event JSON
    string; returns [] when the payload is malformed."""
    payload = json.loads(data)
    try:
        return [int(seg['data']['qq'])
                for seg in payload['message'] if seg['type'] == 'at']
    except Exception:
        return []
|
||||
|
||||
|
||||
def get_message_imgs(data: str) -> list:
    """Collect the URLs of every image segment in a raw event JSON
    string; returns [] when the payload is malformed."""
    payload = json.loads(data)
    try:
        return [seg['data']['url']
                for seg in payload['message'] if seg['type'] == 'image']
    except Exception:
        return []
|
||||
|
||||
|
||||
def get_message_text(data: str) -> str:
    """Join the stripped text of every text segment with single spaces;
    returns '' when the payload is malformed."""
    payload = json.loads(data)
    try:
        pieces = [seg['data']['text'].strip()
                  for seg in payload['message'] if seg['type'] == 'text']
    except Exception:
        return ''
    return ' '.join(pieces).strip()
|
||||
|
||||
|
||||
def get_message_type(data: str) -> str:
    """Return the event's message_type field ('group', 'private', ...)."""
    payload = json.loads(data)
    return payload['message_type']
|
||||
|
||||
|
||||
def get_message_record(data: str) -> str:
    """Return the URL of the first voice (record) segment, or ''."""
    payload = json.loads(data)
    try:
        return next((seg['data']['url']
                     for seg in payload['message'] if seg['type'] == 'record'), '')
    except Exception:
        return ''
|
||||
|
||||
|
||||
def get_message_json(data: str) -> dict:
    """Return the payload of the first json segment in the event, or {}."""
    payload = json.loads(data)
    try:
        return next((seg['data']
                     for seg in payload['message'] if seg['type'] == 'json'), {})
    except Exception:
        return {}
|
||||
|
||||
|
||||
def add_to_16(value):
    """Pad *value* with NUL characters up to a multiple of 16 and return
    it UTF-8 encoded (typical AES block-size padding).

    The original appended one '\\0' per loop iteration (quadratic string
    growth); the padding length is now computed arithmetically.  Note the
    multiple-of-16 property holds in characters; multi-byte characters
    can still make the encoded byte length differ (same as before).
    """
    pad = (-len(value)) % 16
    return str.encode(value + '\0' * pad)
|
||||
|
||||
|
||||
# 获取文本加密后的cookie
|
||||
def get_cookie_text(cookie_name: str) -> str:
    """Read and return the stored cookie text from
    TXT_PATH/cookie/<cookie_name>.txt."""
    with open(TXT_PATH + "cookie/" + cookie_name + ".txt", 'r') as f:
        return f.read()
|
||||
|
||||
|
||||
# 获取本地http代理
|
||||
def get_local_proxy():
    """Return the proxy configured in configs (``system_proxy``), or None
    when it is unset/empty."""
    # An earlier revision auto-detected the OS proxy instead:
    # from urllib.request import getproxies
    # import platform
    # proxy = getproxies()['http']
    # if platform.system() != 'Windows':
    #     proxy = 'http://' + proxy
    return system_proxy if system_proxy else None
|
||||
|
||||
|
||||
# 判断是否为中文
|
||||
def is_Chinese(word):
    """Return True if *word* contains at least one CJK unified ideograph
    (U+4E00–U+9FFF).

    Idiom cleanup: the manual loop returning True/False was collapsed to
    any() — identical result for every input.
    """
    return any('\u4e00' <= ch <= '\u9fff' for ch in word)
|
||||
|
||||
|
||||
def user_avatar(qq):
    """Return the 160px QQ avatar URL for user *qq*."""
    avatar_url = f'http://q1.qlogo.cn/g?b=qq&nk={qq}&s=160'
    return avatar_url
|
||||
|
||||
|
||||
def group_avatar(group_id):
    """Return the 640px QQ group avatar URL for *group_id*."""
    avatar_url = f'http://p.qlogo.cn/gh/{group_id}/{group_id}/640/'
    return avatar_url
|
||||
|
||||
|
||||
def cn2py(word) -> str:
    """Convert *word* to its concatenated pinyin spelling (NORMAL style,
    no tone marks).

    Idiom cleanup: the string ``+=`` accumulation loop was replaced with
    str.join — same output, linear instead of quadratic growth.
    """
    return ''.join(''.join(part)
                   for part in pypinyin.pinyin(word, style=pypinyin.NORMAL))
|
||||
|
||||
8275
util/zh_wiki.py
8275
util/zh_wiki.py
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user