Add files via upload

This commit is contained in:
HibiKier 2021-05-20 17:41:00 +08:00 committed by GitHub
parent 954cbc837a
commit dd3c42fb60
8 changed files with 9475 additions and 0 deletions

bot.py (new file, 94 lines)

@@ -0,0 +1,94 @@
import nonebot
from nonebot.adapters.cqhttp import Bot as CQHTTPBot
from services.db_context import init, disconnect
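# NoneBot2 entry point: initialise the framework, register the CQHTTP adapter, hook the database
# init/disconnect to startup/shutdown, then load the built-in and project plugins.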
nonebot.init()
driver = nonebot.get_driver()
driver.register_adapter("cqhttp", CQHTTPBot)
config = driver.config
driver.on_startup(init)
driver.on_shutdown(disconnect)
nonebot.load_builtin_plugins()
nonebot.load_plugins("plugins")
nonebot.load_plugins("plugins/shop")
nonebot.load_plugins("plugins/genshin")
if __name__ == "__main__":
nonebot.run()
# None
# ---------------------
# 775757368 print(event.get_user_id())
# 775757368 print(event.get_session_id())
# 天气 print(event.get_message())
# message.group.normal print(event.get_event_name())
# 天气 print(event.get_plaintext())
# -------
# 863633108 print(event.group_id)
# 775757368 print(event.user_id)
# 1851212230 print(event.message_id)
# event
# [request.group.invite]: {
# 'time': 1612430661, 'self_id': 3054557284, 'post_type': 'request', 'request_type': 'group', 'sub_type': 'invite',
# 'group_id': 863633108, 'user_id': 775757368, 'comment': '', 'flag': '1612430661235986'}
# [request.friend]: {'time': 1612431762, 'self_id': 3054557284, 'post_type': 'request',
# 'request_type': 'friend', 'user_id': 3238573864, 'comment': '', 'flag': '1612431762000000'}
# [notice.group_decrease.leave]: {'time': 1612620312,
# 'self_id': 3054557284, 'post_type': 'notice', 'notice_type': 'group_decrease',
# 'sub_type': 'leave', 'user_id': 3238573864, 'group_id': 863633108, 'operator_id': 3238573864}
# [notice.group_increase.approve]: {'time': 1612620506,
# 'self_id': 3054557284, 'post_type': 'notice', 'notice_type': 'group_increase',
# 'sub_type': 'approve', 'user_id': 3238573864, 'group_id': 863633108, 'operator_id': 0}
# get_group_list
# [{'group_id': 210287674, 'group_name': '豪爹头号粉丝⑧群', 'max_member_count': 200, 'member_count': 14},
# {'group_id': 863633108, 'group_name': 'Amireux、这里是、可…', 'max_member_count': 200, 'member_count': 4}]
# message event
# {"time": 1613886297, "self_id": 3054557284, "post_type": "message", "sub_type": "normal", "user_id": 3238573864,
# "message_type": "group", "message_id": 1933353523, "message": [{"type": "text", "data": {"text": "666"}}],
# "raw_message": "A666", "font": 0, "sender": {"user_id": 3238573864, "nickname":
# "\u53ef\u7231\u7684\u5c0f\u771f\u5bfb", "sex": "unknown", "age": 0, "card": "", "area": "", "level": "",
# "role": "member/admin/owner", "title": ""}, "to_me": true, "reply": null, "group_id": 863633108, "anonymous": null}
# bilibili share card (QQ mini-app forward)
# {"app":"com.tencent.miniapp_01","config":{"autoSize":0,"ctime":1613992391,"forward":1,"height":0,"
# token":"f7f529900be6af62f4d864f8a92c94c9","type":"normal","width":0},"desc":"哔哩哔哩",
# "extra":{"app_type":1,"appid":100951776,"uin":775757368},"meta":{"detail_1":{"appid":"1109937557",
# "desc":"B 站 用 户 三 连 现 状","gamePoints":"","gamePointsUrl":"","host":{"nick":"这里是","uin":775757368},
# "icon":"http://miniapp.gtimg.cn/public/appicon/432b76be3a548fc128acaa6c1ec90131_200.jpg",
# "preview":"pubminishare-30161.picsz.qpic.cn/4f5a19fb-42d5-4bb5-bc0a-b92fa5a06519",
# "qqdocurl":"https://b23.tv/qDvchc?share_medium=android&share_source=qq&bbid=XYDEA6CD35717661AE594D9DD99A5E852E414&ts=1613992387314",
# "scene":1036,"shareTemplateData":{},"shareTemplateId":"8C8E89B49BE609866298ADDFF2DBABA4","showLittleTail":"","title":"哔哩哔哩",
# "url":"m.q.qq.com/a/s/130c1f9c2af58430805ebfda192caa9a"}},"needShareCallBack":false,"prompt":"[QQ小程序]哔哩哔哩","ver":"1.0.0.19",
# "view":"view_8C8E89B49BE609866298ADDFF2DBABA4"}
# event
# [notice.group_decrease.kick_me]: {'time': 1614143313, 'self_id': 3054557284, 'post_type': 'notice',
# 'notice_type': 'group_decrease', 'sub_type': 'kick_me', 'user_id': 3054557284, 'group_id': 863633108,
# 'operator_id': 775757368}
# [request.group.add]: {'time': 1614851972, 'self_id': 3238573864, 'post_type': 'request', 'request_type': 'group',
# 'sub_type': 'add', 'group_id': 774261838, 'user_id': 3054557284, 'comment': '问题:为啥加群鸭?\n答案哈哈哈',
# 'flag': '1614851972274444'}

util/get_bilibili_img.py (new file, 140 lines)

@@ -0,0 +1,140 @@
import requests
from util.user_agent import get_user_agent
from bs4 import BeautifulSoup
from time import sleep
import threading
import os
from configs.path_config import IMAGE_PATH
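# Multithreaded scraper: searches bilibili article pages for a keyword and saves every image
# found into the given directory, numbering the files sequentially.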
lock = threading.Lock()
url = "https://search.bilibili.com/article"
# path = IMAGE_PATH + "setu/"
index = 1
THREAD_SUM_REMAINDER = 2  # pages per thread batch; the smaller the value, the more threads
class bilibiliThread(threading.Thread):
def __init__(self, threadId, url_list, path, nolist):
threading.Thread.__init__(self)
self.threadId = threadId
self.url_list = url_list
self.path = path
self.nolist = nolist
def run(self):
print("开始线程<><><><><><><><><> " + self.threadId)
thread_get_url(self.threadId, self.url_list, self.path, self.nolist)
def get_bilibili_img(name, path, nolist=None):
global index
index = get_dirfile_len(path)
print("index===", index)
threadId = 1
params = {
'keyword': name,
'page': '1'
}
res = requests.get(url, headers=get_user_agent(), params=params)
sleep(8)
soup = BeautifulSoup(res.text, 'html.parser')
# print(soup.text)
try:
total_page = soup.find_all('button', {'class': 'pagination-btn'})[-1].text.strip()
print("1 try")
except IndexError:
try:
total_page = soup.find_all('button', {'class': 'pagination-btn num-btn'})[-1].text.strip()
print("2 try")
except IndexError:
total_page = 1
print("3 except")
print(total_page)
url_list = []
for page in range(1, int(total_page)+1):
url_r = "https://search.bilibili.com/article?keyword=" + name + "&page=" + str(page)
url_list.append(url_r)
if page % THREAD_SUM_REMAINDER == 0:
print('-----> ' + str(page) + " =======>", url_list)
# _thread.start_new_thread(thread_get_url, (url_list, path,))
bilibiliThread(str(threadId), url_list, path, nolist).start()
threadId += 1
sleep(0.5)
url_list = []
if url_list:
print("=========================最后一个线程启动========================= url数量: ", len(url_list))
bilibiliThread(str(threadId), url_list, path, nolist).start()
def thread_get_url(threadId, url_list, path, nolist):
for url in url_list:
res = requests.get(url, headers=get_user_agent())
sleep(2)
soup = BeautifulSoup(res.text, 'lxml')
alist = soup.find_all('a', {'class': 'poster'})
img_content_page = []
# print(alist)
for a in alist:
if nolist is not None:
if a.get('href') not in nolist:
img_content_page.append("https://" + a.get('href')[2:])
else:
img_content_page.append("https://" + a.get('href')[2:])
pic_url = []
for img_content in img_content_page:
print("开始获取---------->", img_content)
res = requests.get(img_content, headers=get_user_agent())
sleep(2)
soup = BeautifulSoup(res.text, 'lxml')
figure_ls = soup.body.find_all('figure')
# print(figure_ls)
for figure in figure_ls:
# keep only <img> tags without a class attribute (the article body images); KeyError means no class is set
try:
_ = figure.img.attrs['class']
except KeyError:
data_src = figure.img.attrs['data-src']
pic_url.append('https:' + data_src)
print("线程 " + threadId + " 获取完毕------> 开始存储")
for url in pic_url:
print("线程 " + threadId + "正在存储---------------->", url)
res = requests.get(url, headers=get_user_agent())
save_img(res.content, path, threadId)
pic_url = []
print("线程 " + threadId + " ---------------->执行完毕")
def save_img(img, path, threadId):
global index
# reserve a file index under the lock so concurrent threads never write to the same filename
lock.acquire()
try:
img_index = index
index += 1
finally:
lock.release()
try:
with open(path + str(img_index) + ".jpg", 'wb') as f:
f.write(img)
except OSError:
print("Thread " + threadId + " failed to save --------> " + str(img_index) + ".jpg")
def get_dirfile_len(path):
return len(os.listdir(path))
if __name__ == '__main__':
# url = "https://search.bilibili.com" \
# "/article?keyword=%23%E4%BB%8A%E6%97%A5%E4%BB%BD%E7%9A%84%E5%8F%AF%E7%88%B1%" \
# "E5%B0%8F%E8%90%9D%E8%8E%89%EF%BC%8C%E8%BF%9B%E6%9D%A5%E7%9C%8B%E7%9C%8B%EF%BC%8C%E" \
# "6%8F%90%E7%A5%9E%E9%86%92%E8%84%91%EF%BC%81"
# res = requests.get(url, headers=get_user_agent())
# sleep(2)
# soup = BeautifulSoup(res.text, 'lxml')
# alist = soup.find_all('button', {'class': 'pagination-btn num-btn'})
# total_page = soup.find_all('button', {'class': 'pagination-btn num-btn'})[-1].text.strip()
# print(total_page)
get_bilibili_img("精选动漫壁纸手机电脑壁纸&动漫游戏专题", IMAGE_PATH + "bizhi/")

util/img_utils.py (new file, 296 lines)

@@ -0,0 +1,296 @@
import os
from configs.path_config import IMAGE_PATH, TXT_PATH, TTF_PATH
from PIL import Image, ImageFile, ImageDraw, ImageFont
import cv2
import imagehash
import base64
from io import BytesIO
from matplotlib import pyplot as plt
# check that image ids in a gallery directory are consecutive (fill gaps by renaming the last file)
def scan_img(path):
path = IMAGE_PATH + path
nolist = []
length = len(os.listdir(path))
print(length)
for i in range(length):
if i in nolist:
continue
img_path = path + "{}.jpg".format(i)
if not os.path.exists(img_path):
print("missing, renaming === " + str(length - 1) + ".jpg -------> " + str(i) + ".jpg")
os.rename(path + "{}.jpg".format(length - 1), img_path)
nolist.append(length - 1)
length -= 1
# compare the perceptual hashes of two images
def compare_image_with_hash(image_file1, image_file2, max_dif=1.5):
"""
max_dif: maximum allowed hash difference; smaller is stricter, minimum is 0.
Recommended method.
"""
ImageFile.LOAD_TRUNCATED_IMAGES = True
hash_1 = get_img_hash(image_file1)
hash_2 = get_img_hash(image_file2)
dif = hash_1 - hash_2
if dif < 0:
dif = -dif
return dif <= max_dif
# compare an image against a precomputed hash value
def compare_one_img_hash(image_file, hash_2, max_dif=1.5):
hash_1 = get_img_hash(image_file)
dif = hash_1 - hash_2
if dif < 0:
dif = -dif
return dif <= max_dif
def get_img_hash(image_file):
with open(image_file, 'rb') as fp:
hash_value = imagehash.average_hash(Image.open(fp))
return hash_value
# batch-resize (compress) images in a directory
def rar_imgs(inpath, outpath, ratio=0.9, start=0, end=0, lens=0, maxsize=0.0, in_file_name='', out_file_name='',
itype='jpg'):
in_path = IMAGE_PATH + inpath + '/'
out_path = IMAGE_PATH + outpath + '/'
# scan_img(inpath)
l = []
if in_file_name != '' and out_file_name != '':
filein = in_path + in_file_name + "." + itype
fileout = out_path + out_file_name + "." + itype
h, w, d = cv2.imread(filein).shape
width = int(w * ratio)
height = int(h * ratio)
ResizeImage(filein, fileout, width, height)
else:
if lens == 0:
lens = len(os.listdir(in_path))
if end == 0:
end = lens
for i in range(start, end):
if i in l:
continue
if maxsize != 0:
if os.path.getsize(in_path + str(i) + ".jpg") > maxsize:
print("压缩----->", i, ".jpg")
filein = in_path + str(i) + ".jpg"
fileout = out_path + str(i) + ".jpg"
h, w, d = cv2.imread(filein).shape
width = int(w * ratio)
height = int(h * ratio)
ResizeImage(filein, fileout, width, height)
else:
continue
else:
print("压缩----->", i, ".jpg")
filein = in_path + str(i) + ".jpg"
fileout = out_path + str(i) + ".jpg"
h, w, d = cv2.imread(filein).shape
width = int(w * ratio)
height = int(h * ratio)
ResizeImage(filein, fileout, width, height)
# resize a single image
def ResizeImage(filein, fileout, width, height):
img = cv2.resize(cv2.imread(filein), (int(width), int(height)))
cv2.imwrite(fileout, img)
# save image hashes to a txt file (for files over 1.5 MB, hash the compressed copy under rar/)
def save_img_hash(path, name):
for file in os.listdir(IMAGE_PATH + path):
if os.path.getsize(IMAGE_PATH + path + file) > 1024 * 1024 * 1.5:
compare_img_hash_in_txt(IMAGE_PATH + 'rar/' + file, name)
else:
compare_img_hash_in_txt(IMAGE_PATH + path + file, name)
# check whether an image's hash is already recorded in the txt file; append it when mode == 1
def compare_img_hash_in_txt(file, name, mode=1):
with open(TXT_PATH + name + ".txt", 'a+') as txtfile:
txtfile.seek(0)
hash_list = txtfile.read()[:-1].strip(",")
txtfile.seek(2)
with open(file, 'rb') as fp:
img_hash = str(imagehash.average_hash(Image.open(fp)))
if img_hash not in hash_list:
if mode == 1:
txtfile.write(img_hash + ",")
return False
return True
# transparent background -> white
def alphabg2white_PIL(img):
img = img.convert('RGBA')
sp = img.size
width = sp[0]
height = sp[1]
for yh in range(height):
for xw in range(width):
dot = (xw, yh)
color_d = img.getpixel(dot)
if color_d[3] == 0:
color_d = (255, 255, 255, 255)
img.putpixel(dot, color_d)
return img
def pic2b64(pic: Image) -> str:
buf = BytesIO()
pic.save(buf, format='PNG')
base64_str = base64.b64encode(buf.getvalue()).decode()
return 'base64://' + base64_str
def fig2b64(plt: plt) -> str:
buf = BytesIO()
plt.savefig(buf, format='PNG', dpi=100)
base64_str = base64.b64encode(buf.getvalue()).decode()
return 'base64://' + base64_str
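# CreateImg: a small canvas helper around PIL Image/ImageDraw. It can paste sub-images into a
# left-to-right grid of img_w x img_h cells, draw text with a TTF font, resize the canvas,
# and export the result as a base64 string.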
class CreateImg:
def __init__(self,
w,
h,
img_w=0,
img_h=0,
color='white',
image_type='RGBA',
font_size=10,
background='',
ttf='yz.ttf',
divisor=1):
self.w = int(w)
self.h = int(h)
self.img_w = int(img_w)
self.img_h = int(img_h)
self.current_w = 0
self.current_h = 0
self.ttfont = ImageFont.truetype(TTF_PATH + ttf, int(font_size))
if not background:
self.markImg = Image.new(image_type, (self.w, self.h), color)
else:
if w == 0 and h == 0:
self.markImg = Image.open(background)
w, h = self.markImg.size
if divisor:
self.w = int(divisor * w)
self.h = int(divisor * h)
self.markImg = self.markImg.resize((self.w, self.h), Image.ANTIALIAS)
else:
self.w = w
self.h = h
else:
self.markImg = Image.open(background).resize((self.w, self.h), Image.ANTIALIAS)
self.draw = ImageDraw.Draw(self.markImg)
self.size = self.w, self.h
# paste an image into the next grid cell (or at an explicit position)
def paste(self, img, pos=None, alpha=False):
if isinstance(img, CreateImg):
img = img.markImg
if self.current_w == self.w:
self.current_w = 0
self.current_h += self.img_h
if not pos:
pos = (self.current_w, self.current_h)
if alpha:
try:
self.markImg.paste(img, pos, img)
except ValueError:
img = img.convert("RGBA")
self.markImg.paste(img, pos, img)
else:
self.markImg.paste(img, pos)
self.current_w += self.img_w
return self.markImg
# measure rendered text size
def getsize(self, msg):
return self.ttfont.getsize(msg)
# draw text
def text(self, pos, text, fill=(0, 0, 0)):
self.draw.text(pos, text, fill=fill, font=self.ttfont)
return self.markImg
# draw a pie slice (hard-coded demo coordinates)
def pieslice(self):
self.draw.pieslice((350, 50, 500, 200), -150, -30, 'pink', 'crimson')
return self.markImg
# save to a file
def save(self, path):
self.markImg.save(path)
# display the image
def show(self):
self.markImg.show()
# scale the whole canvas by ratio
def resize(self, ratio):
self.markImg = self.markImg.resize((int(self.w * ratio), int(self.h * ratio)), Image.ANTIALIAS)
self.w, self.h = self.markImg.size
self.size = self.w, self.h
self.draw = ImageDraw.Draw(self.markImg)
# check whether the rendered text is wider than the canvas
def check_font_size(self, word):
return self.ttfont.getsize(word)[0] > self.w
# make the image semi-transparent (alpha = 100), leaving an n-pixel border untouched
def transparent(self, n=0):
self.markImg = self.markImg.convert('RGBA')  # switch to RGBA so the alpha channel can be edited
x, y = self.markImg.size  # width and height
# set the alpha of every pixel inside the border
for i in range(n, x - n):
for k in range(n, y - n):
color = self.markImg.getpixel((i, k))
color = color[:-1] + (100, )
self.markImg.putpixel((i, k), color)
return self.markImg
# convert to base64
def pic2bs4(self):
buf = BytesIO()
self.markImg.save(buf, format='PNG')
base64_str = base64.b64encode(buf.getvalue()).decode()
return 'base64://' + base64_str
# convert colour mode
def convert(self, itype):
self.markImg = self.markImg.convert(itype)
if __name__ == '__main__':
pass
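# Hypothetical usage sketch (not part of the original file; assumes the default yz.ttf font exists):
# img = CreateImg(200, 100, font_size=20)
# img.text((10, 10), "hello")
# img.save(IMAGE_PATH + "test.png")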

util/init_result.py (new file, 86 lines)

@@ -0,0 +1,86 @@
from configs.path_config import IMAGE_PATH, VOICE_PATH
from nonebot.adapters.cqhttp.message import MessageSegment
import os
from services.log import logger
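# Thin wrappers around the nonebot CQHTTP MessageSegment constructors; when a local image or
# voice file is missing, a warning is logged and an empty string is returned instead.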
def image(img_name: str = None, path: str = '', abspath: str = None, b64: str = None):
if abspath:
if os.path.exists(abspath):
return MessageSegment.image("file:///" + abspath)
else:
return ''
elif b64:
if b64.find('base64://') != -1:
return MessageSegment.image(b64)
else:
return MessageSegment.image('base64://' + b64)
else:
img_name = str(img_name)
if img_name.find('http') == -1:
if len(img_name.split('.')) == 1:
img_name += '.jpg'
if os.path.exists(IMAGE_PATH + path + '/' + img_name):
return MessageSegment.image("file:///" + IMAGE_PATH + path + '/' + img_name)
else:
logger.warning(f"图片 {path}/{img_name}缺失.")
return ''
else:
return MessageSegment.image(img_name)
def at(qq):
return MessageSegment.at(qq)
def record(voice_name='', path=''):
if len(voice_name.split('.')) == 1:
voice_name += '.mp3'
if path == "":
name = VOICE_PATH + "{}.".format(voice_name)
else:
name = VOICE_PATH + "{}/{}".format(path, voice_name)
if voice_name.find('http') == -1:
if os.path.exists(name):
result = MessageSegment.record("file:///" + name)
return result
else:
logger.warning(f"语音{path}/{voice_name}缺失...")
return ""
else:
return MessageSegment.record(voice_name)
def text(msg):
return MessageSegment.text(msg)
def contact_user(qq):
return MessageSegment.contact_user(qq)
def share(url, title, content='', image_url=''):
return MessageSegment.share(url, title, content, image_url)
def xml(data):
return MessageSegment.xml(data)
def json(data):
return MessageSegment.json(data)
def face(id_):
return MessageSegment.face(id_)
def poke(qq):
return MessageSegment('poke', {"qq": qq})
def forward():
return MessageSegment.forward()
# if __name__ == '__main__':
# print(get_record_result("dadada", "", type="amr"))

util/langconv.py (new file, 274 lines)

@@ -0,0 +1,274 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from copy import deepcopy
import re
try:
import psyco
psyco.full()
except ImportError:
pass
from .zh_wiki import zh2Hant, zh2Hans
import sys
py3k = sys.version_info >= (3, 0, 0)
if py3k:
UEMPTY = ''
else:
_zh2Hant, _zh2Hans = {}, {}
for old, new in ((zh2Hant, _zh2Hant), (zh2Hans, _zh2Hans)):
for k, v in old.items():
new[k.decode('utf8')] = v.decode('utf8')
zh2Hant = _zh2Hant
zh2Hans = _zh2Hans
UEMPTY = ''.decode('utf8')
# states
(START, END, FAIL, WAIT_TAIL) = list(range(4))
# conditions
(TAIL, ERROR, MATCHED_SWITCH, UNMATCHED_SWITCH, CONNECTOR) = list(range(5))
MAPS = {}
class Node(object):
def __init__(self, from_word, to_word=None, is_tail=True,
have_child=False):
self.from_word = from_word
if to_word is None:
self.to_word = from_word
self.data = (is_tail, have_child, from_word)
self.is_original = True
else:
self.to_word = to_word or from_word
self.data = (is_tail, have_child, to_word)
self.is_original = False
self.is_tail = is_tail
self.have_child = have_child
def is_original_long_word(self):
return self.is_original and len(self.from_word)>1
def is_follow(self, chars):
return chars != self.from_word[:-1]
def __str__(self):
return '<Node, %s, %s, %s, %s>' % (repr(self.from_word),
repr(self.to_word), self.is_tail, self.have_child)
__repr__ = __str__
class ConvertMap(object):
def __init__(self, name, mapping=None):
self.name = name
self._map = {}
if mapping:
self.set_convert_map(mapping)
def set_convert_map(self, mapping):
convert_map = {}
have_child = {}
max_key_length = 0
for key in sorted(mapping.keys()):
if len(key)>1:
for i in range(1, len(key)):
parent_key = key[:i]
have_child[parent_key] = True
have_child[key] = False
max_key_length = max(max_key_length, len(key))
for key in sorted(have_child.keys()):
convert_map[key] = (key in mapping, have_child[key],
mapping.get(key, UEMPTY))
self._map = convert_map
self.max_key_length = max_key_length
def __getitem__(self, k):
try:
is_tail, have_child, to_word = self._map[k]
return Node(k, to_word, is_tail, have_child)
except KeyError:
return Node(k)
def __contains__(self, k):
return k in self._map
def __len__(self):
return len(self._map)
class StatesMachineException(Exception): pass
class StatesMachine(object):
def __init__(self):
self.state = START
self.final = UEMPTY
self.len = 0
self.pool = UEMPTY
def clone(self, pool):
new = deepcopy(self)
new.state = WAIT_TAIL
new.pool = pool
return new
def feed(self, char, map):
node = map[self.pool+char]
if node.have_child:
if node.is_tail:
if node.is_original:
cond = UNMATCHED_SWITCH
else:
cond = MATCHED_SWITCH
else:
cond = CONNECTOR
else:
if node.is_tail:
cond = TAIL
else:
cond = ERROR
new = None
if cond == ERROR:
self.state = FAIL
elif cond == TAIL:
if self.state == WAIT_TAIL and node.is_original_long_word():
self.state = FAIL
else:
self.final += node.to_word
self.len += 1
self.pool = UEMPTY
self.state = END
elif self.state == START or self.state == WAIT_TAIL:
if cond == MATCHED_SWITCH:
new = self.clone(node.from_word)
self.final += node.to_word
self.len += 1
self.state = END
self.pool = UEMPTY
elif cond == UNMATCHED_SWITCH or cond == CONNECTOR:
if self.state == START:
new = self.clone(node.from_word)
self.final += node.to_word
self.len += 1
self.state = END
else:
if node.is_follow(self.pool):
self.state = FAIL
else:
self.pool = node.from_word
elif self.state == END:
# END is a new START
self.state = START
new = self.feed(char, map)
elif self.state == FAIL:
raise StatesMachineException('Translate States Machine '
'have error with input data %s' % node)
return new
def __len__(self):
return self.len + 1
def __str__(self):
return '<StatesMachine %s, pool: "%s", state: %s, final: %s>' % (
id(self), self.pool, self.state, self.final)
__repr__ = __str__
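# Converter feeds each character into several StatesMachine branches in parallel (branches fork on
# ambiguous multi-character dictionary entries) and, once every branch has finished, keeps the result
# of the branch with the fewest converted segments, i.e. the longest dictionary matches.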
class Converter(object):
def __init__(self, to_encoding):
self.to_encoding = to_encoding
self.map = MAPS[to_encoding]
self.start()
def feed(self, char):
branches = []
for fsm in self.machines:
new = fsm.feed(char, self.map)
if new:
branches.append(new)
if branches:
self.machines.extend(branches)
self.machines = [fsm for fsm in self.machines if fsm.state != FAIL]
all_ok = True
for fsm in self.machines:
if fsm.state != END:
all_ok = False
if all_ok:
self._clean()
return self.get_result()
def _clean(self):
if len(self.machines):
self.machines.sort(key=lambda x: len(x))
# self.machines.sort(cmp=lambda x,y: cmp(len(x), len(y)))
self.final += self.machines[0].final
self.machines = [StatesMachine()]
def start(self):
self.machines = [StatesMachine()]
self.final = UEMPTY
def end(self):
self.machines = [fsm for fsm in self.machines
if fsm.state == FAIL or fsm.state == END]
self._clean()
def convert(self, string):
self.start()
for char in string:
self.feed(char)
self.end()
return self.get_result()
def get_result(self):
return self.final
def registery(name, mapping):
global MAPS
MAPS[name] = ConvertMap(name, mapping)
registery('zh-hant', zh2Hant)
registery('zh-hans', zh2Hans)
del zh2Hant, zh2Hans
def run():
import sys
from optparse import OptionParser
parser = OptionParser()
parser.add_option('-e', type='string', dest='encoding',
help='encoding')
parser.add_option('-f', type='string', dest='file_in',
help='input file (- for stdin)')
parser.add_option('-t', type='string', dest='file_out',
help='output file')
(options, args) = parser.parse_args()
if not options.encoding:
parser.error('encoding must be set')
if options.file_in:
if options.file_in == '-':
file_in = sys.stdin
else:
file_in = open(options.file_in)
else:
file_in = sys.stdin
if options.file_out:
if options.file_out == '-':
file_out = sys.stdout
else:
file_out = open(options.file_out, 'wb')
else:
file_out = sys.stdout
c = Converter(options.encoding)
for line in file_in:
# print >> file_out, c.convert(line.rstrip('\n').decode(
file_out.write(c.convert(line.rstrip('\n').decode(
'utf8')).encode('utf8'))
if __name__ == '__main__':
run()

util/user_agent.py (new file, 47 lines)

@@ -0,0 +1,47 @@
import random
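# A pool of desktop and mobile User-Agent strings; get_user_agent() returns a headers dict with one
# picked at random, presumably to make scraping requests look less uniform.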
user_agent = [
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
"Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
"Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
"Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
"Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
"Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
"UCWEB7.0.2.37/28/999",
"NOKIA5700/ UCWEB7.0.2.37/28/999",
"Openwave/ UCWEB7.0.2.37/28/999",
"Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
# iPhone 6
"Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25"
]
def get_user_agent():
return {'User-Agent': random.choice(user_agent)}

util/utils.py (new file, 263 lines)

@@ -0,0 +1,263 @@
import time
from datetime import datetime, timedelta
from collections import defaultdict
from nonebot import require
import nonebot
import json
import pytz
from configs.path_config import TXT_PATH
from configs.config import system_proxy
import pypinyin
scheduler = require('nonebot_plugin_apscheduler').scheduler
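# Shared helpers: rate/count limiters (cooldowns, daily caps, spam detection) plus helpers that pull
# at/image/text/record/json segments out of raw CQHTTP event JSON.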
# count limiter: check() returns True (and resets) once a key has been used max times
class CountLimiter:
def __init__(self, max):
self.count = defaultdict(int)
self.max = max
def add(self, key):
self.count[key] += 1
def check(self, key) -> bool:
if self.count[key] >= self.max:
self.count[key] = 0
return True
return False
# marks that a user is currently running a command; the flag expires 30 seconds after the last set_True (one shared timer for all keys)
class UserExistLimiter:
def __init__(self):
self.mbool = defaultdict(bool)
self.time = time.time()
def set_True(self, key):
self.time = time.time()
self.mbool[key] = True
def set_False(self, key):
self.mbool[key] = False
def check(self, key):
if time.time() - self.time > 30:
self.set_False(key)
return False
return self.mbool[key]
# per-key command cooldown
class FreqLimiter:
def __init__(self, default_cd_seconds):
self.next_time = defaultdict(float)
self.default_cd = default_cd_seconds
def check(self, key) -> bool:
return time.time() >= self.next_time[key]
def start_cd(self, key, cd_time=0):
self.next_time[key] = time.time() + (cd_time if cd_time > 0 else self.default_cd)
def left_time(self, key) -> float:
return self.next_time[key] - time.time()
static_flmt = FreqLimiter(15)
# detects malicious command spamming: too many triggers of the same key within a time window
class BanCheckLimiter:
def __init__(self, default_check_time: float = 5, default_count: int = 4):
self.mint = defaultdict(int)
self.mtime = defaultdict(float)
self.default_check_time = default_check_time
self.default_count = default_count
def add(self, key):
if self.mint[key] == 1:
self.mtime[key] = time.time()
self.mint[key] += 1
def check(self, key) -> bool:
# print(self.mint[key])
# print(time.time() - self.mtime[key])
if time.time() - self.mtime[key] > self.default_check_time:
self.mtime[key] = time.time()
self.mint[key] = 0
return False
if self.mint[key] >= self.default_count and time.time() - self.mtime[key] < self.default_check_time:
self.mtime[key] = time.time()
self.mint[key] = 0
return True
return False
# daily usage counter, reset each day (Asia/Shanghai, with the day boundary shifted to 05:00)
class DailyNumberLimiter:
tz = pytz.timezone('Asia/Shanghai')
def __init__(self, max_num):
self.today = -1
self.count = defaultdict(int)
self.max = max_num
def check(self, key) -> bool:
now = datetime.now(self.tz)
day = (now - timedelta(hours=5)).day
if day != self.today:
self.today = day
self.count.clear()
return bool(self.count[key] < self.max)
def get_num(self, key):
return self.count[key]
def increase(self, key, num=1):
self.count[key] += num
def reset(self, key):
self.count[key] = 0
def is_number(s) -> bool:
try:
float(s)
return True
except ValueError:
pass
try:
import unicodedata
unicodedata.numeric(s)
return True
except (TypeError, ValueError):
pass
return False
def get_lines(path: str, start: int = 0, end: int = 0) -> list:
l = []
with open(path, 'r', errors='ignore', encoding="UTF-8") as f:
lines = f.readlines()
for line in lines:
if line != "\n" and line != "":
if end == 0:
l.append(line[start:])
else:
l.append(line[start: end])
return l
# get the first connected bot instance
def get_bot():
return list(nonebot.get_bots().values())[0]
def get_message_at(data: str) -> list:
qq_list = []
data = json.loads(data)
try:
for msg in data['message']:
if msg['type'] == 'at':
qq_list.append(int(msg['data']['qq']))
return qq_list
except Exception:
return []
def get_message_imgs(data: str) -> list:
img_list = []
data = json.loads(data)
try:
for msg in data['message']:
if msg['type'] == 'image':
img_list.append(msg['data']['url'])
return img_list
except Exception:
return []
def get_message_text(data: str) -> str:
data = json.loads(data)
result = ''
try:
for msg in data['message']:
if msg['type'] == 'text':
result += msg['data']['text'].strip() + ' '
return result.strip()
except Exception:
return ''
def get_message_type(data: str) -> str:
return json.loads(data)['message_type']
def get_message_record(data: str) -> str:
data = json.loads(data)
try:
for msg in data['message']:
if msg['type'] == 'record':
return msg['data']['url']
return ''
except Exception:
return ''
def get_message_json(data: str) -> dict:
data = json.loads(data)
try:
for msg in data['message']:
if msg['type'] == 'json':
return msg['data']
return {}
except Exception:
return {}
# pad a string with '\0' to a multiple of 16 characters and return it encoded
def add_to_16(value):
while len(value) % 16 != 0:
value += '\0'
return str.encode(value)
# read the stored (encrypted) cookie text for the given name
def get_cookie_text(cookie_name: str) -> str:
with open(TXT_PATH + "cookie/" + cookie_name + ".txt", 'r') as f:
return f.read()
# get the local http proxy (returns the configured system_proxy, if any)
def get_local_proxy():
# from urllib.request import getproxies
# import platform
# proxy = getproxies()['http']
# if platform.system() != 'Windows':
# proxy = 'http://' + proxy
return system_proxy if system_proxy else None
# check whether the text contains at least one Chinese character
def is_Chinese(word):
for ch in word:
if '\u4e00' <= ch <= '\u9fff':
return True
return False
def user_avatar(qq):
return f'http://q1.qlogo.cn/g?b=qq&nk={qq}&s=160'
def group_avatar(group_id):
return f'http://p.qlogo.cn/gh/{group_id}/{group_id}/640/'
# convert Chinese characters to pinyin (no tone marks)
def cn2py(word) -> str:
temp = ""
for i in pypinyin.pinyin(word, style=pypinyin.NORMAL):
temp += ''.join(i)
return temp

util/zh_wiki.py (new file, 8275 lines)

Diff suppressed because the file is too large.