def unicode_escape(value: str) -> str:
    r"""Escape every character outside printable ASCII as a ``\uXXXX`` sequence.

    Characters in the range 0x20-0x7E, plus newline, carriage return and tab,
    are kept as they are. Every other character is replaced by ``\u`` followed
    by its code point in lowercase hexadecimal, zero-padded to four digits.

    The function is idempotent: an escape sequence produced by an earlier call
    consists only of printable ASCII, so escaping it again changes nothing.
    For that reason no "already escaped" pre-check is performed; such a check
    would skip the whole string and leave raw non-ASCII characters untouched
    whenever the input merely contained one ``\uXXXX``-looking substring.

    Args:
        value: Text to escape. Falsy input (empty string) is returned as is.

    Returns:
        The escaped text.
    """
    if not value:
        return value

    passthrough = ("\n", "\r", "\t")
    # NOTE(review): ``04x`` is a minimum width, so code points above U+FFFF
    # (e.g. most emoji) produce five or six hex digits. Confirm that the
    # decoding side copes with that before relying on a round trip.
    return "".join(
        char
        if 0x20 <= ord(char) <= 0x7E or char in passthrough
        else f"\\u{ord(char):04x}"
        for char in value
    )