From 73c87d59597270c08e5d7e5bf52a474ccb5014d4 Mon Sep 17 00:00:00 2001 From: lanvent Date: Tue, 25 Apr 2023 01:48:15 +0800 Subject: [PATCH] fix(wechatcomapp): split long text messages into multiple parts --- channel/wechatcom/README.md | 6 ++++-- channel/wechatcom/wechatcomapp_channel.py | 16 +++++++++++++--- channel/wechatmp/common.py | 17 ----------------- channel/wechatmp/passive_reply.py | 1 + channel/wechatmp/wechatmp_channel.py | 5 ++++- common/utils.py | 17 +++++++++++++++++ 6 files changed, 39 insertions(+), 23 deletions(-) diff --git a/channel/wechatcom/README.md b/channel/wechatcom/README.md index 1728678..e3b4843 100644 --- a/channel/wechatcom/README.md +++ b/channel/wechatcom/README.md @@ -1,6 +1,8 @@ # 企业微信应用号channel -企业微信官方提供了客服、应用等API,本channel使用的是企业微信的应用API的能力。因为未来可能还会开发客服能力,所以本channel的类型名叫作`wechatcom_app`。 +企业微信官方提供了客服、应用等API,本channel使用的是企业微信的应用API的能力。 + +因为未来可能还会开发客服能力,所以本channel的类型名叫作`wechatcom_app`。 `wechatcom_app` channel支持插件系统和图片声音交互等能力,除了无法加入群聊,作为个人使用的私人助理已绰绰有余。 @@ -29,7 +31,7 @@ - 在详情页如果点击`企业可信IP`的配置(没看到可以不管),填入你服务器的公网IP - 点击`接收消息`下的启用API接收消息 -- `URL`填写格式为`http://url:port/wxcomapp`,是程序监听的端口,默认是9898 +- `URL`填写格式为`http://url:port/wxcomapp`,`port`是程序监听的端口,默认是9898 如果是未认证的企业,url可直接使用服务器的IP。如果是认证企业,需要使用备案的域名,可使用二级域名。 - `Token`可随意填写,停留在这个页面 - 在程序根目录`config.json`中增加配置(**去掉注释**),`wechatcomapp_aes_key`是当前页面的`wechatcomapp_aes_key` diff --git a/channel/wechatcom/wechatcomapp_channel.py b/channel/wechatcom/wechatcomapp_channel.py index bd51c5f..4d686eb 100644 --- a/channel/wechatcom/wechatcomapp_channel.py +++ b/channel/wechatcom/wechatcomapp_channel.py @@ -2,6 +2,7 @@ import io import os import textwrap +import time import requests import web @@ -17,10 +18,12 @@ from channel.wechatcom.wechatcomapp_client import WechatComAppClient from channel.wechatcom.wechatcomapp_message import WechatComAppMessage from common.log import logger from common.singleton import singleton -from common.utils import compress_imgfile, fsize +from common.utils import 
compress_imgfile, fsize, split_string_by_utf8_length from config import conf from voice.audio_convert import any_to_amr +MAX_UTF8_LEN = 2048 + @singleton class WechatComAppChannel(ChatChannel): @@ -50,8 +53,15 @@ class WechatComAppChannel(ChatChannel): def send(self, reply: Reply, context: Context): receiver = context["receiver"] if reply.type in [ReplyType.TEXT, ReplyType.ERROR, ReplyType.INFO]: - self.client.message.send_text(self.agent_id, receiver, reply.content) - logger.info("[wechatcom] sendMsg={}, receiver={}".format(reply, receiver)) + reply_text = reply.content + texts = split_string_by_utf8_length(reply_text, MAX_UTF8_LEN) + if len(texts) > 1: + logger.info("[wechatcom] text too long, split into {} parts".format(len(texts))) + for i, text in enumerate(texts): + self.client.message.send_text(self.agent_id, receiver, text) + if i != len(texts) - 1: + time.sleep(0.5) # 休眠0.5秒,防止发送过快乱序 + logger.info("[wechatcom] Do send text to {}: {}".format(receiver, reply_text)) elif reply.type == ReplyType.VOICE: try: file_path = reply.content diff --git a/channel/wechatmp/common.py b/channel/wechatmp/common.py index b6f206c..be0b800 100644 --- a/channel/wechatmp/common.py +++ b/channel/wechatmp/common.py @@ -43,20 +43,3 @@ def subscribe_msg(): 输入'{trigger_prefix}#帮助' 查看详细指令。""" ) return msg - - -def split_string_by_utf8_length(string, max_length, max_split=0): - encoded = string.encode("utf-8") - start, end = 0, 0 - result = [] - while end < len(encoded): - if max_split > 0 and len(result) >= max_split: - result.append(encoded[start:].decode("utf-8")) - break - end = min(start + max_length, len(encoded)) - # 如果当前字节不是 UTF-8 编码的开始字节,则向前查找直到找到开始字节为止 - while end < len(encoded) and (encoded[end] & 0b11000000) == 0b10000000: - end -= 1 - result.append(encoded[start:end].decode("utf-8")) - start = end - return result diff --git a/channel/wechatmp/passive_reply.py b/channel/wechatmp/passive_reply.py index cd0f012..38fee6d 100644 --- a/channel/wechatmp/passive_reply.py +++ 
def split_string_by_utf8_length(string, max_length, max_split=0):
    """Split a string into pieces bounded by their UTF-8 encoded size.

    The string is cut only on character boundaries, so every returned piece
    is valid text and concatenating the pieces reproduces ``string``.

    Args:
        string: The text to split.
        max_length: Maximum size of each piece, in UTF-8 encoded bytes.
            Must be at least 1.
        max_split: Maximum number of bounded pieces to cut. When positive,
            whatever remains after ``max_split`` pieces is returned as one
            final, unbounded piece, so the result holds at most
            ``max_split + 1`` items. ``0`` (the default) means no limit.

    Returns:
        A list of strings. An empty ``string`` yields an empty list.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.

    Note:
        A single character whose encoding is wider than ``max_length`` cannot
        be cut without corrupting it; it is returned whole as its own piece.
        That is the only case in which a bounded piece exceeds ``max_length``.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1 byte")

    encoded = string.encode("utf-8")
    total = len(encoded)
    result = []
    start = 0
    while start < total:
        if max_split > 0 and len(result) >= max_split:
            # Split budget exhausted: the rest goes out as one final piece.
            result.append(encoded[start:].decode("utf-8"))
            break
        end = min(start + max_length, total)
        # If the cut lands on a UTF-8 continuation byte (0b10xxxxxx), move it
        # back until it sits on the lead byte of a character.
        while end < total and (encoded[end] & 0b11000000) == 0b10000000:
            end -= 1
        if end == start:
            # The next character alone is wider than max_length. Backing off
            # made no progress, so take the whole character instead of
            # looping forever on empty pieces.
            end = start + 1
            while end < total and (encoded[end] & 0b11000000) == 0b10000000:
                end += 1
        result.append(encoded[start:end].decode("utf-8"))
        start = end
    return result