From 73c87d59597270c08e5d7e5bf52a474ccb5014d4 Mon Sep 17 00:00:00 2001
From: lanvent <lanvent@qq.com>
Date: Tue, 25 Apr 2023 01:48:15 +0800
Subject: [PATCH] fix(wechatcomapp): split long text messages into multiple
 parts

---
 channel/wechatcom/README.md               |  6 ++++--
 channel/wechatcom/wechatcomapp_channel.py | 16 +++++++++++++---
 channel/wechatmp/common.py                | 17 -----------------
 channel/wechatmp/passive_reply.py         |  1 +
 channel/wechatmp/wechatmp_channel.py      |  5 ++++-
 common/utils.py                           | 17 +++++++++++++++++
 6 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/channel/wechatcom/README.md b/channel/wechatcom/README.md
index 1728678..e3b4843 100644
--- a/channel/wechatcom/README.md
+++ b/channel/wechatcom/README.md
@@ -1,6 +1,8 @@
 # 企业微信应用号channel
 
-企业微信官方提供了客服、应用等API，本channel使用的是企业微信的应用API的能力。因为未来可能还会开发客服能力，所以本channel的类型名叫作`wechatcom_app`。
+企业微信官方提供了客服、应用等API，本channel使用的是企业微信的应用API的能力。
+
+因为未来可能还会开发客服能力，所以本channel的类型名叫作`wechatcom_app`。
 
 `wechatcom_app` channel支持插件系统和图片声音交互等能力，除了无法加入群聊，作为个人使用的私人助理已绰绰有余。
 
@@ -29,7 +31,7 @@
 
 - 在详情页如果点击`企业可信IP`的配置(没看到可以不管)，填入你服务器的公网IP
 - 点击`接收消息`下的启用API接收消息
-- `URL`填写格式为`http://url:port/wxcomapp`，是程序监听的端口，默认是9898
+- `URL`填写格式为`http://url:port/wxcomapp`，`port`是程序监听的端口，默认是9898
     如果是未认证的企业，url可直接使用服务器的IP。如果是认证企业，需要使用备案的域名，可使用二级域名。
 - `Token`可随意填写，停留在这个页面
 - 在程序根目录`config.json`中增加配置（**去掉注释**），`wechatcomapp_aes_key`是当前页面的`wechatcomapp_aes_key`
diff --git a/channel/wechatcom/wechatcomapp_channel.py b/channel/wechatcom/wechatcomapp_channel.py
index bd51c5f..4d686eb 100644
--- a/channel/wechatcom/wechatcomapp_channel.py
+++ b/channel/wechatcom/wechatcomapp_channel.py
@@ -2,6 +2,7 @@
 import io
 import os
 import textwrap
+import time
 
 import requests
 import web
@@ -17,10 +18,12 @@ from channel.wechatcom.wechatcomapp_client import WechatComAppClient
 from channel.wechatcom.wechatcomapp_message import WechatComAppMessage
 from common.log import logger
 from common.singleton import singleton
-from common.utils import compress_imgfile, fsize
+from common.utils import compress_imgfile, fsize, split_string_by_utf8_length
 from config import conf
 from voice.audio_convert import any_to_amr
 
+MAX_UTF8_LEN = 2048
+
 
 @singleton
 class WechatComAppChannel(ChatChannel):
@@ -50,8 +53,15 @@ class WechatComAppChannel(ChatChannel):
     def send(self, reply: Reply, context: Context):
         receiver = context["receiver"]
         if reply.type in [ReplyType.TEXT, ReplyType.ERROR, ReplyType.INFO]:
-            self.client.message.send_text(self.agent_id, receiver, reply.content)
-            logger.info("[wechatcom] sendMsg={}, receiver={}".format(reply, receiver))
+            reply_text = reply.content
+            texts = split_string_by_utf8_length(reply_text, MAX_UTF8_LEN)
+            if len(texts) > 1:
+                logger.info("[wechatcom] text too long, split into {} parts".format(len(texts)))
+            for i, text in enumerate(texts):
+                self.client.message.send_text(self.agent_id, receiver, text)
+                if i != len(texts) - 1:
+                    time.sleep(0.5)  # 休眠0.5秒，防止发送过快乱序
+            logger.info("[wechatcom] Do send text to {}: {}".format(receiver, reply_text))
         elif reply.type == ReplyType.VOICE:
             try:
                 file_path = reply.content
diff --git a/channel/wechatmp/common.py b/channel/wechatmp/common.py
index b6f206c..be0b800 100644
--- a/channel/wechatmp/common.py
+++ b/channel/wechatmp/common.py
@@ -43,20 +43,3 @@ def subscribe_msg():
                     输入'{trigger_prefix}#帮助' 查看详细指令。"""
     )
     return msg
-
-
-def split_string_by_utf8_length(string, max_length, max_split=0):
-    encoded = string.encode("utf-8")
-    start, end = 0, 0
-    result = []
-    while end < len(encoded):
-        if max_split > 0 and len(result) >= max_split:
-            result.append(encoded[start:].decode("utf-8"))
-            break
-        end = min(start + max_length, len(encoded))
-        # 如果当前字节不是 UTF-8 编码的开始字节，则向前查找直到找到开始字节为止
-        while end < len(encoded) and (encoded[end] & 0b11000000) == 0b10000000:
-            end -= 1
-        result.append(encoded[start:end].decode("utf-8"))
-        start = end
-    return result
diff --git a/channel/wechatmp/passive_reply.py b/channel/wechatmp/passive_reply.py
index cd0f012..38fee6d 100644
--- a/channel/wechatmp/passive_reply.py
+++ b/channel/wechatmp/passive_reply.py
@@ -11,6 +11,7 @@ from channel.wechatmp.common import *
 from channel.wechatmp.wechatmp_channel import WechatMPChannel
 from channel.wechatmp.wechatmp_message import WeChatMPMessage
 from common.log import logger
+from common.utils import split_string_by_utf8_length
 from config import conf
 
 
diff --git a/channel/wechatmp/wechatmp_channel.py b/channel/wechatmp/wechatmp_channel.py
index aa1fc74..0c54a1d 100644
--- a/channel/wechatmp/wechatmp_channel.py
+++ b/channel/wechatmp/wechatmp_channel.py
@@ -18,6 +18,7 @@ from channel.wechatmp.common import *
 from channel.wechatmp.wechatmp_client import WechatMPClient
 from common.log import logger
 from common.singleton import singleton
+from common.utils import split_string_by_utf8_length
 from config import conf
 from voice.audio_convert import any_to_mp3
 
@@ -140,8 +141,10 @@ class WechatMPChannel(ChatChannel):
                 texts = split_string_by_utf8_length(reply_text, MAX_UTF8_LEN)
                 if len(texts) > 1:
                     logger.info("[wechatmp] text too long, split into {} parts".format(len(texts)))
-                for text in texts:
+                for i, text in enumerate(texts):
                     self.client.message.send_text(receiver, text)
+                    if i != len(texts) - 1:
+                        time.sleep(0.5)  # 休眠0.5秒，防止发送过快乱序
                 logger.info("[wechatmp] Do send text to {}: {}".format(receiver, reply_text))
             elif reply.type == ReplyType.VOICE:
                 try:
diff --git a/common/utils.py b/common/utils.py
index 4d055f3..966a7cf 100644
--- a/common/utils.py
+++ b/common/utils.py
@@ -32,3 +32,20 @@ def compress_imgfile(file, max_size):
         if fsize(out_buf) <= max_size:
             return out_buf
         quality -= 5
+
+
+def split_string_by_utf8_length(string, max_length, max_split=0):  # split `string` into parts of at most `max_length` UTF-8 bytes each
+    encoded = string.encode("utf-8")  # start/end below are byte offsets into this buffer, not character indices
+    start, end = 0, 0
+    result = []
+    while end < len(encoded):
+        if max_split > 0 and len(result) >= max_split:  # a non-positive max_split disables this cap
+            result.append(encoded[start:].decode("utf-8"))  # once max_split parts exist, the whole remainder becomes one extra, uncut part
+            break
+        end = min(start + max_length, len(encoded))
+        # If the byte at `end` is a UTF-8 continuation byte (10xxxxxx), move `end` back to that character's start byte so no character is cut in half.
+        while end < len(encoded) and (encoded[end] & 0b11000000) == 0b10000000:
+            end -= 1  # NOTE(review): if max_length is <= 0 or smaller than one character's byte length, `end` falls back to `start` and the outer loop never advances
+        result.append(encoded[start:end].decode("utf-8"))
+        start = end
+    return result