diff --git a/channel/wechatmp/README.md b/channel/wechatmp/README.md
index 219d276..d85c4ca 100644
--- a/channel/wechatmp/README.md
+++ b/channel/wechatmp/README.md
@@ -1,7 +1,7 @@
 # 微信公众号channel
 
 鉴于个人微信号在服务器上通过itchat登录有封号风险，这里新增了微信公众号channel，提供无风险的服务。
-目前支持订阅号和服务号两种类型的公众号。个人主体的微信订阅号由于无法通过微信认证，接口存在限制，目前仅支持最基本的文本交互和语音输入。通过微信认证的订阅号或者服务号可以回复图片和语音。
+目前支持订阅号和服务号两种类型的公众号，它们都支持文本交互，语音和图片输入。其中个人主体的微信订阅号由于无法通过微信认证，存在回复时间限制，每天的图片和声音回复次数也有限制。
 
 ## 使用方法（订阅号，服务号类似）
 
@@ -15,7 +15,7 @@ pip3 install web.py
 
 然后在[微信公众平台](https://mp.weixin.qq.com)注册一个自己的公众号，类型选择订阅号，主体为个人即可。
 
-然后根据[接入指南](https://developers.weixin.qq.com/doc/offiaccount/Basic_Information/Access_Overview.html)的说明，在[微信公众平台](https://mp.weixin.qq.com)的“设置与开发”-“基本配置”-“服务器配置”中填写服务器地址`URL`和令牌`Token`。这里的`URL`是`example.com/wx`的形式，不可以使用IP，`Token`是你自己编的一个特定的令牌。消息加解密方式目前选择的是明文模式。
+然后根据[接入指南](https://developers.weixin.qq.com/doc/offiaccount/Basic_Information/Access_Overview.html)的说明，在[微信公众平台](https://mp.weixin.qq.com)的“设置与开发”-“基本配置”-“服务器配置”中填写服务器地址`URL`和令牌`Token`。这里的`URL`是`example.com/wx`的形式，不可以使用IP，`Token`是你自己编的一个特定的令牌。消息加解密方式如果选择了需要加密的模式，需要在配置中填写`wechatmp_aes_key`。
 
 相关的服务器验证代码已经写好，你不需要再添加任何代码。你只需要在本项目根目录的`config.json`中添加
 ```
@@ -24,6 +24,7 @@ pip3 install web.py
 "wechatmp_port": 8080,          # 微信公众平台的端口,需要端口转发到80或443
 "wechatmp_app_id": "xxxx",      # 微信公众平台的appID
 "wechatmp_app_secret": "xxxx",  # 微信公众平台的appsecret
+"wechatmp_aes_key": "",         # 微信公众平台的EncodingAESKey，加密模式需要
 "single_chat_prefix": [""],     # 推荐设置，任意对话都可以触发回复，不添加前缀
 "single_chat_reply_prefix": "", # 推荐设置，回复不设置前缀
 "plugin_trigger_prefix": "&",   # 推荐设置，在手机微信客户端中，$%^等符号与中文连在一起时会自动显示一段较大的间隔，用户体验不好。请不要使用管理员指令前缀"#"，这会造成未知问题。
@@ -40,12 +41,13 @@ sudo iptables-save > /etc/iptables/rules.v4
 程序启动并监听端口后，在刚才的“服务器配置”中点击`提交`即可验证你的服务器。
 随后在[微信公众平台](https://mp.weixin.qq.com)启用服务器，关闭手动填写规则的自动回复，即可实现ChatGPT的自动回复。
 
-如果在启用后如果遇到如下报错：
+之后需要在公众号开发信息下将本机IP加入到IP白名单。
+
+不然在启用后，发送语音、图片等消息可能会遇到如下报错：
 ```
 'errcode': 40164, 'errmsg': 'invalid ip xx.xx.xx.xx not in whitelist rid
 ```
 
-需要在公众号开发信息下将IP加入到IP白名单。
 
 ## 个人微信公众号的限制
 由于人微信公众号不能通过微信认证，所以没有客服接口，因此公众号无法主动发出消息，只能被动回复。而微信官方对被动回复有5秒的时间限制，最多重试2次，因此最多只有15秒的自动回复时间窗口。因此如果问题比较复杂或者我们的服务器比较忙，ChatGPT的回答就没办法及时回复给用户。为了解决这个问题，这里做了回答缓存，它需要你在回复超时后，再次主动发送任意文字（例如1）来尝试拿到回答缓存。为了优化使用体验，目前设置了两分钟（120秒）的timeout，用户在至多两分钟后即可得到查询到回复或者错误原因。
@@ -91,7 +93,7 @@ python3 -m pip install pyttsx3
 
 ## TODO
  - [x] 语音输入
- - [ ] 图片输入
+ - [x] 图片输入
  - [x] 使用临时素材接口提供认证公众号的图片和语音回复
  - [x] 使用永久素材接口提供未认证公众号的图片和语音回复
  - [ ] 高并发支持
diff --git a/channel/wechatmp/active_reply.py b/channel/wechatmp/active_reply.py
index d8a8dde..d33f06e 100644
--- a/channel/wechatmp/active_reply.py
+++ b/channel/wechatmp/active_reply.py
@@ -2,15 +2,15 @@ import time
 
 import web
 
-from channel.wechatmp.wechatmp_message import parse_xml
-from channel.wechatmp.passive_reply_message import TextMsg
+from channel.wechatmp.wechatmp_message import WeChatMPMessage
 from bridge.context import *
-from bridge.reply import ReplyType
+from bridge.reply import *
 from channel.wechatmp.common import *
 from channel.wechatmp.wechatmp_channel import WechatMPChannel
+from wechatpy import parse_message
 from common.log import logger
 from config import conf
-
+from wechatpy.replies import create_reply
 
 # This class is instantiated once per query
 class Query:
@@ -19,18 +19,25 @@ class Query:
 
     def POST(self):
         # Make sure to return the instance that first created, @singleton will do that.
-        channel = WechatMPChannel()
         try:
-            webData = web.data()
-            # logger.debug("[wechatmp] Receive request:\n" + webData.decode("utf-8"))
-            wechatmp_msg = parse_xml(webData)
-            if (
-                wechatmp_msg.msg_type == "text" 
-                or wechatmp_msg.msg_type == "voice" 
-                # or wechatmp_msg.msg_type == "image"
-            ):
+            args = web.input()
+            verify_server(args)
+            channel = WechatMPChannel()
+            message = web.data()
+            encrypt_func = lambda x: x
+            if args.get("encrypt_type") == "aes":
+                logger.debug("[wechatmp] Receive encrypted post data:\n" + message.decode("utf-8"))
+                if not channel.crypto:
+                    raise Exception("Crypto not initialized, Please set wechatmp_aes_key in config.json")
+                message = channel.crypto.decrypt_message(message, args.msg_signature, args.timestamp, args.nonce)
+                encrypt_func = lambda x: channel.crypto.encrypt_message(x, args.nonce, args.timestamp)
+            else:
+                logger.debug("[wechatmp] Receive post data:\n" + message.decode("utf-8"))
+            msg = parse_message(message)
+            if msg.type in ["text", "voice", "image"]:
+                wechatmp_msg = WeChatMPMessage(msg, client=channel.client)
                 from_user = wechatmp_msg.from_user_id
-                message = wechatmp_msg.content
+                content = wechatmp_msg.content
                 message_id = wechatmp_msg.msg_id
 
                 logger.info(
@@ -39,16 +46,17 @@ class Query:
                         web.ctx.env.get("REMOTE_PORT"),
                         from_user,
                         message_id,
-                        message,
+                        content,
                     )
                 )
-                if (wechatmp_msg.msg_type == "voice" and conf().get("voice_reply_voice") == True):
-                    rtype = ReplyType.VOICE
+                if msg.type == "voice" and wechatmp_msg.ctype == ContextType.TEXT and conf().get("voice_reply_voice", False):
+                    context = channel._compose_context(
+                        wechatmp_msg.ctype, content, isgroup=False, desire_rtype=ReplyType.VOICE, msg=wechatmp_msg
+                    )
                 else:
-                    rtype = None
-                context = channel._compose_context(
-                    ContextType.TEXT, message, isgroup=False, desire_rtype=rtype, msg=wechatmp_msg
-                )
+                    context = channel._compose_context(
+                        wechatmp_msg.ctype, content, isgroup=False, msg=wechatmp_msg
+                    )
                 if context:
                     # set private openai_api_key
                     # if from_user is not changed in itchat, this can be placed at chat_channel
@@ -59,18 +67,18 @@ class Query:
                     channel.produce(context)
                 # The reply will be sent by channel.send() in another thread
                 return "success"
-
-            elif wechatmp_msg.msg_type == "event":
+            elif msg.type == "event":
                 logger.info(
                     "[wechatmp] Event {} from {}".format(
-                        wechatmp_msg.Event, wechatmp_msg.from_user_id
+                        msg.event, msg.source
                     )
                 )
-                content = subscribe_msg()
-                replyMsg = TextMsg(
-                    wechatmp_msg.from_user_id, wechatmp_msg.to_user_id, content
-                )
-                return replyMsg.send()
+                if msg.event in ["subscribe", "subscribe_scan"]:
+                    reply_text = subscribe_msg()
+                    replyPost = create_reply(reply_text, msg)
+                    return encrypt_func(replyPost.render())
+                else:
+                    return "success"
             else:
                 logger.info("暂且不处理")
                 return "success"
diff --git a/channel/wechatmp/common.py b/channel/wechatmp/common.py
index 5efccfc..696585f 100644
--- a/channel/wechatmp/common.py
+++ b/channel/wechatmp/common.py
@@ -1,8 +1,10 @@
-import hashlib
 import textwrap
+import web
 
 from config import conf
-
+from wechatpy.utils import check_signature
+from wechatpy.crypto import WeChatCrypto
+from wechatpy.exceptions import InvalidSignatureException
 MAX_UTF8_LEN = 2048
 
 
@@ -12,38 +14,28 @@ class WeChatAPIException(Exception):
 
 def verify_server(data):
     try:
-        if len(data) == 0:
-            return "None"
         signature = data.signature
         timestamp = data.timestamp
         nonce = data.nonce
-        echostr = data.echostr
+        echostr = data.get("echostr", None)
         token = conf().get("wechatmp_token")  # 请按照公众平台官网\基本配置中信息填写
-
-        data_list = [token, timestamp, nonce]
-        data_list.sort()
-        sha1 = hashlib.sha1()
-        # map(sha1.update, data_list) #python2
-        sha1.update("".join(data_list).encode("utf-8"))
-        hashcode = sha1.hexdigest()
-        print("handle/GET func: hashcode, signature: ", hashcode, signature)
-        if hashcode == signature:
-            return echostr
-        else:
-            return ""
-    except Exception as Argument:
-        return Argument
+        check_signature(token, signature, timestamp, nonce)
+        return echostr
+    except InvalidSignatureException:
+        raise web.Forbidden("Invalid signature")
+    except Exception as e:
+        raise web.Forbidden(str(e))
 
 
 def subscribe_msg():
-    trigger_prefix = conf().get("single_chat_prefix", [""])
+    trigger_prefix = conf().get("single_chat_prefix", [""])[0]
     msg = textwrap.dedent(
         f"""\
                     感谢您的关注！
                     这里是ChatGPT，可以自由对话。
                     资源有限，回复较慢，请勿着急。
                     支持语音对话。
-                    暂时不支持图片输入。
+                    支持图片输入。
                     支持图片输出，画字开头的消息将按要求创作图片。
                     支持tool、角色扮演和文字冒险等丰富的插件。
                     输入'{trigger_prefix}#帮助' 查看详细指令。"""
@@ -59,7 +51,7 @@ def split_string_by_utf8_length(string, max_length, max_split=0):
         if max_split > 0 and len(result) >= max_split:
             result.append(encoded[start:].decode("utf-8"))
             break
-        end = start + max_length
+        end = min(start + max_length, len(encoded))
         # 如果当前字节不是 UTF-8 编码的开始字节，则向前查找直到找到开始字节为止
         while end < len(encoded) and (encoded[end] & 0b11000000) == 0b10000000:
             end -= 1
diff --git a/channel/wechatmp/passive_reply.py b/channel/wechatmp/passive_reply.py
index eca94ba..6c722ef 100644
--- a/channel/wechatmp/passive_reply.py
+++ b/channel/wechatmp/passive_reply.py
@@ -3,15 +3,15 @@ import asyncio
 
 import web
 
-from channel.wechatmp.wechatmp_message import parse_xml
-from channel.wechatmp.passive_reply_message import TextMsg, VoiceMsg, ImageMsg
+from channel.wechatmp.wechatmp_message import WeChatMPMessage
 from bridge.context import *
-from bridge.reply import ReplyType
+from bridge.reply import *
 from channel.wechatmp.common import *
 from channel.wechatmp.wechatmp_channel import WechatMPChannel
 from common.log import logger
 from config import conf
-
+from wechatpy import parse_message
+from wechatpy.replies import create_reply, ImageReply, VoiceReply
 
 # This class is instantiated once per query
 class Query:
@@ -20,38 +20,49 @@ class Query:
 
     def POST(self):
         try:
+            args = web.input()
+            verify_server(args)
             request_time = time.time()
             channel = WechatMPChannel()
-            webData = web.data()
-            logger.debug("[wechatmp] Receive post data:\n" + webData.decode("utf-8"))
-            wechatmp_msg = parse_xml(webData)
-            if wechatmp_msg.msg_type == "text" or wechatmp_msg.msg_type == "voice":
+            message = web.data()
+            encrypt_func = lambda x: x
+            if args.get("encrypt_type") == "aes":
+                logger.debug("[wechatmp] Receive encrypted post data:\n" + message.decode("utf-8"))
+                if not channel.crypto:
+                    raise Exception("Crypto not initialized, Please set wechatmp_aes_key in config.json")
+                message = channel.crypto.decrypt_message(message, args.msg_signature, args.timestamp, args.nonce)
+                encrypt_func = lambda x: channel.crypto.encrypt_message(x, args.nonce, args.timestamp)
+            else:
+                logger.debug("[wechatmp] Receive post data:\n" + message.decode("utf-8"))
+            msg = parse_message(message)
+            if msg.type in ["text", "voice", "image"]:
+                wechatmp_msg = WeChatMPMessage(msg, client=channel.client)
                 from_user = wechatmp_msg.from_user_id
-                to_user = wechatmp_msg.to_user_id
-                message = wechatmp_msg.content
+                content = wechatmp_msg.content
                 message_id = wechatmp_msg.msg_id
 
                 supported = True
-                if "【收到不支持的消息类型，暂无法显示】" in message:
+                if "【收到不支持的消息类型，暂无法显示】" in content:
                     supported = False  # not supported, used to refresh
 
                 # New request
                 if (
                     from_user not in channel.cache_dict
                     and from_user not in channel.running
-                    or message.startswith("#") 
+                    or content.startswith("#") 
                     and message_id not in channel.request_cnt # insert the godcmd
                 ):
                     # The first query begin
-                    if (wechatmp_msg.msg_type == "voice" and conf().get("voice_reply_voice") == True):
-                        rtype = ReplyType.VOICE
+                    if msg.type == "voice" and wechatmp_msg.ctype == ContextType.TEXT and conf().get("voice_reply_voice", False):
+                        context = channel._compose_context(
+                            wechatmp_msg.ctype, content, isgroup=False, desire_rtype=ReplyType.VOICE, msg=wechatmp_msg
+                        )
                     else:
-                        rtype = None
-                    context = channel._compose_context(
-                        ContextType.TEXT, message, isgroup=False, desire_rtype=rtype, msg=wechatmp_msg
-                    )
+                        context = channel._compose_context(
+                            wechatmp_msg.ctype, content, isgroup=False, msg=wechatmp_msg
+                        )
                     logger.debug(
-                        "[wechatmp] context: {} {}".format(context, wechatmp_msg)
+                        "[wechatmp] context: {} {} {}".format(context, wechatmp_msg, supported)
                     )
 
                     if supported and context:
@@ -62,29 +73,30 @@ class Query:
                         channel.running.add(from_user)
                         channel.produce(context)
                     else:
-                        trigger_prefix = conf().get("single_chat_prefix", [""])
+                        trigger_prefix = conf().get("single_chat_prefix", [""])[0]
                         if trigger_prefix or not supported:
                             if trigger_prefix:
-                                content = textwrap.dedent(
+                                reply_text = textwrap.dedent(
                                     f"""\
                                     请输入'{trigger_prefix}'接你想说的话跟我说话。
                                     例如:
                                     {trigger_prefix}你好，很高兴见到你。"""
                                 )
                             else:
-                                content = textwrap.dedent(
+                                reply_text = textwrap.dedent(
                                     """\
                                     你好，很高兴见到你。
                                     请跟我说话吧。"""
                                 )
                         else:
                             logger.error(f"[wechatmp] unknown error")
-                            content = textwrap.dedent(
+                            reply_text = textwrap.dedent(
                                 """\
                                 未知错误，请稍后再试"""
                             )
-                        replyPost = TextMsg(wechatmp_msg.from_user_id, wechatmp_msg.to_user_id, content).send()
-                        return replyPost
+                        
+                        replyPost = create_reply(reply_text, msg)
+                        return encrypt_func(replyPost.render())
 
 
                 # Wechat official server will request 3 times (5 seconds each), with the same message_id.
@@ -92,13 +104,13 @@ class Query:
                 request_cnt = channel.request_cnt.get(message_id, 0) + 1
                 channel.request_cnt[message_id] = request_cnt
                 logger.info(
-                    "[wechatmp] Request {} from {} {}\n{}\n{}:{}".format(
+                    "[wechatmp] Request {} from {} {} {}:{}\n{}".format(
                         request_cnt,
                         from_user,
                         message_id,
-                        message,
                         web.ctx.env.get("REMOTE_ADDR"),
                         web.ctx.env.get("REMOTE_PORT"),
+                        content
                     )
                 )
 
@@ -121,8 +133,8 @@ class Query:
                     else: # request_cnt == 3:
                         # return timeout message
                         reply_text = "【正在思考中，回复任意文字尝试获取回复】"
-                        replyPost = TextMsg(from_user, to_user, reply_text).send()
-                        return replyPost
+                        replyPost = create_reply(reply_text, msg)
+                        return encrypt_func(replyPost.render())
 
                 # reply is ready
                 channel.request_cnt.pop(message_id)
@@ -136,58 +148,80 @@ class Query:
 
                 # Only one request can access to the cached data
                 try:
-                    (reply_type, content) = channel.cache_dict.pop(from_user)
+                    (reply_type, reply_content) = channel.cache_dict.pop(from_user)
                 except KeyError:
                     return "success"
 
                 if (reply_type == "text"):
-                    if len(content.encode("utf8")) <= MAX_UTF8_LEN:
-                        reply_text = content
+                    if len(reply_content.encode("utf8")) <= MAX_UTF8_LEN:
+                        reply_text = reply_content
                     else:
                         continue_text = "\n【未完待续，回复任意文字以继续】"
                         splits = split_string_by_utf8_length(
-                            content,
+                            reply_content,
                             MAX_UTF8_LEN - len(continue_text.encode("utf-8")),
                             max_split=1,
                         )
                         reply_text = splits[0] + continue_text
                         channel.cache_dict[from_user] = ("text", splits[1])
-                    
+
                     logger.info(
                         "[wechatmp] Request {} do send to {} {}: {}\n{}".format(
                             request_cnt,
                             from_user,
                             message_id,
-                            message,
+                            content,
                             reply_text,
                         )
                     )
-                    replyPost = TextMsg(from_user, to_user, reply_text).send()
-                    return replyPost
+                    replyPost = create_reply(reply_text, msg)
+                    return encrypt_func(replyPost.render())
 
                 elif (reply_type == "voice"):
-                    media_id = content
+                    media_id = reply_content
                     asyncio.run_coroutine_threadsafe(channel.delete_media(media_id), channel.delete_media_loop)
-                    replyPost = VoiceMsg(from_user, to_user, media_id).send()
-                    return replyPost
+                    logger.info(
+                        "[wechatmp] Request {} do send to {} {}: {} voice media_id {}".format(
+                            request_cnt,
+                            from_user,
+                            message_id,
+                            content,
+                            media_id,
+                        )
+                    )
+                    replyPost = VoiceReply(message=msg)
+                    replyPost.media_id = media_id
+                    return encrypt_func(replyPost.render())
 
                 elif (reply_type == "image"):
-                    media_id = content
+                    media_id = reply_content
                     asyncio.run_coroutine_threadsafe(channel.delete_media(media_id), channel.delete_media_loop)
-                    replyPost = ImageMsg(from_user, to_user, media_id).send()
-                    return replyPost
+                    logger.info(
+                        "[wechatmp] Request {} do send to {} {}: {} image media_id {}".format(
+                            request_cnt,
+                            from_user,
+                            message_id,
+                            content,
+                            media_id,
+                        )
+                    )
+                    replyPost = ImageReply(message=msg)
+                    replyPost.media_id = media_id
+                    return encrypt_func(replyPost.render())
 
-            elif wechatmp_msg.msg_type == "event":
+            elif msg.type == "event":
                 logger.info(
                     "[wechatmp] Event {} from {}".format(
-                        wechatmp_msg.content, wechatmp_msg.from_user_id
+                        msg.event, msg.source
                     )
                 )
-                content = subscribe_msg()
-                replyMsg = TextMsg(
-                    wechatmp_msg.from_user_id, wechatmp_msg.to_user_id, content
-                )
-                return replyMsg.send()
+                if msg.event in ["subscribe", "subscribe_scan"]:
+                    reply_text = subscribe_msg()
+                    replyPost = create_reply(reply_text, msg)
+                    return encrypt_func(replyPost.render())
+                else:
+                    return "success"
+
             else:
                 logger.info("暂且不处理")
                 return "success"
diff --git a/channel/wechatmp/passive_reply_message.py b/channel/wechatmp/passive_reply_message.py
deleted file mode 100644
index ef58d70..0000000
--- a/channel/wechatmp/passive_reply_message.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# -*- coding: utf-8 -*-#
-# filename: reply.py
-import time
-
-
-class Msg(object):
-    def __init__(self):
-        pass
-
-    def send(self):
-        return "success"
-
-
-class TextMsg(Msg):
-    def __init__(self, toUserName, fromUserName, content):
-        self.__dict = dict()
-        self.__dict["ToUserName"] = toUserName
-        self.__dict["FromUserName"] = fromUserName
-        self.__dict["CreateTime"] = int(time.time())
-        self.__dict["Content"] = content
-
-    def send(self):
-        XmlForm = """
-            <xml>
-                <ToUserName><![CDATA[{ToUserName}]]></ToUserName>
-                <FromUserName><![CDATA[{FromUserName}]]></FromUserName>
-                <CreateTime>{CreateTime}</CreateTime>
-                <MsgType><![CDATA[text]]></MsgType>
-                <Content><![CDATA[{Content}]]></Content>
-            </xml>
-            """
-        return XmlForm.format(**self.__dict)
-
-
-class VoiceMsg(Msg):
-    def __init__(self, toUserName, fromUserName, mediaId):
-        self.__dict = dict()
-        self.__dict["ToUserName"] = toUserName
-        self.__dict["FromUserName"] = fromUserName
-        self.__dict["CreateTime"] = int(time.time())
-        self.__dict["MediaId"] = mediaId
-
-    def send(self):
-        XmlForm = """
-            <xml>
-                <ToUserName><![CDATA[{ToUserName}]]></ToUserName>
-                <FromUserName><![CDATA[{FromUserName}]]></FromUserName>
-                <CreateTime>{CreateTime}</CreateTime>
-                <MsgType><![CDATA[voice]]></MsgType>
-                <Voice>
-                <MediaId><![CDATA[{MediaId}]]></MediaId>
-                </Voice>
-            </xml>
-            """
-        return XmlForm.format(**self.__dict)
-
-
-class ImageMsg(Msg):
-    def __init__(self, toUserName, fromUserName, mediaId):
-        self.__dict = dict()
-        self.__dict["ToUserName"] = toUserName
-        self.__dict["FromUserName"] = fromUserName
-        self.__dict["CreateTime"] = int(time.time())
-        self.__dict["MediaId"] = mediaId
-
-    def send(self):
-        XmlForm = """
-            <xml>
-                <ToUserName><![CDATA[{ToUserName}]]></ToUserName>
-                <FromUserName><![CDATA[{FromUserName}]]></FromUserName>
-                <CreateTime>{CreateTime}</CreateTime>
-                <MsgType><![CDATA[image]]></MsgType>
-                <Image>
-                <MediaId><![CDATA[{MediaId}]]></MediaId>
-                </Image>
-            </xml>
-            """
-        return XmlForm.format(**self.__dict)
diff --git a/channel/wechatmp/wechatmp_channel.py b/channel/wechatmp/wechatmp_channel.py
index 9780048..9d63b84 100644
--- a/channel/wechatmp/wechatmp_channel.py
+++ b/channel/wechatmp/wechatmp_channel.py
@@ -4,17 +4,19 @@ import os
 import time
 import imghdr
 import requests
+import asyncio
+import threading
+from config import conf
 from bridge.context import *
 from bridge.reply import *
-from channel.chat_channel import ChatChannel
-from channel.wechatmp.wechatmp_client import WechatMPClient
-from channel.wechatmp.common import *
 from common.log import logger
 from common.singleton import singleton
-from config import conf
-
-import asyncio
-from threading import Thread
+from voice.audio_convert import any_to_mp3
+from channel.chat_channel import ChatChannel
+from channel.wechatmp.common import *
+from channel.wechatmp.wechatmp_client import WechatMPClient
+from wechatpy.exceptions import WeChatClientException
+from wechatpy.crypto import WeChatCrypto
 
 import web
 # If using SSL, uncomment the following lines, and modify the certificate path.
@@ -31,7 +33,14 @@ class WechatMPChannel(ChatChannel):
         super().__init__()
         self.passive_reply = passive_reply
         self.NOT_SUPPORT_REPLYTYPE = []
-        self.client = WechatMPClient()
+        appid = conf().get("wechatmp_app_id")
+        secret = conf().get("wechatmp_app_secret")
+        token = conf().get("wechatmp_token")
+        aes_key = conf().get("wechatmp_aes_key")
+        self.client = WechatMPClient(appid, secret)
+        self.crypto = None
+        if aes_key:
+            self.crypto = WeChatCrypto(token, aes_key, appid)
         if self.passive_reply:
             # Cache the reply to the user's first message
             self.cache_dict = dict()
@@ -41,7 +50,7 @@ class WechatMPChannel(ChatChannel):
             self.request_cnt = dict()
             # The permanent media need to be deleted to avoid media number limit
             self.delete_media_loop = asyncio.new_event_loop()
-            t = Thread(target=self.start_loop, args=(self.delete_media_loop,))
+            t = threading.Thread(target=self.start_loop, args=(self.delete_media_loop,))
             t.setDaemon(True)
             t.start()
 
@@ -62,7 +71,7 @@ class WechatMPChannel(ChatChannel):
     async def delete_media(self, media_id):
         logger.debug("[wechatmp] permanent media {} will be deleted in 10s".format(media_id))
         await asyncio.sleep(10)
-        self.client.delete_permanent_media(media_id)
+        self.client.material.delete(media_id)
         logger.info("[wechatmp] permanent media {} has been deleted".format(media_id))
 
     def send(self, reply: Reply, context: Context):
@@ -70,80 +79,125 @@ class WechatMPChannel(ChatChannel):
         if self.passive_reply:
             if reply.type == ReplyType.TEXT or reply.type == ReplyType.INFO or reply.type == ReplyType.ERROR:
                 reply_text = reply.content
-                logger.info("[wechatmp] reply to {} cached:\n{}".format(receiver, reply_text))
+                logger.info("[wechatmp] text cached, receiver {}\n{}".format(receiver, reply_text))
                 self.cache_dict[receiver] = ("text", reply_text)
             elif reply.type == ReplyType.VOICE:
-                voice_file_path = reply.content
-                logger.info("[wechatmp] voice file path {}".format(voice_file_path))
-                with open(voice_file_path, 'rb') as f:
-                    filename = receiver + "-" + context["msg"].msg_id + ".mp3"
-                    media_id = self.client.upload_permanent_media("voice", (filename, f, "audio/mpeg"))
-                    # 根据文件大小估计一个微信自动审核的时间，审核结束前返回将会导致语音无法播放，这个估计有待验证
-                    f_size = os.fstat(f.fileno()).st_size
-                    print(f_size)
-                    time.sleep(1.0 + 2 * f_size / 1024 / 1024)
-                    logger.info("[wechatmp] voice reply to {} uploaded: {}".format(receiver, media_id))
-                    self.cache_dict[receiver] = ("voice", media_id)
+                try:
+                    voice_file_path = reply.content
+                    with open(voice_file_path, 'rb') as f:
+                        # support: <2M, <60s, mp3/wma/wav/amr
+                        response = self.client.material.add("voice", f)
+                        logger.debug("[wechatmp] upload voice response: {}".format(response))
+                        # 根据文件大小估计一个微信自动审核的时间，审核结束前返回将会导致语音无法播放，这个估计有待验证
+                        f_size = os.fstat(f.fileno()).st_size
+                        time.sleep(1.0 + 2 * f_size / 1024 / 1024)
+                        # todo check media_id
+                except WeChatClientException as e:
+                    logger.error("[wechatmp] upload voice failed: {}".format(e))
+                    return
+                media_id = response["media_id"]
+                logger.info("[wechatmp] voice uploaded, receiver {}, media_id {}".format(receiver, media_id))
+                self.cache_dict[receiver] = ("voice", media_id)
+
             elif reply.type == ReplyType.IMAGE_URL:  # 从网络下载图片
                 img_url = reply.content
                 pic_res = requests.get(img_url, stream=True)
-                print(pic_res.headers)
                 image_storage = io.BytesIO()
                 for block in pic_res.iter_content(1024):
                     image_storage.write(block)
                 image_storage.seek(0)
                 image_type = imghdr.what(image_storage)
-                filename = receiver + "-" + context["msg"].msg_id + "." + image_type
+                filename = receiver + "-" + str(context['msg'].msg_id) + "." + image_type
                 content_type = "image/" + image_type
-                media_id = self.client.upload_permanent_media("image", (filename, image_storage, content_type))
-                logger.info("[wechatmp] image reply to {} uploaded: {}".format(receiver, media_id))
+                try:
+                    response = self.client.material.add("image", (filename, image_storage, content_type))
+                    logger.debug("[wechatmp] upload image response: {}".format(response))
+                except WeChatClientException as e:
+                    logger.error("[wechatmp] upload image failed: {}".format(e))
+                    return
+                media_id = response["media_id"]
+                logger.info("[wechatmp] image uploaded, receiver {}, media_id {}".format(receiver, media_id))
                 self.cache_dict[receiver] = ("image", media_id)
             elif reply.type == ReplyType.IMAGE:  # 从文件读取图片
                 image_storage = reply.content
                 image_storage.seek(0)
                 image_type = imghdr.what(image_storage)
-                filename = receiver + "-" + context["msg"].msg_id + "." + image_type
+                filename = receiver + "-" + str(context['msg'].msg_id) + "." + image_type
                 content_type = "image/" + image_type
-                media_id = self.client.upload_permanent_media("image", (filename, image_storage, content_type))
-                logger.info("[wechatmp] image reply to {} uploaded: {}".format(receiver, media_id))
+                try:
+                    response = self.client.material.add("image", (filename, image_storage, content_type))
+                    logger.debug("[wechatmp] upload image response: {}".format(response))
+                except WeChatClientException as e:
+                    logger.error("[wechatmp] upload image failed: {}".format(e))
+                    return
+                media_id = response["media_id"]
+                logger.info("[wechatmp] image uploaded, receiver {}, media_id {}".format(receiver, media_id))
                 self.cache_dict[receiver] = ("image", media_id)
         else:
             if reply.type == ReplyType.TEXT or reply.type == ReplyType.INFO or reply.type == ReplyType.ERROR:
                 reply_text = reply.content
-                self.client.send_text(receiver, reply_text)
-                logger.info("[wechatmp] Do send to {}: {}".format(receiver, reply_text))
+                texts = split_string_by_utf8_length(reply_text, MAX_UTF8_LEN)
+                if len(texts)>1:
+                    logger.info("[wechatmp] text too long, split into {} parts".format(len(texts)))
+                for text in texts:
+                    self.client.message.send_text(receiver, text)
+                logger.info("[wechatmp] Do send text to {}: {}".format(receiver, reply_text))
             elif reply.type == ReplyType.VOICE:
-                voice_file_path = reply.content
-                logger.info("[wechatmp] voice file path {}".format(voice_file_path))
-                with open(voice_file_path, 'rb') as f:
-                    filename = receiver + "-" + context["msg"].msg_id + ".mp3"
-                    media_id = self.client.upload_media("voice", (filename, f, "audio/mpeg"))
-                    self.client.send_voice(receiver, media_id)
-                    logger.info("[wechatmp] Do send voice to {}".format(receiver))
+                try:
+                    file_path = reply.content
+                    file_name = os.path.basename(file_path)
+                    file_type = os.path.splitext(file_name)[1]
+                    if file_type == ".mp3":
+                        file_type = "audio/mpeg"
+                    elif file_type == ".amr":
+                        file_type = "audio/amr"
+                    else:
+                        mp3_file = os.path.splitext(file_path)[0] + ".mp3"
+                        any_to_mp3(file_path, mp3_file)
+                        file_path = mp3_file
+                        file_name = os.path.basename(file_path)
+                        file_type = "audio/mpeg"
+                    logger.info("[wechatmp] file_name: {}, file_type: {} ".format(file_name, file_type))
+                    # support: <2M, <60s, AMR\MP3
+                    response = self.client.media.upload("voice", (file_name, open(file_path, "rb"), file_type))
+                    logger.debug("[wechatmp] upload voice response: {}".format(response))
+                except WeChatClientException as e:
+                    logger.error("[wechatmp] upload voice failed: {}".format(e))
+                    return
+                self.client.message.send_voice(receiver, response["media_id"])
+                logger.info("[wechatmp] Do send voice to {}".format(receiver))
             elif reply.type == ReplyType.IMAGE_URL:  # 从网络下载图片
                 img_url = reply.content
                 pic_res = requests.get(img_url, stream=True)
-                print(pic_res.headers)
                 image_storage = io.BytesIO()
                 for block in pic_res.iter_content(1024):
                     image_storage.write(block)
                 image_storage.seek(0)
                 image_type = imghdr.what(image_storage)
-                filename = receiver + "-" + context["msg"].msg_id + "." + image_type
+                filename = receiver + "-" + str(context['msg'].msg_id) + "." + image_type
                 content_type = "image/" + image_type
-                # content_type = pic_res.headers.get('content-type')
-                media_id = self.client.upload_media("image", (filename, image_storage, content_type))
-                self.client.send_image(receiver, media_id)
-                logger.info("[wechatmp] sendImage url={}, receiver={}".format(img_url, receiver))
+                try:
+                    response = self.client.media.upload("image", (filename, image_storage, content_type))
+                    logger.debug("[wechatmp] upload image response: {}".format(response))
+                except WeChatClientException as e:
+                    logger.error("[wechatmp] upload image failed: {}".format(e))
+                    return
+                self.client.message.send_image(receiver, response["media_id"])
+                logger.info("[wechatmp] Do send image to {}".format(receiver))
             elif reply.type == ReplyType.IMAGE:  # 从文件读取图片
                 image_storage = reply.content
                 image_storage.seek(0)
                 image_type = imghdr.what(image_storage)
-                filename = receiver + "-" + context["msg"].msg_id + "." + image_type
+                filename = receiver + "-" + str(context['msg'].msg_id) + "." + image_type
                 content_type = "image/" + image_type
-                media_id = self.client.upload_media("image", (filename, image_storage, content_type))
-                self.client.send_image(receiver, media_id)
-                logger.info("[wechatmp] sendImage, receiver={}".format(receiver))
+                try:
+                    response = self.client.media.upload("image", (filename, image_storage, content_type))
+                    logger.debug("[wechatmp] upload image response: {}".format(response))
+                except WeChatClientException as e:
+                    logger.error("[wechatmp] upload image failed: {}".format(e))
+                    return
+                self.client.message.send_image(receiver, response["media_id"])
+                logger.info("[wechatmp] Do send image to {}".format(receiver))
         return
 
     def _success_callback(self, session_id, context, **kwargs):  # 线程异常结束时的回调函数
diff --git a/channel/wechatmp/wechatmp_client.py b/channel/wechatmp/wechatmp_client.py
index 96ebddb..ee0ec84 100644
--- a/channel/wechatmp/wechatmp_client.py
+++ b/channel/wechatmp/wechatmp_client.py
@@ -1,180 +1,41 @@
 import time
-import json
-import requests
 import threading
 from channel.wechatmp.common import *
+from wechatpy.client import WeChatClient
 from common.log import logger
-from config import conf
+from wechatpy.exceptions import APILimitedException
 
 
-class WechatMPClient:
-    def __init__(self):
-        self.app_id = conf().get("wechatmp_app_id")
-        self.app_secret = conf().get("wechatmp_app_secret")
-        self.access_token = None
-        self.access_token_expires_time = 0
-        self.access_token_lock = threading.Lock()
-        self.get_access_token()
-
-
-    def wechatmp_request(self, method, url, **kwargs):
-        r = requests.request(method=method, url=url, **kwargs)
-        r.raise_for_status()
-        r.encoding = "utf-8"
-        ret = r.json()
-        if "errcode" in ret and ret["errcode"] != 0:
-            if ret["errcode"] == 45009:
-                self.clear_quota_v2()
-            raise WeChatAPIException("{}".format(ret))
-        return ret
-
-    def get_access_token(self):
-        # return the access_token
-        if self.access_token:
-            if self.access_token_expires_time - time.time() > 60:
-                return self.access_token
-
-        # Get new access_token
-        # Do not request access_token in parallel! Only the last obtained is valid.
-        if self.access_token_lock.acquire(blocking=False):
-            # Wait for other threads that have previously obtained access_token to complete the request
-            # This happens every 2 hours, so it doesn't affect the experience very much
-            time.sleep(1)
-            self.access_token = None
-            url = "https://api.weixin.qq.com/cgi-bin/token"
-            params = {
-                "grant_type": "client_credential",
-                "appid": self.app_id,
-                "secret": self.app_secret,
-            }
-            ret = self.wechatmp_request(method="get", url=url, params=params)
-            self.access_token = ret["access_token"]
-            self.access_token_expires_time = int(time.time()) + ret["expires_in"]
-            logger.info("[wechatmp] access_token: {}".format(self.access_token))
-            self.access_token_lock.release()
-        else:
-            # Wait for token update
-            while self.access_token_lock.locked():
-                time.sleep(0.1)
-        return self.access_token
-
-
-    def send_text(self, receiver, reply_text):
-        url = "https://api.weixin.qq.com/cgi-bin/message/custom/send"
-        params = {"access_token": self.get_access_token()}
-        json_data = {
-            "touser": receiver,
-            "msgtype": "text",
-            "text": {"content": reply_text},
-        }
-        self.wechatmp_request(
-            method="post",
-            url=url,
-            params=params,
-            data=json.dumps(json_data, ensure_ascii=False).encode("utf8"),
-        )
-
-
-    def send_voice(self, receiver, media_id):
-        url="https://api.weixin.qq.com/cgi-bin/message/custom/send"
-        params = {"access_token": self.get_access_token()}
-        json_data = {
-            "touser": receiver,
-            "msgtype": "voice",
-            "voice": {
-                "media_id": media_id
-            }
-        }
-        self.wechatmp_request(
-            method="post",
-            url=url,
-            params=params,
-            data=json.dumps(json_data, ensure_ascii=False).encode("utf8"),
-        )
-
-    def send_image(self, receiver, media_id):
-        url="https://api.weixin.qq.com/cgi-bin/message/custom/send"
-        params = {"access_token": self.get_access_token()}
-        json_data = {
-            "touser": receiver,
-            "msgtype": "image",
-            "image": {
-                "media_id": media_id
-            }
-        }
-        self.wechatmp_request(
-            method="post",
-            url=url,
-            params=params,
-            data=json.dumps(json_data, ensure_ascii=False).encode("utf8"),
+class WechatMPClient(WeChatClient):
+    def __init__(self, appid, secret, access_token=None,
+                 session=None, timeout=None, auto_retry=True):
+        super(WechatMPClient, self).__init__(
+            appid, secret, access_token, session, timeout, auto_retry
         )
-
-
-    def upload_media(self, media_type, media_file):
-        url="https://api.weixin.qq.com/cgi-bin/media/upload"
-        params={
-            "access_token": self.get_access_token(),
-            "type": media_type
-        }
-        files={"media": media_file}
-        ret = self.wechatmp_request(
-            method="post",
-            url=url,
-            params=params,
-            files=files
-        )
-        logger.debug("[wechatmp] media {} uploaded".format(media_file))
-        return ret["media_id"]
-
-
-    def upload_permanent_media(self, media_type, media_file):
-        url="https://api.weixin.qq.com/cgi-bin/material/add_material"
-        params={
-            "access_token": self.get_access_token(),
-            "type": media_type
-        }
-        files={"media": media_file}
-        ret = self.wechatmp_request(
-            method="post",
-            url=url,
-            params=params,
-            files=files
-        )
-        logger.debug("[wechatmp] permanent media {} uploaded".format(media_file))
-        return ret["media_id"]
-
-
-    def delete_permanent_media(self, media_id):
-        url="https://api.weixin.qq.com/cgi-bin/material/del_material"
-        params={
-            "access_token": self.get_access_token()
-        }
-        self.wechatmp_request(
-            method="post",
-            url=url,
-            params=params,
-            data=json.dumps({"media_id": media_id}, ensure_ascii=False).encode("utf8")
-        )
-        logger.debug("[wechatmp] permanent media {} deleted".format(media_id))
+        self.fetch_access_token_lock = threading.Lock()
 
     def clear_quota(self):
-        url="https://api.weixin.qq.com/cgi-bin/clear_quota"
-        params = {
-            "access_token": self.get_access_token()
-        }
-        self.wechatmp_request(
-            method="post",
-            url=url,
-            params=params,
-            data={"appid": self.app_id}
-        )
-        logger.debug("[wechatmp] API quata has been cleard")
+        return self.post("clear_quota", data={"appid": self.appid})
 
     def clear_quota_v2(self):
-        url="https://api.weixin.qq.com/cgi-bin/clear_quota/v2"
-        self.wechatmp_request(
-            method="post",
-            url=url,
-            data={"appid": self.app_id, "appsecret": self.app_secret}
-        )
-        logger.debug("[wechatmp] API quata has been cleard")
+        return self.post("clear_quota/v2", params={"appid": self.appid, "appsecret": self.secret})
+
+    def fetch_access_token(self): # 重载父类方法，加锁避免多线程重复获取access_token
+        with self.fetch_access_token_lock:
+            access_token = self.session.get(self.access_token_key)
+            if access_token:
+                if not self.expires_at:
+                    return access_token
+                timestamp = time.time()
+                if self.expires_at - timestamp > 60:
+                    return access_token
+            return super().fetch_access_token()
+
+    def _request(self, method, url_or_endpoint, **kwargs): # 重载父类方法，遇到API限流时，清除quota后重试
+        try:
+            return super()._request(method, url_or_endpoint, **kwargs)
+        except APILimitedException as e:
+            logger.error("[wechatmp] API quata has been used up. {}".format(e))
+            response = self.clear_quota_v2()
+            logger.debug("[wechatmp] API quata has been cleard, {}".format(response))
+            return super()._request(method, url_or_endpoint, **kwargs)
\ No newline at end of file
diff --git a/channel/wechatmp/wechatmp_message.py b/channel/wechatmp/wechatmp_message.py
index d385897..fd07243 100644
--- a/channel/wechatmp/wechatmp_message.py
+++ b/channel/wechatmp/wechatmp_message.py
@@ -1,50 +1,65 @@
 # -*- coding: utf-8 -*-#
-# filename: receive.py
-import xml.etree.ElementTree as ET
 
 from bridge.context import ContextType
 from channel.chat_message import ChatMessage
 from common.log import logger
+from common.tmp_dir import TmpDir
 
 
-def parse_xml(web_data):
-    if len(web_data) == 0:
-        return None
-    xmlData = ET.fromstring(web_data)
-    return WeChatMPMessage(xmlData)
-
 
 class WeChatMPMessage(ChatMessage):
-    def __init__(self, xmlData):
-        super().__init__(xmlData)
-        self.to_user_id = xmlData.find("ToUserName").text
-        self.from_user_id = xmlData.find("FromUserName").text
-        self.create_time = xmlData.find("CreateTime").text
-        self.msg_type = xmlData.find("MsgType").text
-        try:
-            self.msg_id = xmlData.find("MsgId").text
-        except:
-            self.msg_id = self.from_user_id + self.create_time
+    def __init__(self, msg, client=None):
+        super().__init__(msg)
+        self.msg_id = msg.id
+        self.create_time = msg.time
         self.is_group = False
 
-        # reply to other_user_id
-        self.other_user_id = self.from_user_id
-
-        if self.msg_type == "text":
+        if msg.type == "text":
             self.ctype = ContextType.TEXT
-            self.content = xmlData.find("Content").text
-        elif self.msg_type == "voice":
-            self.ctype = ContextType.TEXT
-            self.content = xmlData.find("Recognition").text  # 接收语音识别结果
-            # other voice_to_text method not implemented yet
-            if self.content == None:
-                self.content = "你好"
-        elif self.msg_type == "image":
-            # not implemented yet
-            self.pic_url = xmlData.find("PicUrl").text
-            self.media_id = xmlData.find("MediaId").text
-        elif self.msg_type == "event":
-            self.content = xmlData.find("Event").text
-        else:  # video, shortvideo, location, link
-            # not implemented
-            pass
+            self.content = msg.content
+        elif msg.type == "voice":
+            
+            if msg.recognition == None:
+                self.ctype = ContextType.VOICE
+                self.content = (
+                    TmpDir().path() + msg.media_id + "." + msg.format
+                )  # content直接存临时目录路径
+
+                def download_voice():
+                    # 如果响应状态码是200，则将响应内容写入本地文件
+                    response = client.media.download(msg.media_id)
+                    if response.status_code == 200:
+                        with open(self.content, "wb") as f:
+                            f.write(response.content)
+                    else:
+                        logger.info(
+                            f"[wechatmp] Failed to download voice file, {response.content}"
+                        )
+
+                self._prepare_fn = download_voice
+            else:
+                self.ctype = ContextType.TEXT
+                self.content = msg.recognition
+        elif msg.type == "image":
+            self.ctype = ContextType.IMAGE
+            self.content = TmpDir().path() + msg.media_id + ".png"  # content直接存临时目录路径
+            def download_image():
+                # 如果响应状态码是200，则将响应内容写入本地文件
+                response = client.media.download(msg.media_id)
+                if response.status_code == 200:
+                    with open(self.content, "wb") as f:
+                        f.write(response.content)
+                else:
+                    logger.info(
+                        f"[wechatmp] Failed to download image file, {response.content}"
+                    )
+
+            self._prepare_fn = download_image
+        else:
+            raise NotImplementedError(
+                "Unsupported message type: Type:{} ".format(msg.type)
+            )
+
+        self.from_user_id = msg.source
+        self.to_user_id = msg.target
+        self.other_user_id = msg.source
diff --git a/config.py b/config.py
index 8f5d2ca..4e8b60a 100644
--- a/config.py
+++ b/config.py
@@ -73,8 +73,9 @@ available_setting = {
     # wechatmp的配置
     "wechatmp_token": "",  # 微信公众平台的Token
     "wechatmp_port": 8080,  # 微信公众平台的端口,需要端口转发到80或443
-    "wechatmp_app_id": "",  # 微信公众平台的appID，仅服务号需要
-    "wechatmp_app_secret": "",  # 微信公众平台的appsecret，仅服务号需要
+    "wechatmp_app_id": "",  # 微信公众平台的appID
+    "wechatmp_app_secret": "",  # 微信公众平台的appsecret
+    "wechatmp_aes_key": "",  # 微信公众平台的EncodingAESKey，加密模式需要
     # chatgpt指令自定义触发词
     "clear_memory_commands": ["#清除记忆"],  # 重置会话指令，必须以#开头
     # channel配置
diff --git a/requirements-optional.txt b/requirements-optional.txt
index cfb52c9..ba453cb 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -18,6 +18,7 @@ pysilk_mod>=1.6.0 # needed by send voice
 
 # wechatmp
 web.py
+wechatpy
 
 # chatgpt-tool-hub plugin
 
diff --git a/voice/audio_convert.py b/voice/audio_convert.py
index ce0601d..77de4ed 100644
--- a/voice/audio_convert.py
+++ b/voice/audio_convert.py
@@ -33,6 +33,22 @@ def get_pcm_from_wav(wav_path):
     wav = wave.open(wav_path, "rb")
     return wav.readframes(wav.getnframes())
 
+def any_to_mp3(any_path, mp3_path):
+    """
+    把任意格式转成mp3文件
+    """
+    if any_path.endswith(".mp3"):
+        shutil.copy2(any_path, mp3_path)
+        return
+    if (
+        any_path.endswith(".sil")
+        or any_path.endswith(".silk")
+        or any_path.endswith(".slk")
+    ):
+        sil_to_wav(any_path, any_path)
+        any_path = mp3_path
+    audio = AudioSegment.from_file(any_path)
+    audio.export(mp3_path, format="mp3")
 
 def any_to_wav(any_path, wav_path):
     """
diff --git a/voice/pytts/pytts_voice.py b/voice/pytts/pytts_voice.py
index 072e28b..17cd6ff 100644
--- a/voice/pytts/pytts_voice.py
+++ b/voice/pytts/pytts_voice.py
@@ -2,8 +2,9 @@
 pytts voice service (offline)
 """
 
+import os
+import sys
 import time
-
 import pyttsx3
 
 from bridge.reply import Reply, ReplyType
@@ -11,7 +12,6 @@ from common.log import logger
 from common.tmp_dir import TmpDir
 from voice.voice import Voice
 
-
 class PyttsVoice(Voice):
     engine = pyttsx3.init()
 
@@ -20,19 +20,42 @@ class PyttsVoice(Voice):
         self.engine.setProperty("rate", 125)
         # 音量
         self.engine.setProperty("volume", 1.0)
-        for voice in self.engine.getProperty("voices"):
-            if "Chinese" in voice.name:
-                self.engine.setProperty("voice", voice.id)
+        if sys.platform == 'win32':
+            for voice in self.engine.getProperty("voices"):
+                if "Chinese" in voice.name:
+                    self.engine.setProperty("voice", voice.id)
+        else:
+            self.engine.setProperty("voice", "zh")
+            # If the problem of espeak is fixed, using runAndWait() and remove this startLoop()
+            # TODO: check if this is work on win32
+            self.engine.startLoop(useDriverLoop=False)
 
     def textToVoice(self, text):
         try:
-            wavFile = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav"
+            # avoid the same filename
+            wavFileName = "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7fffffff) + ".wav"
+            wavFile = TmpDir().path() + wavFileName
+            logger.info("[Pytts] textToVoice text={} voice file name={}".format(text, wavFile))
+
             self.engine.save_to_file(text, wavFile)
-            self.engine.runAndWait()
-            logger.info(
-                "[Pytts] textToVoice text={} voice file name={}".format(text, wavFile)
-            )
+
+            if sys.platform == 'win32':
+                self.engine.runAndWait()
+            else:
+                # In ubuntu, runAndWait do not really wait until the file created. 
+                # It will return once the task queue is empty, but the task is still running in coroutine.
+                # And if you call runAndWait() and time.sleep() twice, it will stuck, so do not use this.
+                # If you want to fix this, add self._proxy.setBusy(True) in line 127 in espeak.py, at the beginning of the function save_to_file.
+                # self.engine.runAndWait()
+
+                # Before espeak fix this problem, we iterate the generator and control the waiting by ourself.
+                # But this is not the canonical way to use it, for example if the file already exists it also cannot wait. 
+                self.engine.iterate()
+                while self.engine.isBusy() or wavFileName not in os.listdir(TmpDir().path()):
+                    time.sleep(0.1)
+
             reply = Reply(ReplyType.VOICE, wavFile)
+
         except Exception as e:
             reply = Reply(ReplyType.ERROR, str(e))
         finally: