From 9b389ffc336ff7cb066ad70f67bfb05693abda65 Mon Sep 17 00:00:00 2001
From: Chiaki <root@whatisit.cn>
Date: Mon, 27 Mar 2023 16:46:53 +0800
Subject: [PATCH 1/3] =?UTF-8?q?1.itchat=E6=B7=BB=E5=8A=A0=E7=BE=A4?=
 =?UTF-8?q?=E7=BB=84=E8=AF=AD=E9=9F=B3=E5=9B=9E=E5=A4=8D=E6=96=87=E6=9C=AC?=
 =?UTF-8?q?=E5=8A=9F=E8=83=BD=EF=BC=9B2.itchat=E6=B7=BB=E5=8A=A0=E7=BE=A4?=
 =?UTF-8?q?=E7=BB=84=E8=AF=AD=E9=9F=B3=E5=9B=9E=E5=A4=8D=E8=AF=AD=E9=9F=B3?=
 =?UTF-8?q?=E5=8A=9F=E8=83=BD=EF=BC=9B3.=E6=9B=B4=E6=96=B0README?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app.py                           |  2 +-
 channel/wechat/wechat_channel.py | 49 ++++++++++++++++++++++++++------
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/app.py b/app.py
index 7d42b9d..c95f39c 100644
--- a/app.py
+++ b/app.py
@@ -24,4 +24,4 @@ def run():
         logger.exception(e)
 
 if __name__ == '__main__':
-    run()
\ No newline at end of file
+    run()
diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py
index fa303d2..165b958 100644
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -33,18 +33,20 @@ def handler_single_msg(msg):
     WechatChannel().handle_text(msg)
     return None
 
-
 @itchat.msg_register(TEXT, isGroupChat=True)
 def handler_group_msg(msg):
     WechatChannel().handle_group(msg)
     return None
 
-
 @itchat.msg_register(VOICE)
 def handler_single_voice(msg):
     WechatChannel().handle_voice(msg)
     return None
-
+    
+@itchat.msg_register(VOICE, isGroupChat=True)
+def handler_group_voice(msg):
+    WechatChannel().handle_group_voice(msg)
+    return None
 
 class WechatChannel(Channel):
     def __init__(self):
@@ -89,7 +91,6 @@ class WechatChannel(Channel):
             context.kwargs = {'isgroup': False, 'msg': msg, 'receiver': other_user_id, 'session_id': other_user_id}
             thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
 
-
     @time_checker
     def handle_text(self, msg):
         logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
@@ -169,6 +170,33 @@ class WechatChannel(Channel):
 
             thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
 
+    def handle_group_voice(self, msg):
+        if conf().get('group_speech_recognition') != True:
+            return
+        logger.debug("[WX]receive voice for group msg: " + msg['FileName'])
+        group_name = msg['User'].get('NickName', None)
+        group_id = msg['User'].get('UserName', None)
+        create_time = msg['CreateTime']             # 消息时间
+        if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60:    #跳过1分钟前的历史消息
+            logger.debug("[WX]history group message skipped")
+            return
+        # 验证群名
+        if not group_name:
+            return ""
+        if ('ALL_GROUP' in conf().get('group_name_white_list') or group_name in conf().get('group_name_white_list') or check_contain(group_name, conf().get('group_name_keyword_white_list'))):
+            context = Context(ContextType.VOICE,msg['FileName'])
+            context.kwargs = {'isgroup': True, 'msg': msg, 'receiver': group_id}
+
+            group_chat_in_one_session = conf().get('group_chat_in_one_session', [])
+            if ('ALL_GROUP' in group_chat_in_one_session or
+                    group_name in group_chat_in_one_session or
+                    check_contain(group_name, group_chat_in_one_session)):
+                context['session_id'] = group_id
+            else:
+                context['session_id'] = msg['ActualUserName']
+
+            thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
+
     # 统一的发送函数，每个Channel自行实现，根据reply的type字段发送不同类型的消息
     def send(self, reply : Reply, receiver):
         if reply.type == ReplyType.TEXT:
@@ -206,19 +234,26 @@ class WechatChannel(Channel):
         reply = e_context['reply']
         if not e_context.is_pass():
             logger.debug('[WX] ready to handle context: type={}, content={}'.format(context.type, context.content))
-            if context.type == ContextType.TEXT or context.type == ContextType.IMAGE_CREATE:
+            if context.type == ContextType.TEXT or context.type == ContextType.IMAGE_CREATE: # 文字和图片消息
                 reply = super().build_reply_content(context.content, context)
-            elif context.type == ContextType.VOICE:
+            elif context.type == ContextType.VOICE: # 语音消息
                 msg = context['msg']
                 file_name = TmpDir().path() + context.content
                 msg.download(file_name)
                 reply = super().build_voice_to_text(file_name)
                 if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
                     context.content = reply.content # 语音转文字后，将文字内容作为新的context
+                    # 如果是群消息，判断是否触发关键字
+                    if context['isgroup']:
+                        match_prefix = check_prefix(context.content, conf().get('group_chat_prefix')) or check_contain(context.content, conf().get('group_chat_keyword'))
+                        if match_prefix != True:
+                            return
                     context.type = ContextType.TEXT
                     reply = super().build_reply_content(context.content, context)
                     if reply.type == ReplyType.TEXT:
                         if conf().get('voice_reply_voice'):
+                            if context['isgroup']:
+                                reply.content = '@' + context['msg']['ActualNickName'] + ' ' + reply.content
                             reply = super().build_text_to_voice(reply.content)
             else:
                 logger.error('[WX] unknown context type: {}'.format(context.type))
@@ -255,14 +290,12 @@ class WechatChannel(Channel):
                 logger.debug('[WX] ready to send reply: {} to {}'.format(reply, context['receiver']))
                 self.send(reply, context['receiver'])
 
-
 def check_prefix(content, prefix_list):
     for prefix in prefix_list:
         if content.startswith(prefix):
             return prefix
     return None
 
-
 def check_contain(content, keyword_list):
     if not keyword_list:
         return None

From 42aca71763b3ac92a6af19c9c6cedaa5b225a9e2 Mon Sep 17 00:00:00 2001
From: Chiaki <root@whatisit.cn>
Date: Mon, 27 Mar 2023 16:50:50 +0800
Subject: [PATCH 2/3] =?UTF-8?q?1.=E6=9B=B4=E6=96=B0README?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index bd6cd02..aa98432 100644
--- a/README.md
+++ b/README.md
@@ -134,7 +134,7 @@ pip3 install --upgrade tiktoken
 **3.语音识别**
 
 + 添加 `"speech_recognition": true` 将开启语音识别，默认使用openai的whisper模型识别为文字，同时以文字回复，该参数仅支持私聊 (注意由于语音消息无法匹配前缀，一旦开启将对所有语音自动回复)；
-+ 添加 `"group_speech_recognition": true` 将开启群组语音识别，默认使用openai的whisper模型识别为文字，同时以文字回复，参数仅支持群聊 (可以匹配group_chat_prefix和group_chat_keyword，目前仅支持wechaty方案)；
++ 添加 `"group_speech_recognition": true` 将开启群组语音识别，默认使用openai的whisper模型识别为文字，同时以文字回复，参数仅支持群聊 (可以匹配group_chat_prefix和group_chat_keyword, 目前wechaty可支持群聊语音触发画图，itchat无法触发画图)；
 + 添加 `"voice_reply_voice": true` 将开启语音回复语音（同时作用于私聊和群聊），但是需要配置对应语音合成平台的key，由于itchat协议的限制，只能发送语音mp3文件，若使用wechaty则回复的是微信语音。
 
 **4.其他配置**

From 74f383a7d4a1b107b525bb12bb87e6c3131212ce Mon Sep 17 00:00:00 2001
From: lanvent <lanvent@qq.com>
Date: Mon, 27 Mar 2023 18:56:40 +0800
Subject: [PATCH 3/3] =?UTF-8?q?Merge=20pull=20request=20#629=20from=20Chia?=
 =?UTF-8?q?ki-Chan/master=20ItChat-uos=E6=96=B9=E6=A1=88=E4=B8=8B=E6=B7=BB?=
 =?UTF-8?q?=E5=8A=A0=E5=AF=B9=E7=BE=A4=E7=BB=84=E8=AF=AD=E9=9F=B3=E6=B6=88?=
 =?UTF-8?q?=E6=81=AF=E7=9A=84=E5=93=8D=E5=BA=94?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                        |  6 +++---
 app.py                           |  2 +-
 channel/wechat/wechat_channel.py | 28 +++++++++++++++++++---------
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index aa98432..7f95674 100644
--- a/README.md
+++ b/README.md
@@ -112,7 +112,7 @@ pip3 install --upgrade tiktoken
   "image_create_prefix": ["画", "看", "找"],                   # 开启图片回复的前缀
   "conversation_max_tokens": 1000,                            # 支持上下文记忆的最多字符数
   "speech_recognition": false,                                # 是否开启语音识别
-  "group_speech_recognition": false,                          # 是否开启群组语音识别 （目前仅支持wechaty）
+  "group_speech_recognition": false,                          # 是否开启群组语音识别
   "use_azure_chatgpt": false,                                 # 是否使用Azure ChatGPT service代替openai ChatGPT service. 当设置为true时需要设置 open_ai_api_base，如 https://xxx.openai.azure.com/
   "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题，并且可以使用多种语言与人交流。",  # 人格描述,
 }
@@ -133,8 +133,8 @@ pip3 install --upgrade tiktoken
 
 **3.语音识别**
 
-+ 添加 `"speech_recognition": true` 将开启语音识别，默认使用openai的whisper模型识别为文字，同时以文字回复，该参数仅支持私聊 (注意由于语音消息无法匹配前缀，一旦开启将对所有语音自动回复)；
-+ 添加 `"group_speech_recognition": true` 将开启群组语音识别，默认使用openai的whisper模型识别为文字，同时以文字回复，参数仅支持群聊 (可以匹配group_chat_prefix和group_chat_keyword, 目前wechaty可支持群聊语音触发画图，itchat无法触发画图)；
++ 添加 `"speech_recognition": true` 将开启语音识别，默认使用openai的whisper模型识别为文字，同时以文字回复，该参数仅支持私聊 (注意由于语音消息无法匹配前缀，一旦开启将对所有语音自动回复，支持语音触发画图)；
++ 添加 `"group_speech_recognition": true` 将开启群组语音识别，默认使用openai的whisper模型识别为文字，同时以文字回复，参数仅支持群聊 (会匹配group_chat_prefix和group_chat_keyword, 支持语音触发画图)；
 + 添加 `"voice_reply_voice": true` 将开启语音回复语音（同时作用于私聊和群聊），但是需要配置对应语音合成平台的key，由于itchat协议的限制，只能发送语音mp3文件，若使用wechaty则回复的是微信语音。
 
 **4.其他配置**
diff --git a/app.py b/app.py
index c95f39c..7d42b9d 100644
--- a/app.py
+++ b/app.py
@@ -24,4 +24,4 @@ def run():
         logger.exception(e)
 
 if __name__ == '__main__':
-    run()
+    run()
\ No newline at end of file
diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py
index 9fa4821..c7c660d 100644
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -190,14 +190,14 @@ class WechatChannel(Channel):
             thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
 
     def handle_group_voice(self, msg):
-        if conf().get('group_speech_recognition') != True:
+        if conf().get('group_speech_recognition', False) != True:
             return
         logger.debug("[WX]receive voice for group msg: " + msg['FileName'])
         group_name = msg['User'].get('NickName', None)
         group_id = msg['User'].get('UserName', None)
         create_time = msg['CreateTime']             # 消息时间
         if conf().get('hot_reload') == True and int(create_time) < int(time.time()) - 60:    #跳过1分钟前的历史消息
-            logger.debug("[WX]history group message skipped")
+            logger.debug("[WX]history group voice skipped")
             return
         # 验证群名
         if not group_name:
@@ -260,19 +260,29 @@ class WechatChannel(Channel):
                 file_name = TmpDir().path() + context.content
                 msg.download(file_name)
                 reply = super().build_voice_to_text(file_name)
-                if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
-                    context.content = reply.content # 语音转文字后，将文字内容作为新的context
+                if reply.type == ReplyType.TEXT:
+                    content = reply.content # 语音转文字后，将文字内容作为新的context
                     # 如果是群消息，判断是否触发关键字
                     if context['isgroup']:
-                        match_prefix = check_prefix(context.content, conf().get('group_chat_prefix')) or check_contain(context.content, conf().get('group_chat_keyword'))
-                        if match_prefix != True:
+                        match_prefix = check_prefix(content, conf().get('group_chat_prefix'))
+                        match_contain = check_contain(content, conf().get('group_chat_keyword'))
+                        logger.debug('[WX] group chat prefix match: {}'.format(match_prefix))
+                        if match_prefix is None and match_contain is None:
                             return
-                    context.type = ContextType.TEXT
+                        else:
+                            if match_prefix:
+                                content = content.replace(match_prefix, '', 1).strip()
+                        
+                    img_match_prefix = check_prefix(content, conf().get('image_create_prefix'))
+                    if img_match_prefix:
+                        content = content.replace(img_match_prefix, '', 1).strip()
+                        context.type = ContextType.IMAGE_CREATE
+                    else:
+                        context.type = ContextType.TEXT
+                    context.content = content
                     reply = super().build_reply_content(context.content, context)
                     if reply.type == ReplyType.TEXT:
                         if conf().get('voice_reply_voice'):
-                            if context['isgroup']:
-                                reply.content = '@' + context['msg']['ActualNickName'] + ' ' + reply.content
                             reply = super().build_text_to_voice(reply.content)
             else:
                 logger.error('[WX] unknown context type: {}'.format(context.type))