From 8d2e81815c104f2082dad4b695c6ccdbac2d6240 Mon Sep 17 00:00:00 2001
From: lanvent <lanvent@qq.com>
Date: Mon, 13 Mar 2023 00:12:34 +0800
Subject: [PATCH] Make channel, plugins and voice engines compatible with voice messages

---
 channel/wechat/wechat_channel.py |  7 +++++--
 plugins/godcmd/godcmd.py         | 12 ++++++-----
 plugins/hello/hello.py           | 35 ++++++++++++++++++--------------
 voice/baidu/baidu_voice.py       |  5 +++--
 voice/google/google_voice.py     | 27 +++++++++++++++---------
 voice/openai/openai_voice.py     | 18 ++++++++++------
 6 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py
index 73bb59a..0ba923f 100644
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -71,6 +71,7 @@ class WechatChannel(Channel):
         if from_user_id == other_user_id:
             context = {'isgroup': False, 'msg': msg, 'receiver': other_user_id}
             context['type'] = 'VOICE'
+            context['content'] = msg['FileName']
             context['session_id'] = other_user_id
             thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback)
 
@@ -183,11 +184,13 @@ class WechatChannel(Channel):
                 reply = super().build_reply_content(context['content'], context)
             elif context['type'] == 'VOICE':
                 msg = context['msg']
-                file_name = TmpDir().path() + msg['FileName']
+                file_name = TmpDir().path() + context['content']
                 msg.download(file_name)
                 reply = super().build_voice_to_text(file_name)
                 if reply['type'] != 'ERROR' and reply['type'] != 'INFO':
-                    reply = super().build_reply_content(reply['content'], context)
+                    context['content'] = reply['content'] # 语音转文字后，将文字内容作为新的context
+                    context['type'] = reply['type']
+                    reply = super().build_reply_content(context['content'], context)
                     if reply['type'] == 'TEXT':
                         if conf().get('voice_reply_voice'):
                             reply = super().build_text_to_voice(reply['content'])
diff --git a/plugins/godcmd/godcmd.py b/plugins/godcmd/godcmd.py
index 300353d..3dd8760 100644
--- a/plugins/godcmd/godcmd.py
+++ b/plugins/godcmd/godcmd.py
@@ -100,11 +100,15 @@ class Godcmd(Plugin):
 
     
     def on_handle_context(self, e_context: EventContext):
-        content = e_context['context']['content']
         context_type = e_context['context']['type']
-        logger.debug("[Godcmd] on_handle_context. content: %s" % content)
+        if context_type != "TEXT":
+            if not self.isrunning:
+                e_context.action = EventAction.BREAK_PASS
+            return
         
-        if content.startswith("#") and context_type == "TEXT":
+        content = e_context['context']['content']
+        logger.debug("[Godcmd] on_handle_context. content: %s" % content)
+        if content.startswith("#"):
             # msg = e_context['context']['msg']
             user = e_context['context']['receiver']
             session_id = e_context['context']['session_id']
@@ -176,8 +180,6 @@ class Godcmd(Plugin):
             e_context.action = EventAction.BREAK_PASS # 事件结束，并跳过处理context的默认逻辑
         elif not self.isrunning:
             e_context.action = EventAction.BREAK_PASS
-        else:
-            e_context.action = EventAction.CONTINUE # 事件继续，交付给下个插件或默认逻辑
     
     def authenticate(self, userid, args, isadmin, isgroup) -> Tuple[bool,str] : 
         if isgroup:
diff --git a/plugins/hello/hello.py b/plugins/hello/hello.py
index 144906b..ca1d257 100644
--- a/plugins/hello/hello.py
+++ b/plugins/hello/hello.py
@@ -14,26 +14,31 @@ class Hello(Plugin):
 
     def on_handle_context(self, e_context: EventContext):
 
-        logger.debug("[Hello] on_handle_context. content: %s" % e_context['context']['content'])
-
-        if e_context['context']['content'] == "Hello":
-            e_context['reply']['type'] = "TEXT"
+        if e_context['context']['type'] != "TEXT":  # this plugin only reacts to text messages
+            return
+
+        content = e_context['context']['content']
+        logger.debug("[Hello] on_handle_context. content: %s" % content)
+        if content == "Hello":
+            reply = {}
+            reply['type'] = "TEXT"
             msg = e_context['context']['msg']
             if e_context['context']['isgroup']:
-                e_context['reply']['content'] = "Hello, " + msg['ActualNickName'] + " from " + msg['User'].get('NickName', "Group")
+                reply['content'] = "Hello, " + msg['ActualNickName'] + " from " + msg['User'].get('NickName', "Group")
             else:
-                e_context['reply']['content'] = "Hello, " + msg['User'].get('NickName', "My friend")
-            
+                reply['content'] = "Hello, " + msg['User'].get('NickName', "My friend")
+            e_context['reply'] = reply
             e_context.action = EventAction.BREAK_PASS # 事件结束，并跳过处理context的默认逻辑
 
-        if e_context['context']['content'] == "Hi":
-            e_context['reply']['type'] = "TEXT"
-            e_context['reply']['content'] = "Hi"
+        if content == "Hi":
+            reply = {}
+            reply['type'] = "TEXT"
+            reply['content'] = "Hi"
+            e_context['reply'] = reply
             e_context.action = EventAction.BREAK  # 事件结束，进入默认处理逻辑，一般会覆写reply
 
-        if e_context['context']['content'] == "End":
+        if content == "End":
             # 如果是文本消息"End"，将请求转换成"IMAGE_CREATE"，并将content设置为"The World"
-            if e_context['context']['type'] == "TEXT":
-                e_context['context']['type'] = "IMAGE_CREATE"
-                e_context['context']['content'] = "The World"
-                e_context.action = EventAction.CONTINUE  # 事件继续，交付给下个插件或默认逻辑
+            e_context['context']['type'] = "IMAGE_CREATE"
+            e_context['context']['content'] = "The World"  # update the context itself; assigning to the local `content` would leave the request unchanged
+            e_context.action = EventAction.CONTINUE  # event continues: hand over to the next plugin or the default logic
diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py
index d99db37..adab169 100644
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -30,7 +30,8 @@ class BaiduVoice(Voice):
             with open(fileName, 'wb') as f:
                 f.write(result)
             logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
-            return fileName
+            reply = {"type": "VOICE", "content": fileName}
         else:
             logger.error('[Baidu] textToVoice error={}'.format(result))
-            return None
+            reply = {"type": "ERROR", "content": "抱歉，语音合成失败"}
+        return reply
diff --git a/voice/google/google_voice.py b/voice/google/google_voice.py
index 8e339f2..6c00892 100644
--- a/voice/google/google_voice.py
+++ b/voice/google/google_voice.py
@@ -32,20 +32,27 @@ class GoogleVoice(Voice):
                         ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True)
         with speech_recognition.AudioFile(new_file) as source:
             audio = self.recognizer.record(source)
+        reply = {}
         try:
             text = self.recognizer.recognize_google(audio, language='zh-CN')
             logger.info(
                 '[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
-            return text
+            reply = {"type": "TEXT", "content": text}
         except speech_recognition.UnknownValueError:
-            return "抱歉，我听不懂。"
+            reply = {"type": "ERROR", "content": "抱歉，我听不懂"}
         except speech_recognition.RequestError as e:
-            return "抱歉，无法连接到 Google 语音识别服务；{0}".format(e)
-
+            reply = {"type": "ERROR", "content": "抱歉，无法连接到 Google 语音识别服务；{0}".format(e)}
+        finally:
+            return reply
     def textToVoice(self, text):
-        textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
-        self.engine.save_to_file(text, textFile)
-        self.engine.runAndWait()
-        logger.info(
-            '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
-        return textFile
+        """Synthesize text to a file under TmpDir; return a reply dict: VOICE with the file path, or ERROR with the exception text."""
+        try:
+            textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
+            self.engine.save_to_file(text, textFile)
+            self.engine.runAndWait()
+            logger.info('[Google] textToVoice text={} voice file name={}'.format(text, textFile))
+            reply = {"type": "VOICE", "content": textFile}
+        except Exception as e:
+            logger.error('[Google] textToVoice error={}'.format(e))
+            reply = {"type": "ERROR", "content": str(e)}
+        return reply
diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py
index 475aac6..3b77c52 100644
--- a/voice/openai/openai_voice.py
+++ b/voice/openai/openai_voice.py
@@ -16,12 +16,18 @@ class OpenaiVoice(Voice):
     def voiceToText(self, voice_file):
         logger.debug(
             '[Openai] voice file name={}'.format(voice_file))
-        file = open(voice_file, "rb")
-        reply = openai.Audio.transcribe("whisper-1", file)
-        text = reply["text"]
-        logger.info(
-            '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
-        return text
+        # Returns a reply dict: TEXT with the transcript, or ERROR with the exception text.
+        try:
+            with open(voice_file, "rb") as file:  # close the audio file even if the request fails
+                result = openai.Audio.transcribe("whisper-1", file)
+            text = result["text"]
+            reply = {"type": "TEXT", "content": text}
+            logger.info(
+                '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
+        except Exception as e:
+            logger.error('[Openai] voiceToText error={}'.format(e))
+            reply = {"type": "ERROR", "content": str(e)}
+        return reply
 
     def textToVoice(self, text):
         pass