From 8d2e81815c104f2082dad4b695c6ccdbac2d6240 Mon Sep 17 00:00:00 2001 From: lanvent Date: Mon, 13 Mar 2023 00:12:34 +0800 Subject: [PATCH] compatible for voice --- channel/wechat/wechat_channel.py | 7 +++++-- plugins/godcmd/godcmd.py | 12 ++++++----- plugins/hello/hello.py | 35 ++++++++++++++++++-------------- voice/baidu/baidu_voice.py | 5 +++-- voice/google/google_voice.py | 27 +++++++++++++++--------- voice/openai/openai_voice.py | 18 ++++++++++------ 6 files changed, 64 insertions(+), 40 deletions(-) diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py index 73bb59a..0ba923f 100644 --- a/channel/wechat/wechat_channel.py +++ b/channel/wechat/wechat_channel.py @@ -71,6 +71,7 @@ class WechatChannel(Channel): if from_user_id == other_user_id: context = {'isgroup': False, 'msg': msg, 'receiver': other_user_id} context['type'] = 'VOICE' + context['content'] = msg['FileName'] context['session_id'] = other_user_id thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback) @@ -183,11 +184,13 @@ class WechatChannel(Channel): reply = super().build_reply_content(context['content'], context) elif context['type'] == 'VOICE': msg = context['msg'] - file_name = TmpDir().path() + msg['FileName'] + file_name = TmpDir().path() + context['content'] msg.download(file_name) reply = super().build_voice_to_text(file_name) if reply['type'] != 'ERROR' and reply['type'] != 'INFO': - reply = super().build_reply_content(reply['content'], context) + context['content'] = reply['content'] # 语音转文字后,将文字内容作为新的context + context['type'] = reply['type'] + reply = super().build_reply_content(context['content'], context) if reply['type'] == 'TEXT': if conf().get('voice_reply_voice'): reply = super().build_text_to_voice(reply['content']) diff --git a/plugins/godcmd/godcmd.py b/plugins/godcmd/godcmd.py index 300353d..3dd8760 100644 --- a/plugins/godcmd/godcmd.py +++ b/plugins/godcmd/godcmd.py @@ -100,11 +100,15 @@ class Godcmd(Plugin): def on_handle_context(self, e_context: EventContext): - content = e_context['context']['content'] context_type = e_context['context']['type'] - logger.debug("[Godcmd] on_handle_context. content: %s" % content) + if context_type != "TEXT": + if not self.isrunning: + e_context.action = EventAction.BREAK_PASS + return - if content.startswith("#") and context_type == "TEXT": + content = e_context['context']['content'] + logger.debug("[Godcmd] on_handle_context. content: %s" % content) + if content.startswith("#"): # msg = e_context['context']['msg'] user = e_context['context']['receiver'] session_id = e_context['context']['session_id'] @@ -176,8 +180,6 @@ class Godcmd(Plugin): e_context.action = EventAction.BREAK_PASS # 事件结束,并跳过处理context的默认逻辑 elif not self.isrunning: e_context.action = EventAction.BREAK_PASS - else: - e_context.action = EventAction.CONTINUE # 事件继续,交付给下个插件或默认逻辑 def authenticate(self, userid, args, isadmin, isgroup) -> Tuple[bool,str] : if isgroup: diff --git a/plugins/hello/hello.py b/plugins/hello/hello.py index 144906b..ca1d257 100644 --- a/plugins/hello/hello.py +++ b/plugins/hello/hello.py @@ -14,26 +14,31 @@ class Hello(Plugin): def on_handle_context(self, e_context: EventContext): - logger.debug("[Hello] on_handle_context. content: %s" % e_context['context']['content']) - - if e_context['context']['content'] == "Hello": - e_context['reply']['type'] = "TEXT" + if e_context['context']['type'] != "TEXT": + return + + content = e_context['context']['content'] + logger.debug("[Hello] on_handle_context. content: %s" % content) + if content == "Hello": + reply = {} + reply['type'] = "TEXT" msg = e_context['context']['msg'] if e_context['context']['isgroup']: - e_context['reply']['content'] = "Hello, " + msg['ActualNickName'] + " from " + msg['User'].get('NickName', "Group") + reply['content'] = "Hello, " + msg['ActualNickName'] + " from " + msg['User'].get('NickName', "Group") else: - e_context['reply']['content'] = "Hello, " + msg['User'].get('NickName', "My friend") - + reply['content'] = "Hello, " + msg['User'].get('NickName', "My friend") + e_context['reply'] = reply e_context.action = EventAction.BREAK_PASS # 事件结束,并跳过处理context的默认逻辑 - if e_context['context']['content'] == "Hi": - e_context['reply']['type'] = "TEXT" - e_context['reply']['content'] = "Hi" + if content == "Hi": + reply={} + reply['type'] = "TEXT" + reply['content'] = "Hi" + e_context['reply'] = reply e_context.action = EventAction.BREAK # 事件结束,进入默认处理逻辑,一般会覆写reply - if e_context['context']['content'] == "End": + if content == "End": # 如果是文本消息"End",将请求转换成"IMAGE_CREATE",并将content设置为"The World" - if e_context['context']['type'] == "TEXT": - e_context['context']['type'] = "IMAGE_CREATE" - e_context['context']['content'] = "The World" - e_context.action = EventAction.CONTINUE # 事件继续,交付给下个插件或默认逻辑 + e_context['context']['type'] = "IMAGE_CREATE" + content = "The World" + e_context.action = EventAction.CONTINUE # 事件继续,交付给下个插件或默认逻辑 diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py index d99db37..adab169 100644 --- a/voice/baidu/baidu_voice.py +++ b/voice/baidu/baidu_voice.py @@ -30,7 +30,8 @@ class BaiduVoice(Voice): with open(fileName, 'wb') as f: f.write(result) logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName)) - return fileName + reply = {"type": "VOICE", "content": fileName} else: logger.error('[Baidu] textToVoice error={}'.format(result)) - return None + reply = {"type": "ERROR", "content": "抱歉,语音合成失败"} + return reply diff --git a/voice/google/google_voice.py b/voice/google/google_voice.py index 8e339f2..6c00892 100644 --- a/voice/google/google_voice.py +++ b/voice/google/google_voice.py @@ -32,20 +32,27 @@ class GoogleVoice(Voice): ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True) with speech_recognition.AudioFile(new_file) as source: audio = self.recognizer.record(source) + reply = {} try: text = self.recognizer.recognize_google(audio, language='zh-CN') logger.info( '[Google] voiceToText text={} voice file name={}'.format(text, voice_file)) - return text + reply = {"type": "TEXT", "content": text} except speech_recognition.UnknownValueError: - return "抱歉,我听不懂。" + reply = {"type": "ERROR", "content": "抱歉,我听不懂"} except speech_recognition.RequestError as e: - return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e) - + reply = {"type": "ERROR", "content": "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)} + finally: + return reply def textToVoice(self, text): - textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3' - self.engine.save_to_file(text, textFile) - self.engine.runAndWait() - logger.info( - '[Google] textToVoice text={} voice file name={}'.format(text, textFile)) - return textFile + try: + textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3' + self.engine.save_to_file(text, textFile) + self.engine.runAndWait() + logger.info( + '[Google] textToVoice text={} voice file name={}'.format(text, textFile)) + reply = {"type": "VOICE", "content": textFile} + except Exception as e: + reply = {"type": "ERROR", "content": str(e)} + finally: + return reply diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py index 475aac6..3b77c52 100644 --- a/voice/openai/openai_voice.py +++ b/voice/openai/openai_voice.py @@ -16,12 +16,18 @@ class OpenaiVoice(Voice): def voiceToText(self, voice_file): logger.debug( '[Openai] voice file name={}'.format(voice_file)) - file = open(voice_file, "rb") - reply = openai.Audio.transcribe("whisper-1", file) - text = reply["text"] - logger.info( - '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file)) - return text + reply={} + try: + file = open(voice_file, "rb") + result = openai.Audio.transcribe("whisper-1", file) + text = result["text"] + reply = {"type": "TEXT", "content": text} + logger.info( + '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file)) + except Exception as e: + reply = {"type": "ERROR", "content": str(e)} + finally: + return reply def textToVoice(self, text): pass