From cc19017c01c7e8b7be7eb6363d64e7865826f6b8 Mon Sep 17 00:00:00 2001
From: wanggang
Date: Tue, 7 Mar 2023 23:28:57 +0800
Subject: [PATCH] [voice] add text to voice

Reply to voice messages with synthesized speech: a voice message is
downloaded and transcribed on the thread pool, answered like a text
query, and the answer is rendered to an audio file with pyttsx3 and sent
back with itchat.send_file.

---
Review notes (ignored by git am):

* This patch was rebuilt from a whitespace-collapsed copy. Indentation and
  blank context lines are inferred from the hunk counts; runs of spaces
  inside context lines could not be recovered. If a hunk is rejected, retry
  with `git apply --ignore-whitespace`. The post-image hashes on the
  `index` lines were not regenerated.
* wechat_channel.py: dropped the unused `voices = self.engine...` line from
  WechatChannel.__init__ (no `engine` attribute is defined for that class
  in this patch; only GoogleVoice has one).
* wechat_channel.py: the voice debug log passed an argument without a
  placeholder; it now concatenates like the text log does.
* wechat_channel.py: _do_handle_voice logs its own exceptions, matching the
  other _do_* workers submitted to thread_pool.
* google_voice.py: voices[1] is only selected when it exists.
* google_voice.py: ffmpeg is invoked with an argument list instead of a
  shell string built from the received file name.
* bridge.py, voice.py: files now end with a newline.

 bridge/bridge.py                 |  4 ++
 channel/channel.py               |  6 +++-
 channel/wechat/wechat_channel.py | 53 +++++++++++++++++++++++--------
 voice/google/google_voice.py     | 37 +++++++++++++++++---
 voice/voice.py                   |  6 ++++
 5 files changed, 87 insertions(+), 19 deletions(-)

diff --git a/bridge/bridge.py b/bridge/bridge.py
index 78d950a..9d00bfe 100644
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -11,3 +11,7 @@ class Bridge(object):
 
     def fetch_voice_to_text(self, voiceFile):
         return voice_factory.create_voice("google").voiceToText(voiceFile)
+
+    def fetch_text_to_voice(self, text):
+        """Synthesize `text` with the "google" voice backend and return its result."""
+        return voice_factory.create_voice("google").textToVoice(text)
diff --git a/channel/channel.py b/channel/channel.py
index d4c0fc5..a1395c4 100644
--- a/channel/channel.py
+++ b/channel/channel.py
@@ -30,5 +30,9 @@ class Channel(object):
     def build_reply_content(self, query, context=None):
         return Bridge().fetch_reply_content(query, context)
 
-    def build_void_text(self, voice_file):
+    def build_voice_to_text(self, voice_file):
         return Bridge().fetch_voice_to_text(voice_file)
+
+    def build_text_to_voice(self, text):
+        """Delegate text-to-speech for `text` to the Bridge and return its result."""
+        return Bridge().fetch_text_to_voice(text)
diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py
index b773010..b3d3658 100644
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -55,17 +55,24 @@ class WechatChannel(Channel):
         if conf().get('speech_recognition') != True :
             return
-        logger.debug("[WX]receive voice msg: ", msg['FileName'])
-        fileName = msg['FileName']
-        msg.download(self.tmpFilePath+fileName)
-        content = super().build_void_text(self.tmpFilePath+fileName)
-        self._handle_single_msg(msg, content)
+        logger.debug("[WX]receive voice msg: " + msg['FileName'])
+        thread_pool.submit(self._do_handle_voice, msg)
+
+    def _do_handle_voice(self, msg):
+        """Download and transcribe a voice message on a worker thread, then handle the text."""
+        try:
+            fileName = self.tmpFilePath+msg['FileName']
+            msg.download(fileName)
+            content = super().build_voice_to_text(fileName)
+            self._handle_single_msg(msg, content, True)
+        except Exception as e:
+            logger.exception(e)
 
     def handle_text(self, msg):
         logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
         content = msg['Text']
-        self._handle_single_msg(msg, content)
+        self._handle_single_msg(msg, content, False)
 
-    def _handle_single_msg(self, msg, content):
+    def _handle_single_msg(self, msg, content, is_voice):
         from_user_id = msg['FromUserName']
         to_user_id = msg['ToUserName'] # 接收人id
         other_user_id = msg['User']['UserName'] # 对手方id
@@ -84,9 +91,10 @@ class WechatChannel(Channel):
             if img_match_prefix:
                 content = content.split(img_match_prefix, 1)[1].strip()
                 thread_pool.submit(self._do_send_img, content, from_user_id)
-            else:
-                thread_pool.submit(self._do_send, content, from_user_id)
-
+            elif is_voice:
+                thread_pool.submit(self._do_send_voice, content, from_user_id)
+            else:
+                thread_pool.submit(self._do_send_text, content, from_user_id)
         elif to_user_id == other_user_id and match_prefix:
             # 自己给好友发送消息
             str_list = content.split(match_prefix, 1)
@@ -96,8 +104,10 @@ class WechatChannel(Channel):
             if img_match_prefix:
                 content = content.split(img_match_prefix, 1)[1].strip()
                 thread_pool.submit(self._do_send_img, content, to_user_id)
+            elif is_voice:
+                thread_pool.submit(self._do_send_voice, content, to_user_id)
             else:
-                thread_pool.submit(self._do_send, content, to_user_id)
+                thread_pool.submit(self._do_send_text, content, to_user_id)
 
 
     def handle_group(self, msg):
@@ -129,10 +139,25 @@ class WechatChannel(Channel):
             thread_pool.submit(self._do_send_group, content, msg)
 
     def send(self, msg, receiver):
-        logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))
         itchat.send(msg, toUserName=receiver)
+        logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))
 
-    def _do_send(self, query, reply_user_id):
+    def _do_send_voice(self, query, reply_user_id):
+        """Answer `query` and send the reply to `reply_user_id` as a synthesized audio file."""
+        try:
+            if not query:
+                return
+            context = dict()
+            context['from_user_id'] = reply_user_id
+            reply_text = super().build_reply_content(query, context)
+            if reply_text:
+                replyFile = super().build_text_to_voice(reply_text)
+                itchat.send_file(replyFile, toUserName=reply_user_id)
+                logger.info('[WX] sendFile={}, receiver={}'.format(replyFile, reply_user_id))
+        except Exception as e:
+            logger.exception(e)
+
+    def _do_send_text(self, query, reply_user_id):
         try:
             if not query:
                 return
@@ -162,8 +187,8 @@ class WechatChannel(Channel):
             image_storage.seek(0)
 
             # 图片发送
-            logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
             itchat.send_image(image_storage, reply_user_id)
+            logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
         except Exception as e:
             logger.exception(e)
 
diff --git a/voice/google/google_voice.py b/voice/google/google_voice.py
index 97597b3..58955f4 100644
--- a/voice/google/google_voice.py
+++ b/voice/google/google_voice.py
@@ -4,23 +4,52 @@ google voice service
 """
 
 import subprocess
-import speech_recognition
+import time
+import speech_recognition
+import pyttsx3
+from common.log import logger
 from voice.voice import Voice
 
+
 class GoogleVoice(Voice):
+    tmpFilePath = './tmp/'
     recognizer = speech_recognition.Recognizer()
+    engine = pyttsx3.init()
 
     def __init__(self):
-        pass
+        # speech rate
+        self.engine.setProperty('rate', 125)
+        # volume
+        self.engine.setProperty('volume', 1.0)
+        # Original note: index 0 is a male voice, index 1 a female voice.
+        # Keep the engine default when a second voice is not installed.
+        voices = self.engine.getProperty('voices')
+        if len(voices) > 1:
+            self.engine.setProperty('voice', voices[1].id)
 
     def voiceToText(self, voice_file):
         new_file = voice_file.replace('.mp3', '.wav')
-        subprocess.call('ffmpeg -i ' + voice_file + ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True)
+        # Pass an argument list (no shell): the file name comes from the
+        # received message and must not be interpreted by a shell.
+        subprocess.call(['ffmpeg', '-i', voice_file, '-acodec', 'pcm_s16le',
+                         '-ac', '1', '-ar', '16000', new_file])
         with speech_recognition.AudioFile(new_file) as source:
             audio = self.recognizer.record(source)
         try:
-            return self.recognizer.recognize_google(audio, language='zh-CN')
+            text = self.recognizer.recognize_google(audio, language='zh-CN')
+            logger.info(
+                '[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
+            return text
         except speech_recognition.UnknownValueError:
             return "抱歉,我听不懂。"
         except speech_recognition.RequestError as e:
             return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)
+
+    def textToVoice(self, text):
+        """Synthesize `text` to a timestamped file under tmpFilePath and return its path."""
+        textFile = self.tmpFilePath + '语音回复_' + str(int(time.time())) + '.mp3'
+        self.engine.save_to_file(text, textFile)
+        self.engine.runAndWait()
+        logger.info(
+            '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
+        return textFile
diff --git a/voice/voice.py b/voice/voice.py
index 2f66dab..52d8aaa 100644
--- a/voice/voice.py
+++ b/voice/voice.py
@@ -8,3 +8,9 @@ class Voice(object):
         Send voice to voice service and get text
         """
         raise NotImplementedError
+
+    def textToVoice(self, text):
+        """
+        Send text to voice service and get voice
+        """
+        raise NotImplementedError