From e78886fb35759477074a379416bf35fc21dfbc50 Mon Sep 17 00:00:00 2001 From: lanvent Date: Tue, 28 Mar 2023 03:14:26 +0800 Subject: [PATCH] feat: new voice class pytts --- channel/wechat/wechat_channel.py | 64 ++++++++++++-------------------- config.py | 4 +- voice/google/google_voice.py | 15 +------- voice/openai/openai_voice.py | 3 -- voice/pytts/pytts_voice.py | 37 ++++++++++++++++++ voice/voice_factory.py | 3 ++ 6 files changed, 66 insertions(+), 60 deletions(-) create mode 100644 voice/pytts/pytts_voice.py diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py index e013cba..c427407 100644 --- a/channel/wechat/wechat_channel.py +++ b/channel/wechat/wechat_channel.py @@ -68,8 +68,7 @@ class WechatChannel(Channel): itchat.auto_login(enableCmdQR=2, hotReload=hotReload) except Exception as e: if hotReload: - logger.error( - "Hot reload failed, try to login without hot reload") + logger.error("Hot reload failed, try to login without hot reload") itchat.logout() os.remove("itchat.pkl") itchat.auto_login(enableCmdQR=2, hotReload=hotReload) @@ -112,8 +111,7 @@ class WechatChannel(Channel): @time_checker def handle_text(self, msg): - logger.debug("[WX]receive text msg: " + - json.dumps(msg, ensure_ascii=False)) + logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False)) content = msg['Text'] from_user_id = msg['FromUserName'] to_user_id = msg['ToUserName'] # 接收人id @@ -141,8 +139,7 @@ class WechatChannel(Channel): context.kwargs = {'isgroup': False, 'msg': msg, 'receiver': other_user_id, 'session_id': other_user_id} - img_match_prefix = check_prefix( - content, conf().get('image_create_prefix')) + img_match_prefix = check_prefix(content, conf().get('image_create_prefix')) if img_match_prefix: content = content.replace(img_match_prefix, '', 1).strip() context.type = ContextType.IMAGE_CREATE @@ -150,13 +147,11 @@ class WechatChannel(Channel): context.type = ContextType.TEXT context.content = content - thread_pool.submit(self.handle, context).add_done_callback( - thread_pool_callback) + thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback) @time_checker def handle_group(self, msg): - logger.debug("[WX]receive group msg: " + - json.dumps(msg, ensure_ascii=False)) + logger.debug("[WX]receive group msg: " + json.dumps(msg, ensure_ascii=False)) group_name = msg['User'].get('NickName', None) group_id = msg['User'].get('UserName', None) create_time = msg['CreateTime'] # 消息时间 @@ -181,11 +176,9 @@ class WechatChannel(Channel): or check_contain(origin_content, config.get('group_chat_keyword')) if ('ALL_GROUP' in config.get('group_name_white_list') or group_name in config.get('group_name_white_list') or check_contain(group_name, config.get('group_name_keyword_white_list'))) and match_prefix: context = Context() - context.kwargs = {'isgroup': True, - 'msg': msg, 'receiver': group_id} + context.kwargs = { 'isgroup': True, 'msg': msg, 'receiver': group_id} - img_match_prefix = check_prefix( - content, conf().get('image_create_prefix')) + img_match_prefix = check_prefix(content, conf().get('image_create_prefix')) if img_match_prefix: content = content.replace(img_match_prefix, '', 1).strip() context.type = ContextType.IMAGE_CREATE @@ -201,8 +194,7 @@ class WechatChannel(Channel): else: context['session_id'] = msg['ActualUserName'] - thread_pool.submit(self.handle, context).add_done_callback( - thread_pool_callback) + thread_pool.submit(self.handle, context).add_done_callback(thread_pool_callback) def handle_group_voice(self, msg): if conf().get('group_speech_recognition', False) != True: @@ -241,9 +233,8 @@ class WechatChannel(Channel): logger.info('[WX] sendMsg={}, receiver={}'.format(reply, receiver)) elif reply.type == ReplyType.VOICE: itchat.send_file(reply.content, toUserName=receiver) - logger.info('[WX] sendFile={}, receiver={}'.format( - reply.content, receiver)) - elif reply.type == ReplyType.IMAGE_URL: # 从网络下载图片 + logger.info('[WX] sendFile={}, receiver={}'.format(reply.content, receiver)) + elif reply.type == ReplyType.IMAGE_URL: # 从网络下载图片 img_url = reply.content pic_res = requests.get(img_url, stream=True) image_storage = io.BytesIO() @@ -251,9 +242,8 @@ class WechatChannel(Channel): image_storage.write(block) image_storage.seek(0) itchat.send_image(image_storage, toUserName=receiver) - logger.info('[WX] sendImage url={}, receiver={}'.format( - img_url, receiver)) - elif reply.type == ReplyType.IMAGE: # 从文件读取图片 + logger.info('[WX] sendImage url={}, receiver={}'.format(img_url,receiver)) + elif reply.type == ReplyType.IMAGE: # 从文件读取图片 image_storage = reply.content image_storage.seek(0) itchat.send_image(image_storage, toUserName=receiver) @@ -291,20 +281,16 @@ class WechatChannel(Channel): if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO: content = reply.content # 语音转文字后,将文字内容作为新的context context.type = ContextType.TEXT - if (context["isgroup"] == True): + if context["isgroup"]: # 校验关键字 - match_prefix = check_prefix(content, conf().get('group_chat_prefix')) \ - or check_contain(content, conf().get('group_chat_keyword')) - # Wechaty判断is_at为True,返回的内容是过滤掉@之后的内容;而is_at为False,则会返回完整的内容 - if match_prefix is not None: - # 故判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容,用于实现类似自定义+前缀触发生成AI图片的功能 - prefixes = conf().get('group_chat_prefix') - for prefix in prefixes: - if content.startswith(prefix): - content = content.replace(prefix, '', 1).strip() - break + match_prefix = check_prefix(content, conf().get('group_chat_prefix')) + match_contain = check_contain(content, conf().get('group_chat_keyword')) + if match_prefix is not None or match_contain is not None: + # 判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容,用于实现类似自定义+前缀触发生成AI图片的功能 + if match_prefix: + content = content.replace(match_prefix, '', 1).strip() else: - logger.info("[WX]receive voice check prefix: " + 'False') + logger.info("[WX]receive voice, checkprefix didn't match") return img_match_prefix = check_prefix(content, conf().get('image_create_prefix')) @@ -333,9 +319,7 @@ class WechatChannel(Channel): if reply.type == ReplyType.TEXT: reply_text = reply.content if context['isgroup']: - reply_text = '@' + \ - context['msg']['ActualNickName'] + \ - ' ' + reply_text.strip() + reply_text = '@' + context['msg']['ActualNickName'] + ' ' + reply_text.strip() reply_text = conf().get("group_chat_reply_prefix", "")+reply_text else: reply_text = conf().get("single_chat_reply_prefix", "")+reply_text @@ -345,8 +329,7 @@ class WechatChannel(Channel): elif reply.type == ReplyType.IMAGE_URL or reply.type == ReplyType.VOICE or reply.type == ReplyType.IMAGE: pass else: - logger.error( - '[WX] unknown reply type: {}'.format(reply.type)) + logger.error('[WX] unknown reply type: {}'.format(reply.type)) return # reply的发送步骤 @@ -355,8 +338,7 @@ class WechatChannel(Channel): 'channel': self, 'context': context, 'reply': reply})) reply = e_context['reply'] if not e_context.is_pass() and reply and reply.type: - logger.debug('[WX] ready to send reply: {} to {}'.format( - reply, context['receiver'])) + logger.debug('[WX] ready to send reply: {} to {}'.format(reply, context['receiver'])) self.send(reply, context['receiver']) def check_prefix(content, prefix_list): diff --git a/config.py b/config.py index 9b3204f..de2e380 100644 --- a/config.py +++ b/config.py @@ -47,8 +47,8 @@ available_setting = { "speech_recognition": False, # 是否开启语音识别 "group_speech_recognition": False, # 是否开启群组语音识别 "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key - "voice_to_text": "openai", # 语音识别引擎,支持openai和google - "text_to_voice": "baidu", # 语音合成引擎,支持baidu和google + "voice_to_text": "openai", # 语音识别引擎,支持openai,google + "text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline) # baidu api的配置, 使用百度语音识别和语音合成时需要 "baidu_app_id": "", diff --git a/voice/google/google_voice.py b/voice/google/google_voice.py index 8770b58..901cc99 100644 --- a/voice/google/google_voice.py +++ b/voice/google/google_voice.py @@ -5,7 +5,6 @@ google voice service import time import speech_recognition -import pyttsx3 from gtts import gTTS from bridge.reply import Reply, ReplyType from common.log import logger @@ -15,21 +14,11 @@ from voice.voice import Voice class GoogleVoice(Voice): recognizer = speech_recognition.Recognizer() - engine = pyttsx3.init() def __init__(self): - # 语速 - self.engine.setProperty('rate', 125) - # 音量 - self.engine.setProperty('volume', 1.0) - # 0为男声,1为女声 - voices = self.engine.getProperty('voices') - self.engine.setProperty('voice', voices[1].id) + pass def voiceToText(self, voice_file): - # new_file = voice_file.replace('.mp3', '.wav') - # subprocess.call('ffmpeg -i ' + voice_file + - # ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True) with speech_recognition.AudioFile(voice_file) as source: audio = self.recognizer.record(source) try: @@ -46,8 +35,6 @@ class GoogleVoice(Voice): def textToVoice(self, text): try: mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3' - # self.engine.save_to_file(text, textFile) - # self.engine.runAndWait() tts = gTTS(text=text, lang='zh') tts.save(mp3File) logger.info( diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py index 2e85e10..c98d0c9 100644 --- a/voice/openai/openai_voice.py +++ b/voice/openai/openai_voice.py @@ -28,6 +28,3 @@ class OpenaiVoice(Voice): reply = Reply(ReplyType.ERROR, str(e)) finally: return reply - - def textToVoice(self, text): - pass diff --git a/voice/pytts/pytts_voice.py b/voice/pytts/pytts_voice.py new file mode 100644 index 0000000..8884f39 --- /dev/null +++ b/voice/pytts/pytts_voice.py @@ -0,0 +1,37 @@ + +""" +pytts voice service (offline) +""" + +import time +import pyttsx3 +from bridge.reply import Reply, ReplyType +from common.log import logger +from common.tmp_dir import TmpDir +from voice.voice import Voice + + +class PyttsVoice(Voice): + engine = pyttsx3.init() + + def __init__(self): + # 语速 + self.engine.setProperty('rate', 125) + # 音量 + self.engine.setProperty('volume', 1.0) + for voice in self.engine.getProperty('voices'): + if "Chinese" in voice.name: + self.engine.setProperty('voice', voice.id) + + def textToVoice(self, text): + try: + mp3File = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3' + self.engine.save_to_file(text, mp3File) + self.engine.runAndWait() + logger.info( + '[Pytts] textToVoice text={} voice file name={}'.format(text, mp3File)) + reply = Reply(ReplyType.VOICE, mp3File) + except Exception as e: + reply = Reply(ReplyType.ERROR, str(e)) + finally: + return reply diff --git a/voice/voice_factory.py b/voice/voice_factory.py index 053840e..591e346 100644 --- a/voice/voice_factory.py +++ b/voice/voice_factory.py @@ -17,4 +17,7 @@ def create_voice(voice_type): elif voice_type == 'openai': from voice.openai.openai_voice import OpenaiVoice return OpenaiVoice() + elif voice_type == 'pytts': + from voice.pytts.pytts_voice import PyttsVoice + return PyttsVoice() raise RuntimeError