diff --git a/config.py b/config.py
index 154c633..acfb6a6 100644
--- a/config.py
+++ b/config.py
@@ -83,7 +83,7 @@ available_setting = {
     "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key
     "always_reply_voice": False, # 是否一直使用语音回复
     "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure
-    "text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),azure,elevenlabs
+    "text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),azure,elevenlabs,edge(online)
     "text_to_voice_model": "tts-1",
     "tts_voice_id": "alloy",
     # baidu 语音api配置, 使用百度语音识别和语音合成时需要
diff --git a/requirements-optional.txt b/requirements-optional.txt
index 74f1780..abb8a4e 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech
 pyttsx3>=2.90 # pytsx text to speech
 baidu_aip>=4.16.10 # baidu voice
 azure-cognitiveservices-speech # azure voice
+edge-tts # edge-tts
 numpy<=1.24.2
 langid # language detect
 
diff --git a/voice/edge/edge_voice.py b/voice/edge/edge_voice.py
new file mode 100644
--- /dev/null
+++ b/voice/edge/edge_voice.py
@@ -0,0 +1,71 @@
+"""Online text-to-speech engine built on the ``edge_tts`` package."""
+
+import asyncio
+import time
+
+import edge_tts
+
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from common.tmp_dir import TmpDir
+from voice.voice import Voice
+
+
+class EdgeVoice(Voice):
+    """Speech synthesis through ``edge_tts``; speech recognition is not implemented.
+
+    Chinese voice names that can be passed as ``voice``:
+
+    Mandarin:
+        zh-CN-XiaoxiaoNeural
+        zh-CN-XiaoyiNeural
+        zh-CN-YunjianNeural
+        zh-CN-YunxiNeural
+        zh-CN-YunxiaNeural
+        zh-CN-YunyangNeural
+    Regional accents:
+        zh-CN-liaoning-XiaobeiNeural
+        zh-CN-shaanxi-XiaoniNeural
+    Cantonese:
+        zh-HK-HiuGaaiNeural
+        zh-HK-HiuMaanNeural
+        zh-HK-WanLungNeural
+    Taiwanese accent:
+        zh-TW-HsiaoChenNeural
+        zh-TW-HsiaoYuNeural
+        zh-TW-YunJheNeural
+    """
+
+    def __init__(self, voice="zh-CN-YunjianNeural"):
+        """Select the voice used for synthesis.
+
+        :param voice: edge-tts voice name, e.g. one of those listed on the class.
+        """
+        self.voice = voice
+
+    def voiceToText(self, voice_file):
+        """Do nothing and return None: this engine only synthesizes speech."""
+        return None
+
+    async def gen_voice(self, text, fileName):
+        """Synthesize ``text`` with ``self.voice`` and save the audio to ``fileName``."""
+        communicate = edge_tts.Communicate(text, self.voice)
+        await communicate.save(fileName)
+
+    def textToVoice(self, text):
+        """Synthesize ``text`` into an mp3 file and wrap its path in a voice reply.
+
+        Must be called from a thread without a running event loop, because
+        ``asyncio.run`` raises ``RuntimeError`` otherwise.
+
+        :param text: the text to speak.
+        :return: a ``Reply`` of type ``ReplyType.VOICE`` carrying the mp3 file path.
+        """
+        # Second-resolution timestamp plus a hash of the text tells reply files apart.
+        fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
+
+        # gen_voice is a coroutine; run it to completion on a fresh event loop.
+        asyncio.run(self.gen_voice(text, fileName))
+
+        logger.info("[EdgeTTS] textToVoice text={} voice file name={}".format(text, fileName))
+        return Reply(ReplyType.VOICE, fileName)
diff --git a/voice/factory.py b/voice/factory.py
index ed80758..bc9c9c3 100644
--- a/voice/factory.py
+++ b/voice/factory.py
@@ -42,4 +42,8 @@ def create_voice(voice_type):
         from voice.ali.ali_voice import AliVoice
 
         return AliVoice()
+    elif voice_type == "edge":
+        from voice.edge.edge_voice import EdgeVoice
+
+        return EdgeVoice()
     raise RuntimeError