@@ -83,7 +83,7 @@ available_setting = { | |||||
"voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key | "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key | ||||
"always_reply_voice": False, # 是否一直使用语音回复 | "always_reply_voice": False, # 是否一直使用语音回复 | ||||
"voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure | "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure | ||||
"text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),azure,elevenlabs | |||||
"text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),azure,elevenlabs,edge(online) | |||||
"text_to_voice_model": "tts-1", | "text_to_voice_model": "tts-1", | ||||
"tts_voice_id": "alloy", | "tts_voice_id": "alloy", | ||||
# baidu 语音api配置, 使用百度语音识别和语音合成时需要 | # baidu 语音api配置, 使用百度语音识别和语音合成时需要 | ||||
@@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech | |||||
pyttsx3>=2.90 # pytsx text to speech | pyttsx3>=2.90 # pytsx text to speech | ||||
baidu_aip>=4.16.10 # baidu voice | baidu_aip>=4.16.10 # baidu voice | ||||
azure-cognitiveservices-speech # azure voice | azure-cognitiveservices-speech # azure voice | ||||
edge-tts # edge-tts | |||||
numpy<=1.24.2 | numpy<=1.24.2 | ||||
langid # language detect | langid # language detect | ||||
@@ -0,0 +1,50 @@ | |||||
import time | |||||
import edge_tts | |||||
import asyncio | |||||
from bridge.reply import Reply, ReplyType | |||||
from common.log import logger | |||||
from common.tmp_dir import TmpDir | |||||
from voice.voice import Voice | |||||
class EdgeVoice(Voice):
    """Text-to-speech engine backed by Microsoft Edge's online TTS service."""

    def __init__(self):
        # Selectable voices (assign one of these to ``self.voice``):
        #   Mandarin:
        #     zh-CN-XiaoxiaoNeural, zh-CN-XiaoyiNeural, zh-CN-YunjianNeural,
        #     zh-CN-YunxiNeural, zh-CN-YunxiaNeural, zh-CN-YunyangNeural
        #   Regional accents:
        #     zh-CN-liaoning-XiaobeiNeural, zh-CN-shaanxi-XiaoniNeural
        #   Cantonese:
        #     zh-HK-HiuGaaiNeural, zh-HK-HiuMaanNeural, zh-HK-WanLungNeural
        #   Taiwanese Mandarin:
        #     zh-TW-HsiaoChenNeural, zh-TW-HsiaoYuNeural, zh-TW-YunJheNeural
        self.voice = "zh-CN-YunjianNeural"

    def voiceToText(self, voice_file):
        """Speech-to-text is not implemented for this engine."""
        pass

    async def gen_voice(self, text, fileName):
        """Stream the synthesized audio for *text* from Edge TTS into *fileName*."""
        communicator = edge_tts.Communicate(text, self.voice)
        await communicator.save(fileName)

    def textToVoice(self, text):
        """Synthesize *text* into a temporary mp3 file and return a VOICE reply."""
        # Name combines a timestamp and a non-negative text hash to avoid clashes.
        out_path = "{}reply-{}-{}.mp3".format(
            TmpDir().path(), int(time.time()), hash(text) & 0x7FFFFFFF
        )
        # edge_tts is async-only; drive the coroutine with a fresh event loop.
        asyncio.run(self.gen_voice(text, out_path))
        logger.info("[EdgeTTS] textToVoice text={} voice file name={}".format(text, out_path))
        return Reply(ReplyType.VOICE, out_path)
@@ -42,4 +42,8 @@ def create_voice(voice_type): | |||||
from voice.ali.ali_voice import AliVoice | from voice.ali.ali_voice import AliVoice | ||||
return AliVoice() | return AliVoice() | ||||
elif voice_type == "edge": | |||||
from voice.edge.edge_voice import EdgeVoice | |||||
return EdgeVoice() | |||||
raise RuntimeError | raise RuntimeError |