|
|
@@ -6,6 +6,7 @@ import os |
|
|
|
import time |
|
|
|
|
|
|
|
import azure.cognitiveservices.speech as speechsdk |
|
|
|
from langid import classify |
|
|
|
|
|
|
|
from bridge.reply import Reply, ReplyType |
|
|
|
from common.log import logger |
|
|
@@ -30,7 +31,15 @@ class AzureVoice(Voice): |
|
|
|
config = None |
|
|
|
if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件 |
|
|
|
config = { |
|
|
|
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", |
|
|
|
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", # 识别不出时的默认语音 |
|
|
|
"auto_detect": True, # 是否自动检测语言 |
|
|
|
"speech_synthesis_zh": "zh-CN-XiaozhenNeural", |
|
|
|
"speech_synthesis_en": "en-US-JacobNeural", |
|
|
|
"speech_synthesis_ja": "ja-JP-AoiNeural", |
|
|
|
"speech_synthesis_ko": "ko-KR-SoonBokNeural", |
|
|
|
"speech_synthesis_de": "de-DE-LouisaNeural", |
|
|
|
"speech_synthesis_fr": "fr-FR-BrigitteNeural", |
|
|
|
"speech_synthesis_es": "es-ES-LaiaNeural", |
|
|
|
"speech_recognition_language": "zh-CN", |
|
|
|
} |
|
|
|
with open(config_path, "w") as fw: |
|
|
@@ -38,11 +47,12 @@ class AzureVoice(Voice): |
|
|
|
else: |
|
|
|
with open(config_path, "r") as fr: |
|
|
|
config = json.load(fr) |
|
|
|
self.config = config |
|
|
|
self.api_key = conf().get("azure_voice_api_key") |
|
|
|
self.api_region = conf().get("azure_voice_region") |
|
|
|
self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region) |
|
|
|
self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"] |
|
|
|
self.speech_config.speech_recognition_language = config["speech_recognition_language"] |
|
|
|
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"] |
|
|
|
self.speech_config.speech_recognition_language = self.config["speech_recognition_language"] |
|
|
|
except Exception as e: |
|
|
|
logger.warn("AzureVoice init failed: %s, ignore " % e) |
|
|
|
|
|
|
@@ -59,6 +69,16 @@ class AzureVoice(Voice): |
|
|
|
return reply |
|
|
|
|
|
|
|
def textToVoice(self, text): |
|
|
|
if self.config.get("auto_detect"): |
|
|
|
lang = classify(text)[0] |
|
|
|
key = "speech_synthesis_" + lang |
|
|
|
if key in self.config: |
|
|
|
logger.info("[Azure] textToVoice auto detect language={}, voice={}".format(lang, self.config[key])) |
|
|
|
self.speech_config.speech_synthesis_voice_name = self.config[key] |
|
|
|
else: |
|
|
|
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"] |
|
|
|
else: |
|
|
|
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"] |
|
|
|
fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav" |
|
|
|
audio_config = speechsdk.AudioConfig(filename=fileName) |
|
|
|
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config) |
|
|
|