From 1e09bd0e76b5d6cdc52986fb6c05989c81481b43 Mon Sep 17 00:00:00 2001 From: lanvent Date: Sun, 23 Apr 2023 04:22:37 +0800 Subject: [PATCH] feat(azure_voice): add language detection, support multiple languages --- requirements-optional.txt | 1 + translate/baidu/baidu_translate.py | 3 --- voice/azure/azure_voice.py | 26 +++++++++++++++++++++++--- voice/azure/config.json.template | 8 ++++++++ 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/requirements-optional.txt b/requirements-optional.txt index ba453cb..89d822f 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech pyttsx3>=2.90 # pytsx text to speech baidu_aip>=4.16.10 # baidu voice # azure-cognitiveservices-speech # azure voice +langid # language detect #install plugin dulwich diff --git a/translate/baidu/baidu_translate.py b/translate/baidu/baidu_translate.py index d9bdf5c..bf0a721 100644 --- a/translate/baidu/baidu_translate.py +++ b/translate/baidu/baidu_translate.py @@ -8,8 +8,6 @@ import requests from config import conf from translate.translator import Translator -# from langid import classify - class BaiduTranslator(Translator): def __init__(self) -> None: @@ -24,7 +22,6 @@ class BaiduTranslator(Translator): def translate(self, query: str, from_lang: str = "", to_lang: str = "en") -> str: if not from_lang: from_lang = "auto" # baidu suppport auto detect - # from_lang = classify(query)[0] salt = random.randint(32768, 65536) sign = self.make_md5(self.appid + query + str(salt) + self.appkey) headers = {"Content-Type": "application/x-www-form-urlencoded"} diff --git a/voice/azure/azure_voice.py b/voice/azure/azure_voice.py index f911246..98ccdf1 100644 --- a/voice/azure/azure_voice.py +++ b/voice/azure/azure_voice.py @@ -6,6 +6,7 @@ import os import time import azure.cognitiveservices.speech as speechsdk +from langid import classify from bridge.reply import Reply, ReplyType from common.log import logger @@ -30,7 +31,15 @@ class 
AzureVoice(Voice): config = None if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件 config = { - "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", + "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", # 识别不出时的默认语音 + "auto_detect": True, # 是否自动检测语言 + "speech_synthesis_zh": "zh-CN-XiaozhenNeural", + "speech_synthesis_en": "en-US-JacobNeural", + "speech_synthesis_ja": "ja-JP-AoiNeural", + "speech_synthesis_ko": "ko-KR-SoonBokNeural", + "speech_synthesis_de": "de-DE-LouisaNeural", + "speech_synthesis_fr": "fr-FR-BrigitteNeural", + "speech_synthesis_es": "es-ES-LaiaNeural", "speech_recognition_language": "zh-CN", } with open(config_path, "w") as fw: @@ -38,11 +47,12 @@ class AzureVoice(Voice): else: with open(config_path, "r") as fr: config = json.load(fr) + self.config = config self.api_key = conf().get("azure_voice_api_key") self.api_region = conf().get("azure_voice_region") self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region) - self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"] - self.speech_config.speech_recognition_language = config["speech_recognition_language"] + self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"] + self.speech_config.speech_recognition_language = self.config["speech_recognition_language"] except Exception as e: logger.warn("AzureVoice init failed: %s, ignore " % e) @@ -59,6 +69,16 @@ class AzureVoice(Voice): return reply def textToVoice(self, text): + if self.config.get("auto_detect"): + lang = classify(text)[0] + key = "speech_synthesis_" + lang + if key in self.config: + logger.info("[Azure] textToVoice auto detect language={}, voice={}".format(lang, self.config[key])) + self.speech_config.speech_synthesis_voice_name = self.config[key] + else: + self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"] + else: + self.speech_config.speech_synthesis_voice_name = 
self.config["speech_synthesis_voice_name"] fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav" audio_config = speechsdk.AudioConfig(filename=fileName) speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config) diff --git a/voice/azure/config.json.template b/voice/azure/config.json.template index 2dc2176..8f3f546 100644 --- a/voice/azure/config.json.template +++ b/voice/azure/config.json.template @@ -1,4 +1,12 @@ { "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", + "auto_detect": true, + "speech_synthesis_zh": "zh-CN-YunxiNeural", + "speech_synthesis_en": "en-US-JacobNeural", + "speech_synthesis_ja": "ja-JP-AoiNeural", + "speech_synthesis_ko": "ko-KR-SoonBokNeural", + "speech_synthesis_de": "de-DE-LouisaNeural", + "speech_synthesis_fr": "fr-FR-BrigitteNeural", + "speech_synthesis_es": "es-ES-LaiaNeural", "speech_recognition_language": "zh-CN" }