Browse Source

feat(azure_voice): add language detection, support multiple languages

master
lanvent 1 year ago
parent
commit
1e09bd0e76
4 changed files with 32 additions and 6 deletions
  1. +1
    -0
      requirements-optional.txt
  2. +0
    -3
      translate/baidu/baidu_translate.py
  3. +23
    -3
      voice/azure/azure_voice.py
  4. +8
    -0
      voice/azure/config.json.template

+ 1
- 0
requirements-optional.txt View File

@@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech
pyttsx3>=2.90 # pyttsx text to speech pyttsx3>=2.90 # pyttsx text to speech
baidu_aip>=4.16.10 # baidu voice baidu_aip>=4.16.10 # baidu voice
# azure-cognitiveservices-speech # azure voice # azure-cognitiveservices-speech # azure voice
langid # language detect


#install plugin #install plugin
dulwich dulwich


+ 0
- 3
translate/baidu/baidu_translate.py View File

@@ -8,8 +8,6 @@ import requests
from config import conf from config import conf
from translate.translator import Translator from translate.translator import Translator


# from langid import classify



class BaiduTranslator(Translator): class BaiduTranslator(Translator):
def __init__(self) -> None: def __init__(self) -> None:
@@ -24,7 +22,6 @@ class BaiduTranslator(Translator):
def translate(self, query: str, from_lang: str = "", to_lang: str = "en") -> str: def translate(self, query: str, from_lang: str = "", to_lang: str = "en") -> str:
if not from_lang: if not from_lang:
from_lang = "auto" # baidu support auto detect from_lang = "auto" # baidu support auto detect
# from_lang = classify(query)[0]
salt = random.randint(32768, 65536) salt = random.randint(32768, 65536)
sign = self.make_md5(self.appid + query + str(salt) + self.appkey) sign = self.make_md5(self.appid + query + str(salt) + self.appkey)
headers = {"Content-Type": "application/x-www-form-urlencoded"} headers = {"Content-Type": "application/x-www-form-urlencoded"}


+ 23
- 3
voice/azure/azure_voice.py View File

@@ -6,6 +6,7 @@ import os
import time import time


import azure.cognitiveservices.speech as speechsdk import azure.cognitiveservices.speech as speechsdk
from langid import classify


from bridge.reply import Reply, ReplyType from bridge.reply import Reply, ReplyType
from common.log import logger from common.log import logger
@@ -30,7 +31,15 @@ class AzureVoice(Voice):
config = None config = None
if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件 if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件
config = { config = {
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", # 识别不出时的默认语音
"auto_detect": True, # 是否自动检测语言
"speech_synthesis_zh": "zh-CN-XiaozhenNeural",
"speech_synthesis_en": "en-US-JacobNeural",
"speech_synthesis_ja": "ja-JP-AoiNeural",
"speech_synthesis_ko": "ko-KR-SoonBokNeural",
"speech_synthesis_de": "de-DE-LouisaNeural",
"speech_synthesis_fr": "fr-FR-BrigitteNeural",
"speech_synthesis_es": "es-ES-LaiaNeural",
"speech_recognition_language": "zh-CN", "speech_recognition_language": "zh-CN",
} }
with open(config_path, "w") as fw: with open(config_path, "w") as fw:
@@ -38,11 +47,12 @@ class AzureVoice(Voice):
else: else:
with open(config_path, "r") as fr: with open(config_path, "r") as fr:
config = json.load(fr) config = json.load(fr)
self.config = config
self.api_key = conf().get("azure_voice_api_key") self.api_key = conf().get("azure_voice_api_key")
self.api_region = conf().get("azure_voice_region") self.api_region = conf().get("azure_voice_region")
self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region) self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
self.speech_config.speech_recognition_language = config["speech_recognition_language"]
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
self.speech_config.speech_recognition_language = self.config["speech_recognition_language"]
except Exception as e: except Exception as e:
logger.warn("AzureVoice init failed: %s, ignore " % e) logger.warn("AzureVoice init failed: %s, ignore " % e)


@@ -59,6 +69,16 @@ class AzureVoice(Voice):
return reply return reply


def textToVoice(self, text): def textToVoice(self, text):
if self.config.get("auto_detect"):
lang = classify(text)[0]
key = "speech_synthesis_" + lang
if key in self.config:
logger.info("[Azure] textToVoice auto detect language={}, voice={}".format(lang, self.config[key]))
self.speech_config.speech_synthesis_voice_name = self.config[key]
else:
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
else:
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav" fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav"
audio_config = speechsdk.AudioConfig(filename=fileName) audio_config = speechsdk.AudioConfig(filename=fileName)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config) speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)


+ 8
- 0
voice/azure/config.json.template View File

@@ -1,4 +1,12 @@
{ {
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
"auto_detect": true,
"speech_synthesis_zh": "zh-CN-YunxiNeural",
"speech_synthesis_en": "en-US-JacobNeural",
"speech_synthesis_ja": "ja-JP-AoiNeural",
"speech_synthesis_ko": "ko-KR-SoonBokNeural",
"speech_synthesis_de": "de-DE-LouisaNeural",
"speech_synthesis_fr": "fr-FR-BrigitteNeural",
"speech_synthesis_es": "es-ES-LaiaNeural",
"speech_recognition_language": "zh-CN" "speech_recognition_language": "zh-CN"
} }

Loading…
Cancel
Save