
feat(azure_voice): add language detection, support multiple languages

master
lanvent, 2 years ago
commit 1e09bd0e76
4 changed files with 32 additions and 6 deletions
  1. requirements-optional.txt (+1, -0)
  2. translate/baidu/baidu_translate.py (+0, -3)
  3. voice/azure/azure_voice.py (+23, -3)
  4. voice/azure/config.json.template (+8, -0)

requirements-optional.txt (+1, -0)

@@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech
 pyttsx3>=2.90 # pytsx text to speech
 baidu_aip>=4.16.10 # baidu voice
 # azure-cognitiveservices-speech # azure voice
+langid # language detect

 #install plugin
 dulwich
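
For reference, the new langid dependency does the detection with a single classify() call that returns an ISO 639-1 language code plus a score; a minimal sketch of what it provides for the voice module changed below (the sample strings are illustrative only):

# Minimal sketch of the langid API used by voice/azure/azure_voice.py below.
from langid import classify

for sample in ["你好,世界", "Hello, world", "こんにちは"]:
    lang, score = classify(sample)  # (ISO 639-1 code, confidence score)
    print(sample, "->", lang, score)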


translate/baidu/baidu_translate.py (+0, -3)

@@ -8,8 +8,6 @@ import requests
 from config import conf
 from translate.translator import Translator

-# from langid import classify
-

 class BaiduTranslator(Translator):
     def __init__(self) -> None:
@@ -24,7 +22,6 @@ class BaiduTranslator(Translator):
     def translate(self, query: str, from_lang: str = "", to_lang: str = "en") -> str:
         if not from_lang:
             from_lang = "auto" # baidu suppport auto detect
-            # from_lang = classify(query)[0]
         salt = random.randint(32768, 65536)
         sign = self.make_md5(self.appid + query + str(salt) + self.appkey)
         headers = {"Content-Type": "application/x-www-form-urlencoded"}
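
For context on the hunk above: with the commented-out classify() fallback removed, Baidu's own "auto" detection is used, and each request is still signed with MD5 over appid + query + salt + appkey, as the context lines show. A standalone sketch of that signing step, assuming placeholder credentials (the plugin itself reads them from its config):

# Sketch of the request signature computed by BaiduTranslator above.
# appid/appkey are placeholders, not real credentials.
import hashlib
import random

appid, appkey = "YOUR_APPID", "YOUR_APPKEY"
query = "你好"
salt = random.randint(32768, 65536)
sign = hashlib.md5((appid + query + str(salt) + appkey).encode("utf-8")).hexdigest()
print(salt, sign)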


voice/azure/azure_voice.py (+23, -3)

@@ -6,6 +6,7 @@ import os
 import time

 import azure.cognitiveservices.speech as speechsdk
+from langid import classify

 from bridge.reply import Reply, ReplyType
 from common.log import logger
@@ -30,7 +31,15 @@ class AzureVoice(Voice):
             config = None
             if not os.path.exists(config_path):  # if there is no config file, create a local one
                 config = {
-                    "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
+                    "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",  # default voice when the language cannot be detected
+                    "auto_detect": True,  # whether to auto-detect the language
+                    "speech_synthesis_zh": "zh-CN-XiaozhenNeural",
+                    "speech_synthesis_en": "en-US-JacobNeural",
+                    "speech_synthesis_ja": "ja-JP-AoiNeural",
+                    "speech_synthesis_ko": "ko-KR-SoonBokNeural",
+                    "speech_synthesis_de": "de-DE-LouisaNeural",
+                    "speech_synthesis_fr": "fr-FR-BrigitteNeural",
+                    "speech_synthesis_es": "es-ES-LaiaNeural",
                     "speech_recognition_language": "zh-CN",
                 }
                 with open(config_path, "w") as fw:
@@ -38,11 +47,12 @@ class AzureVoice(Voice):
             else:
                 with open(config_path, "r") as fr:
                     config = json.load(fr)
+            self.config = config
             self.api_key = conf().get("azure_voice_api_key")
             self.api_region = conf().get("azure_voice_region")
             self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
-            self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
-            self.speech_config.speech_recognition_language = config["speech_recognition_language"]
+            self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
+            self.speech_config.speech_recognition_language = self.config["speech_recognition_language"]
         except Exception as e:
             logger.warn("AzureVoice init failed: %s, ignore " % e)

@@ -59,6 +69,16 @@ class AzureVoice(Voice):
         return reply

     def textToVoice(self, text):
+        if self.config.get("auto_detect"):
+            lang = classify(text)[0]
+            key = "speech_synthesis_" + lang
+            if key in self.config:
+                logger.info("[Azure] textToVoice auto detect language={}, voice={}".format(lang, self.config[key]))
+                self.speech_config.speech_synthesis_voice_name = self.config[key]
+            else:
+                self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
+        else:
+            self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
         fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav"
         audio_config = speechsdk.AudioConfig(filename=fileName)
         speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
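
The selection logic added to textToVoice can be read in isolation: detect the language with langid, look up "speech_synthesis_" + <lang> in the config, and fall back to the default voice when that key is missing or auto_detect is off. A standalone sketch with a hypothetical pick_voice() helper (the Azure SDK call itself is omitted):

# Hypothetical helper mirroring the auto_detect branch in textToVoice above.
from langid import classify

config = {
    "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",  # default/fallback voice
    "auto_detect": True,
    "speech_synthesis_zh": "zh-CN-XiaozhenNeural",
    "speech_synthesis_en": "en-US-JacobNeural",
}

def pick_voice(text, config):
    if config.get("auto_detect"):
        lang = classify(text)[0]  # e.g. "zh", "en", "ja"
        return config.get("speech_synthesis_" + lang, config["speech_synthesis_voice_name"])
    return config["speech_synthesis_voice_name"]

print(pick_voice("Good morning", config))  # expected: en-US-JacobNeural
print(pick_voice("早上好", config))         # expected: zh-CN-XiaozhenNeural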


voice/azure/config.json.template (+8, -0)

@@ -1,4 +1,12 @@
 {
     "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
+    "auto_detect": true,
+    "speech_synthesis_zh": "zh-CN-YunxiNeural",
+    "speech_synthesis_en": "en-US-JacobNeural",
+    "speech_synthesis_ja": "ja-JP-AoiNeural",
+    "speech_synthesis_ko": "ko-KR-SoonBokNeural",
+    "speech_synthesis_de": "de-DE-LouisaNeural",
+    "speech_synthesis_fr": "fr-FR-BrigitteNeural",
+    "speech_synthesis_es": "es-ES-LaiaNeural",
    "speech_recognition_language": "zh-CN"
 }
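
Because the lookup key is simply "speech_synthesis_" plus the code langid returns, supporting another language only requires one extra entry in the copied config. A sketch, assuming the working config sits next to the template as voice/azure/config.json and that the Italian voice name shown is available in your Azure region (neither is stated by the commit):

# Sketch: add one more language to a config.json copied from this template.
# The path and the Italian voice name are assumptions, not part of the commit.
import json

path = "voice/azure/config.json"
with open(path) as f:
    config = json.load(f)

config["speech_synthesis_it"] = "it-IT-ElsaNeural"  # assumed voice name; verify availability

with open(path, "w") as f:
    json.dump(config, f, ensure_ascii=False, indent=4)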
