Bläddra i källkod

feat(azure_voice): add language detection, support multiple languages

master
lanvent 1 år sedan
förälder
incheckning
1e09bd0e76
4 ändrade filer med 32 tillägg och 6 borttagningar
  1. +1
    -0
      requirements-optional.txt
  2. +0
    -3
      translate/baidu/baidu_translate.py
  3. +23
    -3
      voice/azure/azure_voice.py
  4. +8
    -0
      voice/azure/config.json.template

+ 1
- 0
requirements-optional.txt Visa fil

@@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech
pyttsx3>=2.90 # pytsx text to speech pyttsx3>=2.90 # pytsx text to speech
baidu_aip>=4.16.10 # baidu voice baidu_aip>=4.16.10 # baidu voice
# azure-cognitiveservices-speech # azure voice # azure-cognitiveservices-speech # azure voice
langid # language detect


#install plugin #install plugin
dulwich dulwich


+ 0
- 3
translate/baidu/baidu_translate.py Visa fil

@@ -8,8 +8,6 @@ import requests
from config import conf from config import conf
from translate.translator import Translator from translate.translator import Translator


# from langid import classify



class BaiduTranslator(Translator): class BaiduTranslator(Translator):
def __init__(self) -> None: def __init__(self) -> None:
@@ -24,7 +22,6 @@ class BaiduTranslator(Translator):
def translate(self, query: str, from_lang: str = "", to_lang: str = "en") -> str: def translate(self, query: str, from_lang: str = "", to_lang: str = "en") -> str:
if not from_lang: if not from_lang:
from_lang = "auto" # baidu supports auto detect from_lang = "auto" # baidu supports auto detect
# from_lang = classify(query)[0]
salt = random.randint(32768, 65536) salt = random.randint(32768, 65536)
sign = self.make_md5(self.appid + query + str(salt) + self.appkey) sign = self.make_md5(self.appid + query + str(salt) + self.appkey)
headers = {"Content-Type": "application/x-www-form-urlencoded"} headers = {"Content-Type": "application/x-www-form-urlencoded"}


+ 23
- 3
voice/azure/azure_voice.py Visa fil

@@ -6,6 +6,7 @@ import os
import time import time


import azure.cognitiveservices.speech as speechsdk import azure.cognitiveservices.speech as speechsdk
from langid import classify


from bridge.reply import Reply, ReplyType from bridge.reply import Reply, ReplyType
from common.log import logger from common.log import logger
@@ -30,7 +31,15 @@ class AzureVoice(Voice):
config = None config = None
if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件 if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件
config = { config = {
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", # 识别不出时的默认语音
"auto_detect": True, # 是否自动检测语言
"speech_synthesis_zh": "zh-CN-XiaozhenNeural",
"speech_synthesis_en": "en-US-JacobNeural",
"speech_synthesis_ja": "ja-JP-AoiNeural",
"speech_synthesis_ko": "ko-KR-SoonBokNeural",
"speech_synthesis_de": "de-DE-LouisaNeural",
"speech_synthesis_fr": "fr-FR-BrigitteNeural",
"speech_synthesis_es": "es-ES-LaiaNeural",
"speech_recognition_language": "zh-CN", "speech_recognition_language": "zh-CN",
} }
with open(config_path, "w") as fw: with open(config_path, "w") as fw:
@@ -38,11 +47,12 @@ class AzureVoice(Voice):
else: else:
with open(config_path, "r") as fr: with open(config_path, "r") as fr:
config = json.load(fr) config = json.load(fr)
self.config = config
self.api_key = conf().get("azure_voice_api_key") self.api_key = conf().get("azure_voice_api_key")
self.api_region = conf().get("azure_voice_region") self.api_region = conf().get("azure_voice_region")
self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region) self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
self.speech_config.speech_recognition_language = config["speech_recognition_language"]
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
self.speech_config.speech_recognition_language = self.config["speech_recognition_language"]
except Exception as e: except Exception as e:
logger.warn("AzureVoice init failed: %s, ignore " % e) logger.warn("AzureVoice init failed: %s, ignore " % e)


@@ -59,6 +69,16 @@ class AzureVoice(Voice):
return reply return reply


def textToVoice(self, text): def textToVoice(self, text):
if self.config.get("auto_detect"):
lang = classify(text)[0]
key = "speech_synthesis_" + lang
if key in self.config:
logger.info("[Azure] textToVoice auto detect language={}, voice={}".format(lang, self.config[key]))
self.speech_config.speech_synthesis_voice_name = self.config[key]
else:
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
else:
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav" fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav"
audio_config = speechsdk.AudioConfig(filename=fileName) audio_config = speechsdk.AudioConfig(filename=fileName)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config) speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)


+ 8
- 0
voice/azure/config.json.template Visa fil

@@ -1,4 +1,12 @@
{ {
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
"auto_detect": true,
"speech_synthesis_zh": "zh-CN-YunxiNeural",
"speech_synthesis_en": "en-US-JacobNeural",
"speech_synthesis_ja": "ja-JP-AoiNeural",
"speech_synthesis_ko": "ko-KR-SoonBokNeural",
"speech_synthesis_de": "de-DE-LouisaNeural",
"speech_synthesis_fr": "fr-FR-BrigitteNeural",
"speech_synthesis_es": "es-ES-LaiaNeural",
"speech_recognition_language": "zh-CN" "speech_recognition_language": "zh-CN"
} }

Laddar…
Avbryt
Spara