Przeglądaj źródła

feat(azure_voice): add language detection, support multiple languages

master
lanvent 1 rok temu
rodzic
commit
1e09bd0e76
4 zmienionych plików z 32 dodań i 6 usunięć
  1. +1
    -0
      requirements-optional.txt
  2. +0
    -3
      translate/baidu/baidu_translate.py
  3. +23
    -3
      voice/azure/azure_voice.py
  4. +8
    -0
      voice/azure/config.json.template

+ 1
- 0
requirements-optional.txt Wyświetl plik

@@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech
pyttsx3>=2.90 # pytsx text to speech
baidu_aip>=4.16.10 # baidu voice
# azure-cognitiveservices-speech # azure voice
langid # language detect

#install plugin
dulwich


+ 0
- 3
translate/baidu/baidu_translate.py Wyświetl plik

@@ -8,8 +8,6 @@ import requests
from config import conf
from translate.translator import Translator

# from langid import classify


class BaiduTranslator(Translator):
def __init__(self) -> None:
@@ -24,7 +22,6 @@ class BaiduTranslator(Translator):
def translate(self, query: str, from_lang: str = "", to_lang: str = "en") -> str:
if not from_lang:
from_lang = "auto" # baidu supports auto detect
# from_lang = classify(query)[0]
salt = random.randint(32768, 65536)
sign = self.make_md5(self.appid + query + str(salt) + self.appkey)
headers = {"Content-Type": "application/x-www-form-urlencoded"}


+ 23
- 3
voice/azure/azure_voice.py Wyświetl plik

@@ -6,6 +6,7 @@ import os
import time

import azure.cognitiveservices.speech as speechsdk
from langid import classify

from bridge.reply import Reply, ReplyType
from common.log import logger
@@ -30,7 +31,15 @@ class AzureVoice(Voice):
config = None
if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件
config = {
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", # 识别不出时的默认语音
"auto_detect": True, # 是否自动检测语言
"speech_synthesis_zh": "zh-CN-XiaozhenNeural",
"speech_synthesis_en": "en-US-JacobNeural",
"speech_synthesis_ja": "ja-JP-AoiNeural",
"speech_synthesis_ko": "ko-KR-SoonBokNeural",
"speech_synthesis_de": "de-DE-LouisaNeural",
"speech_synthesis_fr": "fr-FR-BrigitteNeural",
"speech_synthesis_es": "es-ES-LaiaNeural",
"speech_recognition_language": "zh-CN",
}
with open(config_path, "w") as fw:
@@ -38,11 +47,12 @@ class AzureVoice(Voice):
else:
with open(config_path, "r") as fr:
config = json.load(fr)
self.config = config
self.api_key = conf().get("azure_voice_api_key")
self.api_region = conf().get("azure_voice_region")
self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
self.speech_config.speech_recognition_language = config["speech_recognition_language"]
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
self.speech_config.speech_recognition_language = self.config["speech_recognition_language"]
except Exception as e:
logger.warn("AzureVoice init failed: %s, ignore " % e)

@@ -59,6 +69,16 @@ class AzureVoice(Voice):
return reply

def textToVoice(self, text):
if self.config.get("auto_detect"):
lang = classify(text)[0]
key = "speech_synthesis_" + lang
if key in self.config:
logger.info("[Azure] textToVoice auto detect language={}, voice={}".format(lang, self.config[key]))
self.speech_config.speech_synthesis_voice_name = self.config[key]
else:
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
else:
self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav"
audio_config = speechsdk.AudioConfig(filename=fileName)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)


+ 8
- 0
voice/azure/config.json.template Wyświetl plik

@@ -1,4 +1,12 @@
{
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
"auto_detect": true,
"speech_synthesis_zh": "zh-CN-YunxiNeural",
"speech_synthesis_en": "en-US-JacobNeural",
"speech_synthesis_ja": "ja-JP-AoiNeural",
"speech_synthesis_ko": "ko-KR-SoonBokNeural",
"speech_synthesis_de": "de-DE-LouisaNeural",
"speech_synthesis_fr": "fr-FR-BrigitteNeural",
"speech_synthesis_es": "es-ES-LaiaNeural",
"speech_recognition_language": "zh-CN"
}

Ładowanie…
Anuluj
Zapisz