From 1545a9f262d99ac06792fcd7c21f7548defae671 Mon Sep 17 00:00:00 2001
From: lanvent
Date: Sat, 1 Apr 2023 16:36:27 +0800
Subject: [PATCH] feat: support azure voice

---
 config.py                        | 10 +++-
 voice/azure/azure_voice.py       | 99 ++++++++++++++++++++++++++++++++
 voice/azure/config.json.template |  4 ++
 voice/voice_factory.py           |  3 +
 4 files changed, 113 insertions(+), 3 deletions(-)
 create mode 100644 voice/azure/azure_voice.py
 create mode 100644 voice/azure/config.json.template

diff --git a/config.py b/config.py
index 008f447..7a734b3 100644
--- a/config.py
+++ b/config.py
@@ -47,16 +47,20 @@ available_setting = {
     "speech_recognition": False, # 是否开启语音识别
     "group_speech_recognition": False, # 是否开启群组语音识别
     "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key
-    "voice_to_text": "openai", # 语音识别引擎,支持openai,google
-    "text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline)
+    "voice_to_text": "openai", # 语音识别引擎,支持openai,google,azure
+    "text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline),azure
 
-    # baidu api的配置, 使用百度语音识别和语音合成时需要
+    # baidu 语音api配置, 使用百度语音识别和语音合成时需要
     "baidu_app_id": "",
     "baidu_api_key": "",
     "baidu_secret_key": "",
     # 1536普通话(支持简单的英文识别) 1737英语 1637粤语 1837四川话 1936普通话远场
     "baidu_dev_pid": "1536",
 
+    # azure 语音api配置, 使用azure语音识别和语音合成时需要
+    "azure_voice_api_key": "",
+    "azure_voice_region": "japaneast",
+
     # 服务时间限制,目前支持itchat
     "chat_time_module": False, # 是否开启服务时间限制
     "chat_start_time": "00:00", # 服务开始时间
diff --git a/voice/azure/azure_voice.py b/voice/azure/azure_voice.py
new file mode 100644
index 0000000..f9baf2b
--- /dev/null
+++ b/voice/azure/azure_voice.py
@@ -0,0 +1,99 @@
+
+"""
+azure voice service
+"""
+import json
+import os
+import time
+import azure.cognitiveservices.speech as speechsdk
+from aip import AipSpeech
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from common.tmp_dir import TmpDir
+from voice.voice import Voice
+from voice.audio_convert import get_pcm_from_wav
+from config import conf
+"""
+Azure voice
+Set azure_voice_api_key and azure_voice_region in the main config file.
+
+Browse the available voices: https://speech.microsoft.com/portal/voicegallery
+
+"""
+
+class AzureVoice(Voice):
+    """Speech recognition and synthesis through the Azure Speech SDK."""
+
+    def __init__(self):
+        """Load the voice settings and build the shared SpeechConfig.
+
+        Failures are logged instead of raised; if the SpeechConfig could not
+        be built, speech_config stays None and both conversion methods return
+        an ERROR reply.
+        """
+        # Define the attribute up front so a failed init cannot surface later
+        # as an AttributeError.
+        self.speech_config = None
+        try:
+            curdir = os.path.dirname(__file__)
+            config_path = os.path.join(curdir, "config.json")
+            if not os.path.exists(config_path):
+                # No config file yet: write the defaults next to this module.
+                config = {"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", "speech_recognition_language": "zh-CN"}
+                with open(config_path, "w", encoding="utf-8") as fw:
+                    json.dump(config, fw, indent=4)
+            else:
+                with open(config_path, "r", encoding="utf-8") as fr:
+                    config = json.load(fr)
+            self.api_key = conf().get('azure_voice_api_key')
+            self.api_region = conf().get('azure_voice_region')
+            self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
+            # textToVoice saves to a .mp3 path, so request MP3 data instead of
+            # the SDK's default RIFF/WAV output.
+            self.speech_config.set_speech_synthesis_output_format(
+                speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
+            self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
+            self.speech_config.speech_recognition_language = config["speech_recognition_language"]
+        except Exception as e:
+            logger.warning("AzureVoice init failed: %s, ignore " % e)
+
+    def voiceToText(self, voice_file):
+        """Transcribe an audio file with a single recognize_once() call.
+
+        :param voice_file: path of the audio file passed to the Speech SDK
+        :return: TEXT Reply holding the transcript, or an ERROR Reply
+        """
+        if self.speech_config is None:
+            logger.error('[Azure] voiceToText error, speech config is not initialized')
+            return Reply(ReplyType.ERROR, "抱歉,语音识别失败")
+        audio_config = speechsdk.AudioConfig(filename=voice_file)
+        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
+        result = speech_recognizer.recognize_once()
+        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
+            logger.info('[Azure] voiceToText voice file name={} text={}'.format(voice_file, result.text))
+            reply = Reply(ReplyType.TEXT, result.text)
+        else:
+            logger.error('[Azure] voiceToText error, result={}'.format(result))
+            reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
+        return reply
+
+    def textToVoice(self, text):
+        """Synthesize text into an .mp3 file under TmpDir().path().
+
+        :param text: the text to speak
+        :return: VOICE Reply holding the audio file path, or an ERROR Reply
+        """
+        if self.speech_config is None:
+            logger.error('[Azure] textToVoice error, speech config is not initialized')
+            return Reply(ReplyType.ERROR, "抱歉,语音合成失败")
+        file_name = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
+        audio_config = speechsdk.audio.AudioOutputConfig(filename=file_name)
+        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
+        result = speech_synthesizer.speak_text(text)
+        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+            logger.info('[Azure] textToVoice text={} voice file name={}'.format(text, file_name))
+            reply = Reply(ReplyType.VOICE, file_name)
+        else:
+            logger.error('[Azure] textToVoice error, result={}'.format(result))
+            reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
+        return reply
diff --git a/voice/azure/config.json.template b/voice/azure/config.json.template
new file mode 100644
index 0000000..13b1fbd
--- /dev/null
+++ b/voice/azure/config.json.template
@@ -0,0 +1,4 @@
+{
+    "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
+    "speech_recognition_language": "zh-CN"
+}
\ No newline at end of file
diff --git a/voice/voice_factory.py b/voice/voice_factory.py
index 591e346..de4b3d9 100644
--- a/voice/voice_factory.py
+++ b/voice/voice_factory.py
@@ -20,4 +20,7 @@ def create_voice(voice_type):
     elif voice_type == 'pytts':
         from voice.pytts.pytts_voice import PyttsVoice
         return PyttsVoice()
+    elif voice_type == 'azure':
+        from voice.azure.azure_voice import AzureVoice
+        return AzureVoice()
     raise RuntimeError