瀏覽代碼

feat: support azure voice

master
lanvent 1 年之前
父節點
當前提交
1545a9f262
共有 4 個檔案被更改,包括 84 行新增和 3 行刪除
  1. +7
    -3
      config.py
  2. +70
    -0
      voice/azure/azure_voice.py
  3. +4
    -0
      voice/azure/config.json.template
  4. +3
    -0
      voice/voice_factory.py

+ 7
- 3
config.py 查看文件

@@ -47,16 +47,20 @@ available_setting = {
"speech_recognition": False, # 是否开启语音识别
"group_speech_recognition": False, # 是否开启群组语音识别
"voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key
"voice_to_text": "openai", # 语音识别引擎,支持openai,google
"text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline)
"voice_to_text": "openai", # 语音识别引擎,支持openai,google,azure
"text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline),azure

# baidu api配置, 使用百度语音识别和语音合成时需要
# baidu 语音api配置, 使用百度语音识别和语音合成时需要
"baidu_app_id": "",
"baidu_api_key": "",
"baidu_secret_key": "",
# 1536普通话(支持简单的英文识别) 1737英语 1637粤语 1837四川话 1936普通话远场
"baidu_dev_pid": "1536",

# azure 语音api配置, 使用azure语音识别和语音合成时需要
"azure_voice_api_key": "",
"azure_voice_region": "japaneast",

# 服务时间限制,目前支持itchat
"chat_time_module": False, # 是否开启服务时间限制
"chat_start_time": "00:00", # 服务开始时间


+ 70
- 0
voice/azure/azure_voice.py 查看文件

@@ -0,0 +1,70 @@

"""
azure voice service
"""
import json
import os
import time
import azure.cognitiveservices.speech as speechsdk
from aip import AipSpeech
from bridge.reply import Reply, ReplyType
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice
from voice.audio_convert import get_pcm_from_wav
from config import conf
"""
Azure voice
主目录设置文件中需填写azure_voice_api_key和azure_voice_region

查看可用的 voice: https://speech.microsoft.com/portal/voicegallery

"""

class AzureVoice(Voice):
    """Speech recognition and speech synthesis through the Azure Speech SDK.

    The API key and region are read from the global configuration
    (``azure_voice_api_key`` / ``azure_voice_region``).  The voice name and
    recognition language are read from a ``config.json`` next to this module;
    the file is created with defaults the first time it is missing.
    """

    # Defaults written to config.json on first run and used for absent keys.
    _DEFAULT_CONFIG = {
        "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
        "speech_recognition_language": "zh-CN",
    }

    def __init__(self):
        """Build the shared ``SpeechConfig``.

        Initialisation is best-effort: any failure is logged and leaves
        ``self.speech_config`` as ``None`` so that the conversion methods can
        report an error reply instead of raising ``AttributeError``.
        """
        self.speech_config = None
        try:
            curdir = os.path.dirname(__file__)
            config_path = os.path.join(curdir, "config.json")
            if not os.path.exists(config_path):
                # No local config yet: persist the defaults for later editing.
                config = dict(self._DEFAULT_CONFIG)
                with open(config_path, "w", encoding="utf-8") as fw:
                    json.dump(config, fw, indent=4)
            else:
                with open(config_path, "r", encoding="utf-8") as fr:
                    config = json.load(fr)

            self.api_key = conf().get('azure_voice_api_key')
            self.api_region = conf().get('azure_voice_region')

            speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
            # Fall back to the defaults when config.json lacks a key.
            speech_config.speech_synthesis_voice_name = config.get(
                "speech_synthesis_voice_name",
                self._DEFAULT_CONFIG["speech_synthesis_voice_name"])
            speech_config.speech_recognition_language = config.get(
                "speech_recognition_language",
                self._DEFAULT_CONFIG["speech_recognition_language"])
            # textToVoice writes "*.mp3" files; without this the SDK emits
            # RIFF/WAV data, so the content would not match the extension.
            speech_config.set_speech_synthesis_output_format(
                speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)

            # Publish only a fully configured object.
            self.speech_config = speech_config
        except Exception as e:
            logger.warn("AzureVoice init failed: %s, ignore " % e)

    def voiceToText(self, voice_file):
        """Recognise the speech in ``voice_file``.

        :param voice_file: path of the audio file handed to the recogniser.
        :return: a ``Reply`` of type TEXT carrying the recognised text, or of
                 type ERROR when recognition fails or the service was not
                 initialised.
        """
        if self.speech_config is None:
            logger.error('[Azure] voiceToText error, speech config is not initialized')
            return Reply(ReplyType.ERROR, "抱歉,语音识别失败")

        audio_config = speechsdk.AudioConfig(filename=voice_file)
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info('[Azure] voiceToText voice file name={} text={}'.format(voice_file, result.text))
            reply = Reply(ReplyType.TEXT, result.text)
        else:
            logger.error('[Azure] voiceToText error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
        return reply

    def textToVoice(self, text):
        """Synthesise ``text`` into an MP3 file in the temporary directory.

        :param text: the text to speak.
        :return: a ``Reply`` of type VOICE carrying the generated file path,
                 or of type ERROR when synthesis fails or the service was not
                 initialised.
        """
        if self.speech_config is None:
            logger.error('[Azure] textToVoice error, speech config is not initialized')
            return Reply(ReplyType.ERROR, "抱歉,语音合成失败")

        # The timestamp has one-second resolution; mixing in a hash of the
        # text keeps different replies produced in the same second apart.
        fileName = (TmpDir().path() + '语音回复_' + str(int(time.time()))
                    + '_' + str(hash(text) & 0x7FFFFFFF) + '.mp3')
        # AudioOutputConfig is the SDK's output-side (file/speaker) config.
        audio_config = speechsdk.audio.AudioOutputConfig(filename=fileName)
        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_synthesizer.speak_text(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.info(
                '[Azure] textToVoice text={} voice file name={}'.format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error('[Azure] textToVoice error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
        return reply

+ 4
- 0
voice/azure/config.json.template 查看文件

@@ -0,0 +1,4 @@
{
"speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
"speech_recognition_language": "zh-CN"
}

+ 3
- 0
voice/voice_factory.py 查看文件

@@ -20,4 +20,7 @@ def create_voice(voice_type):
elif voice_type == 'pytts':
from voice.pytts.pytts_voice import PyttsVoice
return PyttsVoice()
elif voice_type == 'azure':
from voice.azure.azure_voice import AzureVoice
return AzureVoice()
raise RuntimeError

Loading…
取消
儲存