# J1147 / CoW


			
				
					
						
						
							
							
"""
azure voice service
"""
import json
import os
import time
import azure.cognitiveservices.speech as speechsdk
from aip import AipSpeech
from bridge.reply import Reply, ReplyType
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice
from voice.audio_convert import get_pcm_from_wav
from config import conf
"""
Azure voice
主目录设置文件中需填写azure_voice_api_key和azure_voice_region

查看可用的 voice： https://speech.microsoft.com/portal/voicegallery

"""

class AzureVoice(Voice):
    """Speech recognition and speech synthesis through the Azure Speech SDK.

    The subscription key and region are read from the project configuration
    (``azure_voice_api_key`` and ``azure_voice_region``).  The synthesis voice
    and the recognition language are read from a ``config.json`` file located
    next to this module; the file is created with default values when it does
    not exist yet.
    """

    # Written to config.json when the file is missing, and used as a fallback
    # for any key that an existing config.json does not define.
    _DEFAULT_CONFIG = {
        "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
        "speech_recognition_language": "zh-CN",
    }

    def __init__(self):
        """Load the local voice settings and build the SDK speech configuration.

        Initialisation is best-effort: any failure is logged as a warning and
        not re-raised.  In that case ``self.speech_config`` may stay ``None``
        and the conversion methods answer with an error reply.
        """
        # Define the attribute up front so later calls never hit an
        # AttributeError when initialisation fails below.
        self.speech_config = None
        try:
            config_path = os.path.join(os.path.dirname(__file__), "config.json")
            if not os.path.exists(config_path):
                # No local configuration file yet: create one with the defaults.
                config = dict(self._DEFAULT_CONFIG)
                with open(config_path, "w", encoding="utf-8") as fw:
                    json.dump(config, fw, indent=4)
            else:
                with open(config_path, "r", encoding="utf-8") as fr:
                    config = json.load(fr)
            self.api_key = conf().get('azure_voice_api_key')
            self.api_region = conf().get('azure_voice_region')
            self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
            # Fall back to the defaults so a config.json that lacks a key does
            # not leave the speech configuration half-initialised.
            self.speech_config.speech_synthesis_voice_name = config.get(
                "speech_synthesis_voice_name",
                self._DEFAULT_CONFIG["speech_synthesis_voice_name"],
            )
            self.speech_config.speech_recognition_language = config.get(
                "speech_recognition_language",
                self._DEFAULT_CONFIG["speech_recognition_language"],
            )
        except Exception as e:
            logger.warning("AzureVoice init failed: %s, ignore ", e)

    def voiceToText(self, voice_file):
        """Recognize the speech contained in an audio file.

        :param voice_file: path of the audio file handed to the SDK.
        :return: a ``Reply`` of type ``TEXT`` holding the recognized text, or
            a ``Reply`` of type ``ERROR`` when recognition did not succeed or
            the speech configuration is unavailable.
        """
        if getattr(self, "speech_config", None) is None:
            logger.error('[Azure] voiceToText error, speech config is not initialized')
            return Reply(ReplyType.ERROR, "抱歉，语音识别失败")
        audio_config = speechsdk.AudioConfig(filename=voice_file)
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info('[Azure] voiceToText voice file name={} text={}'.format(voice_file, result.text))
            reply = Reply(ReplyType.TEXT, result.text)
        else:
            logger.error('[Azure] voiceToText error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉，语音识别失败")
        return reply

    def textToVoice(self, text):
        """Synthesize ``text`` into a wav file.

        The file is written into the directory returned by ``TmpDir().path()``.

        :param text: the text to speak.
        :return: a ``Reply`` of type ``VOICE`` holding the path of the
            generated wav file, or a ``Reply`` of type ``ERROR`` when synthesis
            did not succeed or the speech configuration is unavailable.
        """
        if getattr(self, "speech_config", None) is None:
            logger.error('[Azure] textToVoice error, speech config is not initialized')
            return Reply(ReplyType.ERROR, "抱歉，语音合成失败")
        # Millisecond timestamp: with whole seconds, two replies produced in
        # the same second would share one file name and overwrite each other.
        fileName = os.path.join(TmpDir().path(), '语音回复_' + str(int(time.time() * 1000)) + '.wav')
        audio_config = speechsdk.AudioConfig(filename=fileName)
        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_synthesizer.speak_text(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.info(
                '[Azure] textToVoice text={} voice file name={}'.format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error('[Azure] textToVoice error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉，语音合成失败")
        return reply