"""
azure voice service
"""
import json
import os
import time

import azure.cognitiveservices.speech as speechsdk

from bridge.reply import Reply, ReplyType
from common.log import logger
from common.tmp_dir import TmpDir
from config import conf
from voice.voice import Voice


class AzureVoice(Voice):
    """Speech-to-text and text-to-speech backed by the Azure Speech SDK.

    The main settings file must provide ``azure_voice_api_key`` and
    ``azure_voice_region``. The synthesis voice name and the recognition
    language are read from a ``config.json`` located next to this module;
    that file is created with default values when it does not exist.

    Available voices: https://speech.microsoft.com/portal/voicegallery
    """

    # Values written to a fresh config.json and used for any missing key.
    _DEFAULT_CONFIG = {
        "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
        "speech_recognition_language": "zh-CN",
    }

    def __init__(self):
        """Build the SDK speech configuration.

        Any failure is logged and swallowed (best effort). In that case
        ``self.speech_config`` stays ``None`` and both conversion methods
        answer with an error reply.
        """
        # Defined before anything can fail, so the other methods can detect
        # a failed initialisation instead of raising AttributeError.
        self.speech_config = None
        try:
            config = self._load_config()
            self.api_key = conf().get("azure_voice_api_key")
            self.api_region = conf().get("azure_voice_region")
            self.speech_config = speechsdk.SpeechConfig(
                subscription=self.api_key, region=self.api_region
            )
            # Fall back to the defaults when a key is absent from config.json.
            self.speech_config.speech_synthesis_voice_name = config.get(
                "speech_synthesis_voice_name",
                self._DEFAULT_CONFIG["speech_synthesis_voice_name"],
            )
            self.speech_config.speech_recognition_language = config.get(
                "speech_recognition_language",
                self._DEFAULT_CONFIG["speech_recognition_language"],
            )
        except Exception as e:
            # `warning` replaces the deprecated `warn` alias.
            logger.warning("AzureVoice init failed: %s, ignore " % e)

    @classmethod
    def _load_config(cls):
        """Return the settings from config.json, creating the file if absent."""
        config_path = os.path.join(os.path.dirname(__file__), "config.json")
        if os.path.exists(config_path):
            with open(config_path, "r", encoding="utf-8") as fr:
                return json.load(fr)

        # No config file yet: create a local one holding the defaults.
        config = dict(cls._DEFAULT_CONFIG)
        try:
            with open(config_path, "w", encoding="utf-8") as fw:
                json.dump(config, fw, indent=4)
        except OSError as e:
            # An unwritable directory should not disable the service; the
            # in-memory defaults are still usable.
            logger.warning("[Azure] cannot write default config %s: %s" % (config_path, e))
        return config

    def voiceToText(self, voice_file):
        """Recognise the speech contained in an audio file.

        :param voice_file: path of the audio file handed to the SDK.
        :return: a ``ReplyType.TEXT`` reply carrying the recognised text, or
            a ``ReplyType.ERROR`` reply when recognition did not succeed or
            the service was not initialised.
        """
        if self.speech_config is None:
            logger.error("[Azure] voiceToText error, speech config is not initialized")
            return Reply(ReplyType.ERROR, "抱歉,语音识别失败")

        audio_config = speechsdk.AudioConfig(filename=voice_file)
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.speech_config, audio_config=audio_config
        )
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info(
                "[Azure] voiceToText voice file name={} text={}".format(
                    voice_file, result.text
                )
            )
            reply = Reply(ReplyType.TEXT, result.text)
        else:
            logger.error(
                "[Azure] voiceToText error, result={}, canceldetails={}".format(
                    result, result.cancellation_details
                )
            )
            reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
        return reply

    def textToVoice(self, text):
        """Synthesise ``text`` into a ``.wav`` file in the temporary directory.

        :param text: the text to speak.
        :return: a ``ReplyType.VOICE`` reply carrying the generated file
            path, or a ``ReplyType.ERROR`` reply when synthesis did not
            complete or the service was not initialised.
        """
        if self.speech_config is None:
            logger.error("[Azure] textToVoice error, speech config is not initialized")
            return Reply(ReplyType.ERROR, "抱歉,语音合成失败")

        # A whole-second timestamp alone collides when two replies are
        # produced within the same second; the text hash keeps the file
        # names of different replies apart.
        fileName = (
            TmpDir().path()
            + "reply-"
            + str(int(time.time()))
            + "-"
            + str(hash(text) & 0x7FFFFFFF)
            + ".wav"
        )
        audio_config = speechsdk.AudioConfig(filename=fileName)
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=self.speech_config, audio_config=audio_config
        )
        result = speech_synthesizer.speak_text(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.info(
                "[Azure] textToVoice text={} voice file name={}".format(text, fileName)
            )
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error(
                "[Azure] textToVoice error, result={}, canceldetails={}".format(
                    result, result.cancellation_details
                )
            )
            reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
        return reply