|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- """
- azure voice service
- """
- import json
- import os
- import time
-
- import azure.cognitiveservices.speech as speechsdk
-
- from bridge.reply import Reply, ReplyType
- from common.log import logger
- from common.tmp_dir import TmpDir
- from config import conf
- from voice.voice import Voice
-
- """
- Azure voice
- 主目录设置文件中需填写azure_voice_api_key和azure_voice_region
-
- 查看可用的 voice: https://speech.microsoft.com/portal/voicegallery
-
- """
-
-
class AzureVoice(Voice):
    """Speech-to-text and text-to-speech backed by the Azure Speech SDK.

    Credentials are read from the project configuration keys
    ``azure_voice_api_key`` and ``azure_voice_region``. The synthesis voice
    and the recognition language come from a ``config.json`` file stored next
    to this module; the file is created with defaults when it does not exist.

    Available voices: https://speech.microsoft.com/portal/voicegallery
    """

    # Used when config.json is missing, and for any key that file omits.
    _DEFAULT_CONFIG = {
        "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
        "speech_recognition_language": "zh-CN",
    }

    def __init__(self):
        """Load the voice settings and build the shared ``SpeechConfig``.

        Initialisation is best-effort: any failure is logged and ignored, in
        which case ``self.speech_config`` stays ``None`` and both conversion
        methods answer with an error reply.
        """
        # Defined before the try block so a failed initialisation leaves a
        # detectable state instead of missing attributes.
        self.api_key = None
        self.api_region = None
        self.speech_config = None
        try:
            config = self._load_voice_config()
            self.api_key = conf().get("azure_voice_api_key")
            self.api_region = conf().get("azure_voice_region")
            speech_config = speechsdk.SpeechConfig(
                subscription=self.api_key, region=self.api_region
            )
            speech_config.speech_synthesis_voice_name = config[
                "speech_synthesis_voice_name"
            ]
            speech_config.speech_recognition_language = config[
                "speech_recognition_language"
            ]
            # Publish only a fully configured object.
            self.speech_config = speech_config
        except Exception as e:
            logger.warning("AzureVoice init failed: %s, ignore ", e)

    def _load_voice_config(self):
        """Return the voice settings from ``config.json`` beside this module.

        When the file does not exist it is created with the default settings.
        Keys absent from an existing file fall back to their defaults.
        """
        config_path = os.path.join(os.path.dirname(__file__), "config.json")
        if not os.path.exists(config_path):
            # No local configuration yet: write the defaults to disk.
            config = dict(self._DEFAULT_CONFIG)
            with open(config_path, "w", encoding="utf-8") as fw:
                json.dump(config, fw, indent=4)
            return config
        with open(config_path, "r", encoding="utf-8") as fr:
            loaded = json.load(fr)
        return {**self._DEFAULT_CONFIG, **loaded}

    def voiceToText(self, voice_file):
        """Transcribe an audio file with a single recognition pass.

        Args:
            voice_file: Path of the audio file handed to the SDK.

        Returns:
            Reply: ``ReplyType.TEXT`` carrying the recognised text, or
            ``ReplyType.ERROR`` when recognition does not succeed or the
            service was not initialised.
        """
        if self.speech_config is None:
            logger.error("[Azure] voiceToText error, speech config is not initialized")
            return Reply(ReplyType.ERROR, "抱歉,语音识别失败")
        audio_config = speechsdk.AudioConfig(filename=voice_file)
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.speech_config, audio_config=audio_config
        )
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info(
                "[Azure] voiceToText voice file name={} text={}".format(
                    voice_file, result.text
                )
            )
            reply = Reply(ReplyType.TEXT, result.text)
        else:
            logger.error(
                "[Azure] voiceToText error, result={}, canceldetails={}".format(
                    result, result.cancellation_details
                )
            )
            reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
        return reply

    def textToVoice(self, text):
        """Synthesise ``text`` into a WAV file in the temporary directory.

        Args:
            text: The text to speak.

        Returns:
            Reply: ``ReplyType.VOICE`` carrying the generated file path, or
            ``ReplyType.ERROR`` when synthesis does not complete or the
            service was not initialised.
        """
        if self.speech_config is None:
            logger.error("[Azure] textToVoice error, speech config is not initialized")
            return Reply(ReplyType.ERROR, "抱歉,语音合成失败")
        # Nanosecond timestamp: a whole-second one lets two replies produced
        # within the same second overwrite each other's file.
        fileName = TmpDir().path() + "reply-" + str(time.time_ns()) + ".wav"
        audio_config = speechsdk.AudioConfig(filename=fileName)
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=self.speech_config, audio_config=audio_config
        )
        result = speech_synthesizer.speak_text(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.info(
                "[Azure] textToVoice text={} voice file name={}".format(text, fileName)
            )
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error(
                "[Azure] textToVoice error, result={}, canceldetails={}".format(
                    result, result.cancellation_details
                )
            )
            reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
        return reply
|