You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

69 satır
3.0KB

  1. """
  2. azure voice service
  3. """
  4. import json
  5. import os
  6. import time
  7. import azure.cognitiveservices.speech as speechsdk
  8. from bridge.reply import Reply, ReplyType
  9. from common.log import logger
  10. from common.tmp_dir import TmpDir
  11. from voice.voice import Voice
  12. from config import conf
  13. """
  14. Azure voice
  15. 主目录设置文件中需填写azure_voice_api_key和azure_voice_region
  16. 查看可用的 voice: https://speech.microsoft.com/portal/voicegallery
  17. """
  class AzureVoice(Voice):
      """Speech-to-text and text-to-speech backed by the Azure Speech SDK.

      The API key and region are read from the main configuration through
      ``conf()`` (keys ``azure_voice_api_key`` and ``azure_voice_region``).
      The synthesis voice name and the recognition language are read from a
      ``config.json`` file located next to this module; the file is created
      with default values when it does not exist.

      Available voices: https://speech.microsoft.com/portal/voicegallery
      """

      def __init__(self):
          """Load the local voice settings and build the shared ``SpeechConfig``.

          Initialisation is best-effort: any exception is logged and swallowed.
          When the ``SpeechConfig`` could not be created, ``self.speech_config``
          stays ``None`` and the conversion methods return an error reply.
          """
          # Defined before the try block so a failed init is detectable later
          # instead of surfacing as an AttributeError in the other methods.
          self.speech_config = None
          try:
              curdir = os.path.dirname(__file__)
              config_path = os.path.join(curdir, "config.json")
              if not os.path.exists(config_path):
                  # No local config file yet: create one with the defaults.
                  config = {
                      "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
                      "speech_recognition_language": "zh-CN",
                  }
                  with open(config_path, "w", encoding="utf-8") as fw:
                      json.dump(config, fw, indent=4)
              else:
                  with open(config_path, "r", encoding="utf-8") as fr:
                      config = json.load(fr)
              self.api_key = conf().get('azure_voice_api_key')
              self.api_region = conf().get('azure_voice_region')
              self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
              self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
              self.speech_config.speech_recognition_language = config["speech_recognition_language"]
          except Exception as e:
              # Deliberately non-fatal: the failure is only reported.
              logger.warning("AzureVoice init failed: %s, ignore " % e)

      def voiceToText(self, voice_file):
          """Transcribe an audio file with one ``recognize_once()`` call.

          :param voice_file: path of the audio file handed to
              ``speechsdk.AudioConfig``.
          :return: a ``Reply`` of type ``TEXT`` carrying the recognised text, or
              a ``Reply`` of type ``ERROR`` when recognition did not succeed or
              the service was never initialised.
          """
          if getattr(self, "speech_config", None) is None:
              logger.error('[Azure] voiceToText error, speech config is not initialized')
              return Reply(ReplyType.ERROR, "抱歉,语音识别失败")
          audio_config = speechsdk.AudioConfig(filename=voice_file)
          speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
          result = speech_recognizer.recognize_once()
          if result.reason == speechsdk.ResultReason.RecognizedSpeech:
              logger.info('[Azure] voiceToText voice file name={} text={}'.format(voice_file, result.text))
              reply = Reply(ReplyType.TEXT, result.text)
          else:
              logger.error('[Azure] voiceToText error, result={}'.format(result))
              reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
          return reply

      def textToVoice(self, text):
          """Synthesize ``text`` into a ``.wav`` file under the temp directory.

          :param text: the text passed to ``speak_text``.
          :return: a ``Reply`` of type ``VOICE`` carrying the generated file
              path, or a ``Reply`` of type ``ERROR`` when synthesis did not
              complete or the service was never initialised.
          """
          # Local import keeps this block self-contained (no new file-level import).
          import uuid

          if getattr(self, "speech_config", None) is None:
              logger.error('[Azure] textToVoice error, speech config is not initialized')
              return Reply(ReplyType.ERROR, "抱歉,语音合成失败")
          # A second-resolution timestamp alone is shared by every reply produced
          # within the same second, so a random suffix keeps the paths distinct.
          fileName = TmpDir().path() + 'reply-' + str(int(time.time())) + '-' + uuid.uuid4().hex[:8] + '.wav'
          audio_config = speechsdk.AudioConfig(filename=fileName)
          speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
          result = speech_synthesizer.speak_text(text)
          if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
              logger.info(
                  '[Azure] textToVoice text={} voice file name={}'.format(text, fileName))
              reply = Reply(ReplyType.VOICE, fileName)
          else:
              logger.error('[Azure] textToVoice error, result={}'.format(result))
              reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
          return reply