You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

89 lines
3.3KB

  1. """
  2. azure voice service
  3. """
  4. import json
  5. import os
  6. import time
  7. import azure.cognitiveservices.speech as speechsdk
  8. from bridge.reply import Reply, ReplyType
  9. from common.log import logger
  10. from common.tmp_dir import TmpDir
  11. from config import conf
  12. from voice.voice import Voice
  13. """
  14. Azure voice
  15. 主目录设置文件中需填写azure_voice_api_key和azure_voice_region
  16. 查看可用的 voice: https://speech.microsoft.com/portal/voicegallery
  17. """
  class AzureVoice(Voice):
      """Speech-to-text and text-to-speech backed by the Azure Speech SDK.

      Credentials are read from the global configuration keys
      ``azure_voice_api_key`` and ``azure_voice_region``. The synthesis voice
      and the recognition language are read from a ``config.json`` file that
      lives next to this module; the file is created with default values when
      it does not exist.

      Available voices: https://speech.microsoft.com/portal/voicegallery
      """

      # Settings used when config.json is absent or omits a key.
      _DEFAULT_CONFIG = {
          "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
          "speech_recognition_language": "zh-CN",
      }

      def __init__(self):
          """Build the SDK speech configuration.

          Initialisation is best-effort: any failure is logged and ignored, in
          which case ``self.speech_config`` stays ``None`` and both conversion
          methods answer with an ERROR reply.
          """
          # Defined before the try block so the attribute always exists, even
          # when loading the settings or creating the SDK object fails.
          self.speech_config = None
          try:
              config = self._load_config()
              self.api_key = conf().get("azure_voice_api_key")
              self.api_region = conf().get("azure_voice_region")
              self.speech_config = speechsdk.SpeechConfig(
                  subscription=self.api_key, region=self.api_region
              )
              self.speech_config.speech_synthesis_voice_name = config[
                  "speech_synthesis_voice_name"
              ]
              self.speech_config.speech_recognition_language = config[
                  "speech_recognition_language"
              ]
          except Exception as e:
              # Deliberately swallowed: a broken voice backend must not prevent
              # the rest of the application from starting.
              logger.warning("AzureVoice init failed: %s, ignore ", e)

      def _load_config(self):
          """Return the voice settings, creating config.json with defaults if absent."""
          config_path = os.path.join(os.path.dirname(__file__), "config.json")
          config = dict(self._DEFAULT_CONFIG)
          if not os.path.exists(config_path):
              # No local settings file yet: write one holding the defaults.
              with open(config_path, "w") as fw:
                  json.dump(config, fw, indent=4)
              return config
          with open(config_path, "r") as fr:
              loaded = json.load(fr)
          # Values from the file win; defaults only fill in missing keys.
          config.update(loaded)
          return config

      def voiceToText(self, voice_file):
          """Recognize the speech contained in an audio file.

          :param voice_file: path of the audio file handed to the SDK
          :return: a TEXT ``Reply`` holding the recognized text, or an ERROR
              ``Reply`` when recognition fails or the service is not initialised
          """
          if self.speech_config is None:
              logger.error("[Azure] voiceToText error, speech config is not initialized")
              return Reply(ReplyType.ERROR, "抱歉,语音识别失败")

          audio_config = speechsdk.AudioConfig(filename=voice_file)
          speech_recognizer = speechsdk.SpeechRecognizer(
              speech_config=self.speech_config, audio_config=audio_config
          )
          result = speech_recognizer.recognize_once()
          if result.reason == speechsdk.ResultReason.RecognizedSpeech:
              logger.info(
                  "[Azure] voiceToText voice file name={} text={}".format(
                      voice_file, result.text
                  )
              )
              reply = Reply(ReplyType.TEXT, result.text)
          else:
              logger.error("[Azure] voiceToText error, result={}".format(result))
              reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
          return reply

      def textToVoice(self, text):
          """Synthesize speech for ``text`` into a WAV file in the temp directory.

          :param text: the text to speak
          :return: a VOICE ``Reply`` holding the path of the generated file, or
              an ERROR ``Reply`` when synthesis fails or the service is not
              initialised
          """
          # Imported locally so this class does not rely on a module-level
          # import that the file does not have.
          import uuid

          if self.speech_config is None:
              logger.error("[Azure] textToVoice error, speech config is not initialized")
              return Reply(ReplyType.ERROR, "抱歉,语音合成失败")

          # A timestamp with one-second resolution is not unique: two replies
          # produced within the same second would overwrite each other, so a
          # random suffix is appended.
          fileName = (
              TmpDir().path()
              + "reply-"
              + str(int(time.time()))
              + "-"
              + uuid.uuid4().hex[:8]
              + ".wav"
          )
          audio_config = speechsdk.AudioConfig(filename=fileName)
          speech_synthesizer = speechsdk.SpeechSynthesizer(
              speech_config=self.speech_config, audio_config=audio_config
          )
          result = speech_synthesizer.speak_text(text)
          if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
              logger.info(
                  "[Azure] textToVoice text={} voice file name={}".format(text, fileName)
              )
              reply = Reply(ReplyType.VOICE, fileName)
          else:
              logger.error("[Azure] textToVoice error, result={}".format(result))
              reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
          return reply