您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

azure_voice.py 3.3KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. """
  2. azure voice service
  3. """
  4. import json
  5. import os
  6. import time
  7. import azure.cognitiveservices.speech as speechsdk
  8. from bridge.reply import Reply, ReplyType
  9. from common.log import logger
  10. from common.tmp_dir import TmpDir
  11. from config import conf
  12. from voice.voice import Voice
  13. """
  14. Azure voice
  15. 主目录设置文件中需填写azure_voice_api_key和azure_voice_region
  16. 查看可用的 voice: https://speech.microsoft.com/portal/voicegallery
  17. """
  class AzureVoice(Voice):
      """Speech recognition and speech synthesis through the Azure Speech SDK.

      The subscription key and region are read from the main project
      configuration (``azure_voice_api_key`` and ``azure_voice_region``).
      The synthesis voice and the recognition language come from a
      ``config.json`` file located next to this module; the file is created
      with default values when it does not exist.

      Available voices: https://speech.microsoft.com/portal/voicegallery
      """

      # Fallback values, used when config.json is absent or lacks a key.
      _DEFAULT_CONFIG = {
          "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
          "speech_recognition_language": "zh-CN",
      }

      def __init__(self):
          """Build the shared ``SpeechConfig``.

          Initialisation is best-effort: any failure is logged and swallowed.
          In that case ``self.speech_config`` is never assigned, so a later
          call to ``voiceToText`` / ``textToVoice`` raises ``AttributeError``.
          """
          try:
              config = self._load_config()
              self.api_key = conf().get("azure_voice_api_key")
              self.api_region = conf().get("azure_voice_region")
              self.speech_config = speechsdk.SpeechConfig(
                  subscription=self.api_key, region=self.api_region
              )
              self.speech_config.speech_synthesis_voice_name = config[
                  "speech_synthesis_voice_name"
              ]
              self.speech_config.speech_recognition_language = config[
                  "speech_recognition_language"
              ]
          except Exception as e:
              # `warning` replaces the deprecated `warn` alias.
              logger.warning("AzureVoice init failed: %s, ignore " % e)

      def _load_config(self):
          """Return the voice settings, creating ``config.json`` if missing.

          Keys absent from an existing file fall back to ``_DEFAULT_CONFIG``
          instead of raising ``KeyError`` later on.
          """
          config_path = os.path.join(os.path.dirname(__file__), "config.json")
          config = dict(self._DEFAULT_CONFIG)
          if os.path.exists(config_path):
              with open(config_path, "r", encoding="utf-8") as fr:
                  # Values from the file take precedence over the defaults.
                  config.update(json.load(fr))
              return config
          try:
              with open(config_path, "w", encoding="utf-8") as fw:
                  json.dump(config, fw, indent=4)
          except OSError as e:
              # The defaults are already in memory, so a read-only directory
              # must not prevent the service from starting.
              logger.warning(
                  "AzureVoice could not write default config %s: %s"
                  % (config_path, e)
              )
          return config

      def voiceToText(self, voice_file):
          """Transcribe an audio file.

          Args:
              voice_file: Path of the audio file handed to ``AudioConfig``.

          Returns:
              ``Reply(ReplyType.TEXT, <recognised text>)`` when the SDK reports
              ``RecognizedSpeech``; otherwise a ``ReplyType.ERROR`` reply.
          """
          audio_config = speechsdk.AudioConfig(filename=voice_file)
          speech_recognizer = speechsdk.SpeechRecognizer(
              speech_config=self.speech_config, audio_config=audio_config
          )
          result = speech_recognizer.recognize_once()
          if result.reason != speechsdk.ResultReason.RecognizedSpeech:
              logger.error("[Azure] voiceToText error, result={}".format(result))
              return Reply(ReplyType.ERROR, "抱歉,语音识别失败")
          logger.info(
              "[Azure] voiceToText voice file name={} text={}".format(
                  voice_file, result.text
              )
          )
          return Reply(ReplyType.TEXT, result.text)

      def textToVoice(self, text):
          """Synthesise ``text`` into a ``.wav`` file in the temp directory.

          Args:
              text: The text to speak.

          Returns:
              ``Reply(ReplyType.VOICE, <file path>)`` when the SDK reports
              ``SynthesizingAudioCompleted``; otherwise a ``ReplyType.ERROR``
              reply.
          """
          # Imported locally so the fix does not depend on the module header.
          import uuid

          # A second-resolution timestamp alone collides when two replies are
          # synthesised within the same second; the random suffix keeps each
          # output file distinct.
          file_name = (
              TmpDir().path()
              + "reply-"
              + str(int(time.time()))
              + "-"
              + uuid.uuid4().hex[:8]
              + ".wav"
          )
          audio_config = speechsdk.AudioConfig(filename=file_name)
          speech_synthesizer = speechsdk.SpeechSynthesizer(
              speech_config=self.speech_config, audio_config=audio_config
          )
          result = speech_synthesizer.speak_text(text)
          if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
              logger.error("[Azure] textToVoice error, result={}".format(result))
              return Reply(ReplyType.ERROR, "抱歉,语音合成失败")
          logger.info(
              "[Azure] textToVoice text={} voice file name={}".format(text, file_name)
          )
          return Reply(ReplyType.VOICE, file_name)