Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

97 lines
3.5KB

  1. """
  2. azure voice service
  3. """
  4. import json
  5. import os
  6. import time
  7. import azure.cognitiveservices.speech as speechsdk
  8. from bridge.reply import Reply, ReplyType
  9. from common.log import logger
  10. from common.tmp_dir import TmpDir
  11. from config import conf
  12. from voice.voice import Voice
  13. """
  14. Azure voice
  15. 主目录设置文件中需填写azure_voice_api_key和azure_voice_region
  16. 查看可用的 voice: https://speech.microsoft.com/portal/voicegallery
  17. """
  18. class AzureVoice(Voice):
  19. def __init__(self):
  20. try:
  21. curdir = os.path.dirname(__file__)
  22. config_path = os.path.join(curdir, "config.json")
  23. config = None
  24. if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件
  25. config = {
  26. "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
  27. "speech_recognition_language": "zh-CN",
  28. }
  29. with open(config_path, "w") as fw:
  30. json.dump(config, fw, indent=4)
  31. else:
  32. with open(config_path, "r") as fr:
  33. config = json.load(fr)
  34. self.api_key = conf().get("azure_voice_api_key")
  35. self.api_region = conf().get("azure_voice_region")
  36. self.speech_config = speechsdk.SpeechConfig(
  37. subscription=self.api_key, region=self.api_region
  38. )
  39. self.speech_config.speech_synthesis_voice_name = config[
  40. "speech_synthesis_voice_name"
  41. ]
  42. self.speech_config.speech_recognition_language = config[
  43. "speech_recognition_language"
  44. ]
  45. except Exception as e:
  46. logger.warn("AzureVoice init failed: %s, ignore " % e)
  47. def voiceToText(self, voice_file):
  48. audio_config = speechsdk.AudioConfig(filename=voice_file)
  49. speech_recognizer = speechsdk.SpeechRecognizer(
  50. speech_config=self.speech_config, audio_config=audio_config
  51. )
  52. result = speech_recognizer.recognize_once()
  53. if result.reason == speechsdk.ResultReason.RecognizedSpeech:
  54. logger.info(
  55. "[Azure] voiceToText voice file name={} text={}".format(
  56. voice_file, result.text
  57. )
  58. )
  59. reply = Reply(ReplyType.TEXT, result.text)
  60. else:
  61. logger.error(
  62. "[Azure] voiceToText error, result={}, canceldetails={}".format(
  63. result, result.cancellation_details
  64. )
  65. )
  66. reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
  67. return reply
  68. def textToVoice(self, text):
  69. fileName = TmpDir().path() + "reply-" + str(int(time.time())) + ".wav"
  70. audio_config = speechsdk.AudioConfig(filename=fileName)
  71. speech_synthesizer = speechsdk.SpeechSynthesizer(
  72. speech_config=self.speech_config, audio_config=audio_config
  73. )
  74. result = speech_synthesizer.speak_text(text)
  75. if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
  76. logger.info(
  77. "[Azure] textToVoice text={} voice file name={}".format(text, fileName)
  78. )
  79. reply = Reply(ReplyType.VOICE, fileName)
  80. else:
  81. logger.error(
  82. "[Azure] textToVoice error, result={}, canceldetails={}".format(
  83. result, result.cancellation_details
  84. )
  85. )
  86. reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
  87. return reply