You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

baidu_voice.py 2.7KB

1 vuosi sitten
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. """
  2. baidu voice service
  3. """
  4. import time
  5. from aip import AipSpeech
  6. from bridge.reply import Reply, ReplyType
  7. from common.log import logger
  8. from common.tmp_dir import TmpDir
  9. from voice.voice import Voice
  10. from voice.audio_convert import get_pcm_from_wav
  11. from config import conf
  12. """
  13. 百度的语音识别API.
  14. dev_pid:
  15. - 1936: 普通话远场
  16. - 1536:普通话(支持简单的英文识别)
  17. - 1537:普通话(纯中文识别)
  18. - 1737:英语
  19. - 1637:粤语
  20. - 1837:四川话
  21. 要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
  22. 之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
  23. 填入 config.json 中.
  24. baidu_app_id: ''
  25. baidu_api_key: ''
  26. baidu_secret_key: ''
  27. baidu_dev_pid: '1536'
  28. """
  29. class BaiduVoice(Voice):
  30. APP_ID = conf().get('baidu_app_id')
  31. API_KEY = conf().get('baidu_api_key')
  32. SECRET_KEY = conf().get('baidu_secret_key')
  33. DEV_ID = conf().get('baidu_dev_pid')
  34. client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
  35. def __init__(self):
  36. pass
  37. def voiceToText(self, voice_file):
  38. # 识别本地文件
  39. logger.debug('[Baidu] voice file name={}'.format(voice_file))
  40. pcm = get_pcm_from_wav(voice_file)
  41. res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.DEV_ID})
  42. if res["err_no"] == 0:
  43. logger.info("百度语音识别到了:{}".format(res["result"]))
  44. text = "".join(res["result"])
  45. reply = Reply(ReplyType.TEXT, text)
  46. else:
  47. logger.info("百度语音识别出错了: {}".format(res["err_msg"]))
  48. if res["err_msg"] == "request pv too much":
  49. logger.info(" 出现这个原因很可能是你的百度语音服务调用量超出限制,或未开通付费")
  50. reply = Reply(ReplyType.ERROR,
  51. "百度语音识别出错了;{0}".format(res["err_msg"]))
  52. return reply
  53. def textToVoice(self, text):
  54. result = self.client.synthesis(text, 'zh', 1, {
  55. 'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
  56. })
  57. if not isinstance(result, dict):
  58. fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
  59. with open(fileName, 'wb') as f:
  60. f.write(result)
  61. logger.info(
  62. '[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
  63. reply = Reply(ReplyType.VOICE, fileName)
  64. else:
  65. logger.error('[Baidu] textToVoice error={}'.format(result))
  66. reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
  67. return reply