95 lines
3.7KB

  1. """
  2. baidu voice service
  3. """
  4. import json
  5. import os
  6. import time
  7. from aip import AipSpeech
  8. from bridge.reply import Reply, ReplyType
  9. from common.log import logger
  10. from common.tmp_dir import TmpDir
  11. from config import conf
  12. from voice.audio_convert import get_pcm_from_wav
  13. from voice.voice import Voice
  14. """
  15. 百度的语音识别API.
  16. dev_pid:
  17. - 1936: 普通话远场
  18. - 1536:普通话(支持简单的英文识别)
  19. - 1537:普通话(纯中文识别)
  20. - 1737:英语
  21. - 1637:粤语
  22. - 1837:四川话
  23. 要使用本模块, 首先到 yuyin.baidu.com 注册一个开发者账号,
  24. 之后创建一个新应用, 然后在应用管理的"查看key"中获得 API Key 和 Secret Key
  25. 然后在 config.json 中填入这两个值, 以及 app_id, dev_pid
  26. """
  27. class BaiduVoice(Voice):
  28. def __init__(self):
  29. try:
  30. curdir = os.path.dirname(__file__)
  31. config_path = os.path.join(curdir, "config.json")
  32. bconf = None
  33. if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件
  34. bconf = {"lang": "zh", "ctp": 1, "spd": 5, "pit": 5, "vol": 5, "per": 0}
  35. with open(config_path, "w") as fw:
  36. json.dump(bconf, fw, indent=4)
  37. else:
  38. with open(config_path, "r") as fr:
  39. bconf = json.load(fr)
  40. self.app_id = str(conf().get("baidu_app_id"))
  41. self.api_key = str(conf().get("baidu_api_key"))
  42. self.secret_key = str(conf().get("baidu_secret_key"))
  43. self.dev_id = conf().get("baidu_dev_pid")
  44. self.lang = bconf["lang"]
  45. self.ctp = bconf["ctp"]
  46. self.spd = bconf["spd"]
  47. self.pit = bconf["pit"]
  48. self.vol = bconf["vol"]
  49. self.per = bconf["per"]
  50. self.client = AipSpeech(self.app_id, self.api_key, self.secret_key)
  51. except Exception as e:
  52. logger.warn("BaiduVoice init failed: %s, ignore " % e)
  53. def voiceToText(self, voice_file):
  54. # 识别本地文件
  55. logger.debug("[Baidu] voice file name={}".format(voice_file))
  56. pcm = get_pcm_from_wav(voice_file)
  57. res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
  58. if res["err_no"] == 0:
  59. logger.info("百度语音识别到了:{}".format(res["result"]))
  60. text = "".join(res["result"])
  61. reply = Reply(ReplyType.TEXT, text)
  62. else:
  63. logger.info("百度语音识别出错了: {}".format(res["err_msg"]))
  64. if res["err_msg"] == "request pv too much":
  65. logger.info(" 出现这个原因很可能是你的百度语音服务调用量超出限制,或未开通付费")
  66. reply = Reply(ReplyType.ERROR, "百度语音识别出错了;{0}".format(res["err_msg"]))
  67. return reply
  68. def textToVoice(self, text):
  69. result = self.client.synthesis(
  70. text,
  71. self.lang,
  72. self.ctp,
  73. {"spd": self.spd, "pit": self.pit, "vol": self.vol, "per": self.per},
  74. )
  75. if not isinstance(result, dict):
  76. # Avoid the same filename under multithreading
  77. fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
  78. with open(fileName, "wb") as f:
  79. f.write(result)
  80. logger.info("[Baidu] textToVoice text={} voice file name={}".format(text, fileName))
  81. reply = Reply(ReplyType.VOICE, fileName)
  82. else:
  83. logger.error("[Baidu] textToVoice error={}".format(result))
  84. reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
  85. return reply