百度语音转写支持8000采样率, pcm_s16le编码, 单通道语音的组合 (master)
@@ -64,7 +64,9 @@ def any_to_wav(any_path, wav_path): | |||||
if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"): | if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"): | ||||
return sil_to_wav(any_path, wav_path) | return sil_to_wav(any_path, wav_path) | ||||
audio = AudioSegment.from_file(any_path) | audio = AudioSegment.from_file(any_path) | ||||
audio.export(wav_path, format="wav") | |||||
audio.set_frame_rate(8000) # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别 | |||||
audio.set_channels(1) | |||||
audio.export(wav_path, format="wav", codec='pcm_s16le') | |||||
def any_to_sil(any_path, sil_path): | def any_to_sil(any_path, sil_path): | ||||
@@ -62,7 +62,7 @@ class BaiduVoice(Voice): | |||||
# 识别本地文件 | # 识别本地文件 | ||||
logger.debug("[Baidu] voice file name={}".format(voice_file)) | logger.debug("[Baidu] voice file name={}".format(voice_file)) | ||||
pcm = get_pcm_from_wav(voice_file) | pcm = get_pcm_from_wav(voice_file) | ||||
res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id}) | |||||
res = self.client.asr(pcm, "pcm", 8000, {"dev_pid": self.dev_id}) | |||||
if res["err_no"] == 0: | if res["err_no"] == 0: | ||||
logger.info("百度语音识别到了:{}".format(res["result"])) | logger.info("百度语音识别到了:{}".format(res["result"])) | ||||
text = "".join(res["result"]) | text = "".join(res["result"]) | ||||