diff --git a/voice/audio_convert.py b/voice/audio_convert.py index 18fe3c2..5c80528 100644 --- a/voice/audio_convert.py +++ b/voice/audio_convert.py @@ -64,7 +64,9 @@ def any_to_wav(any_path, wav_path): if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"): return sil_to_wav(any_path, wav_path) audio = AudioSegment.from_file(any_path) - audio.export(wav_path, format="wav") + audio.set_frame_rate(8000) # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别 + audio.set_channels(1) + audio.export(wav_path, format="wav", codec='pcm_s16le') def any_to_sil(any_path, sil_path): diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py index fbf53ce..66ba4d8 100644 --- a/voice/baidu/baidu_voice.py +++ b/voice/baidu/baidu_voice.py @@ -62,7 +62,7 @@ class BaiduVoice(Voice): # 识别本地文件 logger.debug("[Baidu] voice file name={}".format(voice_file)) pcm = get_pcm_from_wav(voice_file) - res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id}) + res = self.client.asr(pcm, "pcm", 8000, {"dev_pid": self.dev_id}) if res["err_no"] == 0: logger.info("百度语音识别到了:{}".format(res["result"])) text = "".join(res["result"])