From 977d3bc02eb73dfc13d7f5c5b3730f55d6389480 Mon Sep 17 00:00:00 2001
From: FMStereo
Date: Thu, 18 Jan 2024 12:46:18 +0800
Subject: [PATCH] =?UTF-8?q?=E7=99=BE=E5=BA=A6=E8=AF=AD=E9=9F=B3=E8=BD=AC?=
 =?UTF-8?q?=E5=86=99=E6=94=AF=E6=8C=818000=E9=87=87=E6=A0=B7=E7=8E=87,=20p?=
 =?UTF-8?q?cm=5Fs16le=E7=BC=96=E7=A0=81,=20=E5=8D=95=E9=80=9A=E9=81=93?=
 =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E7=9A=84=E7=BB=84=E5=90=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text in this section is ignored when the patch is applied):

- any_to_wav: pydub's AudioSegment.set_frame_rate() and set_channels()
  return a new segment instead of modifying the receiver. The earlier
  revision of this hunk discarded both return values, so the exported WAV
  kept the source sample rate and channel layout while baidu_voice.py
  declared 8000 Hz to the recognizer. The result is now rebound to `audio`
  before export.
- The hunk still adds three lines and removes one, so the hunk headers and
  the diffstat below are unchanged. The post-image blob id on the first
  `index` line predates this fix.
- NOTE(review): any_to_wav may have callers other than the Baidu
  recognizer; confirm that 8000 Hz mono output is acceptable for them.

 voice/audio_convert.py     | 4 +++-
 voice/baidu/baidu_voice.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/voice/audio_convert.py b/voice/audio_convert.py
index 18fe3c2..5c80528 100644
--- a/voice/audio_convert.py
+++ b/voice/audio_convert.py
@@ -64,7 +64,9 @@ def any_to_wav(any_path, wav_path):
     if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
         return sil_to_wav(any_path, wav_path)
     audio = AudioSegment.from_file(any_path)
-    audio.export(wav_path, format="wav")
+    # Baidu speech recognition is fed 8000 Hz, mono, pcm_s16le audio; the pydub setters return new segments, so rebind.
+    audio = audio.set_frame_rate(8000).set_channels(1)
+    audio.export(wav_path, format="wav", codec='pcm_s16le')
 
 
 def any_to_sil(any_path, sil_path):
diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py
index fbf53ce..66ba4d8 100644
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -62,7 +62,7 @@ class BaiduVoice(Voice):
         # 识别本地文件
         logger.debug("[Baidu] voice file name={}".format(voice_file))
         pcm = get_pcm_from_wav(voice_file)
-        res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
+        res = self.client.asr(pcm, "pcm", 8000, {"dev_pid": self.dev_id})
         if res["err_no"] == 0:
             logger.info("百度语音识别到了:{}".format(res["result"]))
             text = "".join(res["result"])