From 977d3bc02eb73dfc13d7f5c5b3730f55d6389480 Mon Sep 17 00:00:00 2001
From: FMStereo <xiaodong_hsu@sohu.com>
Date: Thu, 18 Jan 2024 12:46:18 +0800
Subject: [PATCH] =?UTF-8?q?=E7=99=BE=E5=BA=A6=E8=AF=AD=E9=9F=B3=E8=BD=AC?=
 =?UTF-8?q?=E5=86=99=E6=94=AF=E6=8C=818000=E9=87=87=E6=A0=B7=E7=8E=87,=20p?=
 =?UTF-8?q?cm=5Fs16le=E7=BC=96=E7=A0=81,=20=E5=8D=95=E9=80=9A=E9=81=93?=
 =?UTF-8?q?=E8=AF=AD=E9=9F=B3=E7=9A=84=E7=BB=84=E5=90=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 voice/audio_convert.py     | 4 +++-
 voice/baidu/baidu_voice.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/voice/audio_convert.py b/voice/audio_convert.py
index 18fe3c2..5c80528 100644
--- a/voice/audio_convert.py
+++ b/voice/audio_convert.py
@@ -64,7 +64,9 @@ def any_to_wav(any_path, wav_path):
     if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
         return sil_to_wav(any_path, wav_path)
     audio = AudioSegment.from_file(any_path)
-    audio.export(wav_path, format="wav")
+    audio = audio.set_frame_rate(8000)  # Baidu ASR accepts 8000 Hz, pcm_s16le, mono; pydub returns a NEW segment, so rebind it
+    audio = audio.set_channels(1)  # same here: set_channels() does not mutate in place, keep the returned mono segment
+    audio.export(wav_path, format="wav", codec='pcm_s16le')
 
 
 def any_to_sil(any_path, sil_path):
diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py
index fbf53ce..66ba4d8 100644
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -62,7 +62,7 @@ class BaiduVoice(Voice):
         # 识别本地文件
         logger.debug("[Baidu] voice file name={}".format(voice_file))
         pcm = get_pcm_from_wav(voice_file)
-        res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
+        res = self.client.asr(pcm, "pcm", 8000, {"dev_pid": self.dev_id})
         if res["err_no"] == 0:
             logger.info("百度语音识别到了：{}".format(res["result"]))
             text = "".join(res["result"])