From 76a81d536030492c9a35f8260d51f88bd298c42c Mon Sep 17 00:00:00 2001 From: lanvent Date: Thu, 27 Apr 2023 22:47:50 +0800 Subject: [PATCH] feat(wechatcomapp): add support for splitting long audio files --- channel/wechatcom/wechatcomapp_channel.py | 10 +++++++--- voice/audio_convert.py | 24 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/channel/wechatcom/wechatcomapp_channel.py b/channel/wechatcom/wechatcomapp_channel.py index d4c090d..a79ca65 100644 --- a/channel/wechatcom/wechatcomapp_channel.py +++ b/channel/wechatcom/wechatcomapp_channel.py @@ -19,7 +19,7 @@ from common.log import logger from common.singleton import singleton from common.utils import compress_imgfile, fsize, split_string_by_utf8_length from config import conf, subscribe_msg -from voice.audio_convert import any_to_amr +from voice.audio_convert import any_to_amr, split_audio MAX_UTF8_LEN = 2048 @@ -66,8 +66,12 @@ class WechatComAppChannel(ChatChannel): file_path = reply.content amr_file = os.path.splitext(file_path)[0] + ".amr" any_to_amr(file_path, amr_file) - response = self.client.media.upload("voice", open(amr_file, "rb")) - logger.debug("[wechatcom] upload voice response: {}".format(response)) + files = split_audio(amr_file, 60000) + if len(files) > 1: + logger.info("[wechatcom] voice too long, split into {} parts".format(len(files))) + for path in files: + response = self.client.media.upload("voice", open(path, "rb")) + logger.debug("[wechatcom] upload voice response: {}".format(response)) except WeChatClientException as e: logger.error("[wechatcom] upload voice failed: {}".format(e)) return diff --git a/voice/audio_convert.py b/voice/audio_convert.py index f9cc2f5..4b72d6b 100644 --- a/voice/audio_convert.py +++ b/voice/audio_convert.py @@ -92,6 +92,7 @@ def any_to_amr(any_path, amr_path): audio = AudioSegment.from_file(any_path) audio = audio.set_frame_rate(8000) # only support 8000 audio.export(amr_path, format="amr") + return audio.duration_seconds * 1000 def sil_to_wav(silk_path, wav_path, rate: int = 24000): @@ -101,3 +102,26 @@ def sil_to_wav(silk_path, wav_path, rate: int = 24000): wav_data = pysilk.decode_file(silk_path, to_wav=True, sample_rate=rate) with open(wav_path, "wb") as f: f.write(wav_data) + + +def split_audio(file_path, max_segment_length_ms=60000): + """ + 分割音频文件 + """ + audio = AudioSegment.from_file(file_path) + audio_length_ms = len(audio) + if audio_length_ms <= max_segment_length_ms: + return [file_path] + segments = [] + for start_ms in range(0, audio_length_ms, max_segment_length_ms): + end_ms = min(audio_length_ms, start_ms + max_segment_length_ms) + segment = audio[start_ms:end_ms] + segments.append(segment) + file_prefix = file_path[: file_path.rindex(".")] + format = file_path[file_path.rindex(".") + 1 :] + files = [] + for i, segment in enumerate(segments): + path = f"{file_prefix}_{i+1}" + f".{format}" + segment.export(path, format=format) + files.append(path) + return files