128 lines
3.8KB

  1. import shutil
  2. import wave
  3. import pysilk
  4. from pydub import AudioSegment
  5. sil_supports = [8000, 12000, 16000, 24000, 32000, 44100, 48000] # slk转wav时,支持的采样率
  6. def find_closest_sil_supports(sample_rate):
  7. """
  8. 找到最接近的支持的采样率
  9. """
  10. if sample_rate in sil_supports:
  11. return sample_rate
  12. closest = 0
  13. mindiff = 9999999
  14. for rate in sil_supports:
  15. diff = abs(rate - sample_rate)
  16. if diff < mindiff:
  17. closest = rate
  18. mindiff = diff
  19. return closest
  20. def get_pcm_from_wav(wav_path):
  21. """
  22. 从 wav 文件中读取 pcm
  23. :param wav_path: wav 文件路径
  24. :returns: pcm 数据
  25. """
  26. wav = wave.open(wav_path, "rb")
  27. return wav.readframes(wav.getnframes())
  28. def any_to_mp3(any_path, mp3_path):
  29. """
  30. 把任意格式转成mp3文件
  31. """
  32. if any_path.endswith(".mp3"):
  33. shutil.copy2(any_path, mp3_path)
  34. return
  35. if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
  36. sil_to_wav(any_path, any_path)
  37. any_path = mp3_path
  38. audio = AudioSegment.from_file(any_path)
  39. audio.export(mp3_path, format="mp3")
  40. def any_to_wav(any_path, wav_path):
  41. """
  42. 把任意格式转成wav文件
  43. """
  44. if any_path.endswith(".wav"):
  45. shutil.copy2(any_path, wav_path)
  46. return
  47. if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
  48. return sil_to_wav(any_path, wav_path)
  49. audio = AudioSegment.from_file(any_path)
  50. audio.export(wav_path, format="wav")
  51. def any_to_sil(any_path, sil_path):
  52. """
  53. 把任意格式转成sil文件
  54. """
  55. if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
  56. shutil.copy2(any_path, sil_path)
  57. return 10000
  58. audio = AudioSegment.from_file(any_path)
  59. rate = find_closest_sil_supports(audio.frame_rate)
  60. # Convert to PCM_s16
  61. pcm_s16 = audio.set_sample_width(2)
  62. pcm_s16 = pcm_s16.set_frame_rate(rate)
  63. wav_data = pcm_s16.raw_data
  64. silk_data = pysilk.encode(wav_data, data_rate=rate, sample_rate=rate)
  65. with open(sil_path, "wb") as f:
  66. f.write(silk_data)
  67. return audio.duration_seconds * 1000
  68. def any_to_amr(any_path, amr_path):
  69. """
  70. 把任意格式转成amr文件
  71. """
  72. if any_path.endswith(".amr"):
  73. shutil.copy2(any_path, amr_path)
  74. return
  75. if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
  76. raise NotImplementedError("Not support file type: {}".format(any_path))
  77. audio = AudioSegment.from_file(any_path)
  78. audio = audio.set_frame_rate(8000) # only support 8000
  79. audio.export(amr_path, format="amr")
  80. return audio.duration_seconds * 1000
  81. def sil_to_wav(silk_path, wav_path, rate: int = 24000):
  82. """
  83. silk 文件转 wav
  84. """
  85. wav_data = pysilk.decode_file(silk_path, to_wav=True, sample_rate=rate)
  86. with open(wav_path, "wb") as f:
  87. f.write(wav_data)
  88. def split_audio(file_path, max_segment_length_ms=60000):
  89. """
  90. 分割音频文件
  91. """
  92. audio = AudioSegment.from_file(file_path)
  93. audio_length_ms = len(audio)
  94. if audio_length_ms <= max_segment_length_ms:
  95. return audio_length_ms, [file_path]
  96. segments = []
  97. for start_ms in range(0, audio_length_ms, max_segment_length_ms):
  98. end_ms = min(audio_length_ms, start_ms + max_segment_length_ms)
  99. segment = audio[start_ms:end_ms]
  100. segments.append(segment)
  101. file_prefix = file_path[: file_path.rindex(".")]
  102. format = file_path[file_path.rindex(".") + 1 :]
  103. files = []
  104. for i, segment in enumerate(segments):
  105. path = f"{file_prefix}_{i+1}" + f".{format}"
  106. segment.export(path, format=format)
  107. files.append(path)
  108. return audio_length_ms, files