From 24de670c2c20e4552b6909c1fc8dc399e2af530f Mon Sep 17 00:00:00 2001 From: zwssunny Date: Mon, 27 Mar 2023 16:53:59 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E8=AF=AD=E9=9F=B3=E7=9A=84?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=92=8C=E8=BD=AC=E6=8D=A2=E5=85=BC=E5=AE=B9?= =?UTF-8?q?=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- channel/wechat/wechat_channel.py | 28 +++++++---- channel/wechat/wechaty_channel.py | 82 ++++++------------------------- voice/google/google_voice.py | 10 ++-- 3 files changed, 37 insertions(+), 83 deletions(-) diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py index 5aad491..3d2c26a 100644 --- a/channel/wechat/wechat_channel.py +++ b/channel/wechat/wechat_channel.py @@ -5,6 +5,9 @@ wechat channel """ import os +import requests +import io +import time from lib import itchat import json from lib.itchat.content import * @@ -17,9 +20,7 @@ from common.tmp_dir import TmpDir from config import conf from common.time_check import time_checker from plugins import * -import requests -import io -import time +from voice.audio_convert import mp3_to_wav thread_pool = ThreadPoolExecutor(max_workers=8) @@ -28,8 +29,7 @@ thread_pool = ThreadPoolExecutor(max_workers=8) def thread_pool_callback(worker): worker_exception = worker.exception() if worker_exception: - logger.exception( - "Worker return exception: {}".format(worker_exception)) + logger.exception("Worker return exception: {}".format(worker_exception)) @itchat.msg_register(TEXT) @@ -247,9 +247,16 @@ class WechatChannel(Channel): reply = super().build_reply_content(context.content, context) elif context.type == ContextType.VOICE: msg = context['msg'] - file_name = TmpDir().path() + context.content - msg.download(file_name) - reply = super().build_voice_to_text(file_name) + mp3_path = TmpDir().path() + context.content + msg.download(mp3_path) + # mp3转wav + wav_path = os.path.splitext(mp3_path)[0] + '.wav' + mp3_to_wav(mp3_path=mp3_path, wav_path=wav_path) + # 语音识别 + reply = super().build_voice_to_text(wav_path) + # 删除临时文件 + os.remove(wav_path) + os.remove(mp3_path) if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO: context.content = reply.content # 语音转文字后,将文字内容作为新的context context.type = ContextType.TEXT @@ -263,12 +270,13 @@ class WechatChannel(Channel): prefixes = conf().get('group_chat_prefix') for prefix in prefixes: if context.content.startswith(prefix): - context.content = context.content.replace(prefix, '', 1).strip() + context.content = context.content.replace( + prefix, '', 1).strip() break else: logger.info("[WX]receive voice check prefix: " + 'False') return - + reply = super().build_reply_content(context.content, context) if reply.type == ReplyType.TEXT: if conf().get('voice_reply_voice'): diff --git a/channel/wechat/wechaty_channel.py b/channel/wechat/wechaty_channel.py index 1caf5b4..cc7788a 100644 --- a/channel/wechat/wechaty_channel.py +++ b/channel/wechat/wechaty_channel.py @@ -4,25 +4,19 @@ wechaty channel Python Wechaty - https://github.com/wechaty/python-wechaty """ -import io import os -import json import time import asyncio -import requests -import pysilk -import wave -from pydub import AudioSegment from typing import Optional, Union from bridge.context import Context, ContextType from wechaty_puppet import MessageType, FileBox, ScanStatus # type: ignore from wechaty import Wechaty, Contact -from wechaty.user import Message, Room, MiniProgram, UrlLink +from wechaty.user import Message, MiniProgram, UrlLink from channel.channel import Channel from common.log import logger from common.tmp_dir import TmpDir from config import conf - +from voice.audio_convert import sil_to_wav, mp3_to_sil class WechatyChannel(Channel): @@ -50,8 +44,8 @@ class WechatyChannel(Channel): async def on_scan(self, status: ScanStatus, qr_code: Optional[str] = None, data: Optional[str] = None): - contact = self.Contact.load(self.contact_id) - logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code)) + # contact = self.Contact.load(self.contact_id) + # logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code)) # print(f'user <{contact}> scan status: {status.name} , 'f'qr_code: {qr_code}') async def on_message(self, msg: Message): @@ -67,7 +61,7 @@ class WechatyChannel(Channel): content = msg.text() mention_content = await msg.mention_text() # 返回过滤掉@name后的消息 match_prefix = self.check_prefix(content, conf().get('single_chat_prefix')) - conversation: Union[Room, Contact] = from_contact if room is None else room + # conversation: Union[Room, Contact] = from_contact if room is None else room if room is None and msg.type() == MessageType.MESSAGE_TYPE_TEXT: if not msg.is_self() and match_prefix is not None: @@ -102,21 +96,8 @@ class WechatyChannel(Channel): await voice_file.to_file(silk_file) logger.info("[WX]receive voice file: " + silk_file) # 将文件转成wav格式音频 - wav_file = silk_file.replace(".slk", ".wav") - with open(silk_file, 'rb') as f: - silk_data = f.read() - pcm_data = pysilk.decode(silk_data) - - with wave.open(wav_file, 'wb') as wav_data: - wav_data.setnchannels(1) - wav_data.setsampwidth(2) - wav_data.setframerate(24000) - wav_data.writeframes(pcm_data) - if os.path.exists(wav_file): - converter_state = "true" # 转换wav成功 - else: - converter_state = "false" # 转换wav失败 - logger.info("[WX]receive voice converter: " + converter_state) + wav_file = os.path.splitext(silk_file)[0] + '.wav' + sil_to_wav(silk_file, wav_file) # 语音识别为文本 query = super().build_voice_to_text(wav_file).content # 交验关键字 @@ -183,21 +164,8 @@ class WechatyChannel(Channel): await voice_file.to_file(silk_file) logger.info("[WX]receive voice file: " + silk_file) # 将文件转成wav格式音频 - wav_file = silk_file.replace(".slk", ".wav") - with open(silk_file, 'rb') as f: - silk_data = f.read() - pcm_data = pysilk.decode(silk_data) - - with wave.open(wav_file, 'wb') as wav_data: - wav_data.setnchannels(1) - wav_data.setsampwidth(2) - wav_data.setframerate(24000) - wav_data.writeframes(pcm_data) - if os.path.exists(wav_file): - converter_state = "true" # 转换wav成功 - else: - converter_state = "false" # 转换wav失败 - logger.info("[WX]receive voice converter: " + converter_state) + wav_file = os.path.splitext(silk_file)[0] + '.wav' + sil_to_wav(silk_file, wav_file) # 语音识别为文本 query = super().build_voice_to_text(wav_file).content # 校验关键字 @@ -260,21 +228,12 @@ class WechatyChannel(Channel): if reply_text: # 转换 mp3 文件为 silk 格式 mp3_file = super().build_text_to_voice(reply_text).content - silk_file = mp3_file.replace(".mp3", ".silk") - # Load the MP3 file - audio = AudioSegment.from_file(mp3_file, format="mp3") - # Convert to WAV format - audio = audio.set_frame_rate(24000).set_channels(1) - wav_data = audio.raw_data - sample_width = audio.sample_width - # Encode to SILK format - silk_data = pysilk.encode(wav_data, 24000) - # Save the silk file - with open(silk_file, "wb") as f: - f.write(silk_data) + silk_file = os.path.splitext(mp3_file)[0] + '.sil' + voiceLength = mp3_to_sil(mp3_file, silk_file) # 发送语音 t = int(time.time()) - file_box = FileBox.from_file(silk_file, name=str(t) + '.silk') + file_box = FileBox.from_file(silk_file, name=str(t) + '.sil') + file_box.metadata = {'voiceLength': voiceLength} await self.send(file_box, reply_user_id) # 清除缓存文件 os.remove(mp3_file) @@ -337,21 +296,12 @@ class WechatyChannel(Channel): reply_text = '@' + group_user_name + ' ' + reply_text.strip() # 转换 mp3 文件为 silk 格式 mp3_file = super().build_text_to_voice(reply_text).content - silk_file = mp3_file.replace(".mp3", ".silk") - # Load the MP3 file - audio = AudioSegment.from_file(mp3_file, format="mp3") - # Convert to WAV format - audio = audio.set_frame_rate(24000).set_channels(1) - wav_data = audio.raw_data - sample_width = audio.sample_width - # Encode to SILK format - silk_data = pysilk.encode(wav_data, 24000) - # Save the silk file - with open(silk_file, "wb") as f: - f.write(silk_data) + silk_file = os.path.splitext(mp3_file)[0] + '.sil' + voiceLength = mp3_to_sil(mp3_file, silk_file) # 发送语音 t = int(time.time()) file_box = FileBox.from_file(silk_file, name=str(t) + '.silk') + file_box.metadata = {'voiceLength': voiceLength} await self.send_group(file_box, group_id) # 清除缓存文件 os.remove(mp3_file) diff --git a/voice/google/google_voice.py b/voice/google/google_voice.py index 2cc9851..8770b58 100644 --- a/voice/google/google_voice.py +++ b/voice/google/google_voice.py @@ -3,17 +3,14 @@ google voice service """ -import pathlib -import subprocess import time -from bridge.reply import Reply, ReplyType import speech_recognition import pyttsx3 from gtts import gTTS +from bridge.reply import Reply, ReplyType from common.log import logger from common.tmp_dir import TmpDir from voice.voice import Voice -from voice.audio_convert import mp3_to_wav class GoogleVoice(Voice): @@ -30,11 +27,10 @@ class GoogleVoice(Voice): self.engine.setProperty('voice', voices[1].id) def voiceToText(self, voice_file): - new_file = voice_file.replace('.mp3', '.wav') + # new_file = voice_file.replace('.mp3', '.wav') # subprocess.call('ffmpeg -i ' + voice_file + # ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True) - mp3_to_wav(voice_file, new_file) - with speech_recognition.AudioFile(new_file) as source: + with speech_recognition.AudioFile(voice_file) as source: audio = self.recognizer.record(source) try: text = self.recognizer.recognize_google(audio, language='zh-CN')