2 年之前 · 24de670c2c
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -5,6 +5,9 @@ wechat channel
 """

 import os
 import requests
 import io
 import time
 from lib import itchat
 import json
 from lib.itchat.content import *
@@ -17,9 +20,7 @@ from common.tmp_dir import TmpDir
 from config import conf
 from common.time_check import time_checker
 from plugins import *
 import requests
 import io
 import time
 from voice.audio_convert import mp3_to_wav


 # Module-level thread pool shared by this module, capped at 8 worker threads.
 thread_pool = ThreadPoolExecutor(max_workers=8)
@@ -28,8 +29,7 @@ thread_pool = ThreadPoolExecutor(max_workers=8)
 def thread_pool_callback(worker):
    """Log the exception raised by a finished worker, if there is one.

    Args:
        worker: Object exposing ``exception()``; presumably a
            ``concurrent.futures.Future`` handed to this function as a
            done-callback -- TODO confirm at the call sites.

    Returns:
        None. The only effect is an error-level log record when
        ``worker.exception()`` yields a truthy value.
    """
    worker_exception = worker.exception()
    if worker_exception:
        # Log exactly once. This function does not run inside an ``except``
        # block, so there is no active exception for logger.exception() to
        # pick up on its own; pass the exception object explicitly so the
        # record carries the worker's real traceback.
        logger.exception(
            "Worker return exception: {}".format(worker_exception),
            exc_info=worker_exception)


@itchat.msg_register(TEXT)
@@ -247,9 +247,16 @@ class WechatChannel(Channel):
                reply = super().build_reply_content(context.content, context)
            elif context.type == ContextType.VOICE:
                msg = context['msg']
                file_name = TmpDir().path() + context.content
                msg.download(file_name)
                reply = super().build_voice_to_text(file_name)
                mp3_path = TmpDir().path() + context.content
                msg.download(mp3_path)
                # mp3转wav
                wav_path = os.path.splitext(mp3_path)[0] + '.wav'
                mp3_to_wav(mp3_path=mp3_path, wav_path=wav_path)
                # 语音识别
                reply = super().build_voice_to_text(wav_path)
                # 删除临时文件
                os.remove(wav_path)
                os.remove(mp3_path)
                if reply.type != ReplyType.ERROR and reply.type != ReplyType.INFO:
                    context.content = reply.content  # 语音转文字后，将文字内容作为新的context
                    context.type = ContextType.TEXT
@@ -263,12 +270,13 @@ class WechatChannel(Channel):
                            prefixes = conf().get('group_chat_prefix')
                            for prefix in prefixes:
                                if context.content.startswith(prefix):
                                    context.content = context.content.replace(prefix, '', 1).strip()
                                    context.content = context.content.replace(
                                        prefix, '', 1).strip()
                                    break
                        else:
                            logger.info("[WX]receive voice check prefix: " + 'False')
                            return
                            

                    reply = super().build_reply_content(context.content, context)
                    if reply.type == ReplyType.TEXT:
                        if conf().get('voice_reply_voice'):
--- a/channel/wechat/wechaty_channel.py
+++ b/channel/wechat/wechaty_channel.py
@@ -4,25 +4,19 @@
 wechaty channel
 Python Wechaty - https://github.com/wechaty/python-wechaty
 """
 import io
 import os
 import json
 import time
 import asyncio
 import requests
 import pysilk
 import wave
 from pydub import AudioSegment
 from typing import Optional, Union
 from bridge.context import Context, ContextType
 from wechaty_puppet import MessageType, FileBox, ScanStatus  # type: ignore
 from wechaty import Wechaty, Contact
 from wechaty.user import Message, Room, MiniProgram, UrlLink
 from wechaty.user import Message, MiniProgram, UrlLink
 from channel.channel import Channel
 from common.log import logger
 from common.tmp_dir import TmpDir
 from config import conf

 from voice.audio_convert import sil_to_wav, mp3_to_sil

 class WechatyChannel(Channel):

@@ -50,8 +44,8 @@ class WechatyChannel(Channel):

    async def on_scan(self, status: ScanStatus, qr_code: Optional[str] = None,
                      data: Optional[str] = None):
        """Log a scan event together with the contact loaded for this channel.

        Args:
            status: Scan status; its ``name`` is written to the log.
            qr_code: QR code value, written to the log as-is (may be None).
            data: Not used by the visible body.
        """
        contact = self.Contact.load(self.contact_id)
        logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code))
        # NOTE(review): the two commented-out lines below duplicate the two
        # statements above. This text looks like a diff with old and new lines
        # interleaved -- confirm which variant is meant to be live.
        # contact = self.Contact.load(self.contact_id)
        # logger.info('[WX] scan user={}, scan status={}, scan qr_code={}'.format(contact, status.name, qr_code))
        # print(f'user <{contact}> scan status: {status.name} , 'f'qr_code: {qr_code}')

    async def on_message(self, msg: Message):
@@ -67,7 +61,7 @@ class WechatyChannel(Channel):
        content = msg.text()
        mention_content = await msg.mention_text()  # 返回过滤掉@name后的消息
        match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
        conversation: Union[Room, Contact] = from_contact if room is None else room
        # conversation: Union[Room, Contact] = from_contact if room is None else room

        if room is None and msg.type() == MessageType.MESSAGE_TYPE_TEXT:
            if not msg.is_self() and match_prefix is not None:
@@ -102,21 +96,8 @@ class WechatyChannel(Channel):
                await voice_file.to_file(silk_file)
                logger.info("[WX]receive voice file: " + silk_file)
                # 将文件转成wav格式音频
                wav_file = silk_file.replace(".slk", ".wav")
                with open(silk_file, 'rb') as f:
                    silk_data = f.read()
                pcm_data = pysilk.decode(silk_data)

                with wave.open(wav_file, 'wb') as wav_data:
                    wav_data.setnchannels(1)
                    wav_data.setsampwidth(2)
                    wav_data.setframerate(24000)
                    wav_data.writeframes(pcm_data)
                if os.path.exists(wav_file): 
                    converter_state = "true" # 转换wav成功
                else:
                    converter_state = "false" # 转换wav失败
                logger.info("[WX]receive voice converter: " + converter_state)
                wav_file = os.path.splitext(silk_file)[0] + '.wav'
                sil_to_wav(silk_file, wav_file)
                # 语音识别为文本
                query = super().build_voice_to_text(wav_file).content
                # 校验关键字
@@ -183,21 +164,8 @@ class WechatyChannel(Channel):
                await voice_file.to_file(silk_file)
                logger.info("[WX]receive voice file: " + silk_file)
                # 将文件转成wav格式音频
                wav_file = silk_file.replace(".slk", ".wav")
                with open(silk_file, 'rb') as f:
                    silk_data = f.read()
                pcm_data = pysilk.decode(silk_data)

                with wave.open(wav_file, 'wb') as wav_data:
                    wav_data.setnchannels(1)
                    wav_data.setsampwidth(2)
                    wav_data.setframerate(24000)
                    wav_data.writeframes(pcm_data)
                if os.path.exists(wav_file): 
                    converter_state = "true" # 转换wav成功
                else:
                    converter_state = "false" # 转换wav失败
                logger.info("[WX]receive voice converter: " + converter_state)
                wav_file = os.path.splitext(silk_file)[0] + '.wav'
                sil_to_wav(silk_file, wav_file)
                # 语音识别为文本
                query = super().build_voice_to_text(wav_file).content
                # 校验关键字
@@ -260,21 +228,12 @@ class WechatyChannel(Channel):
            if reply_text:
                # 转换 mp3 文件为 silk 格式
                mp3_file = super().build_text_to_voice(reply_text).content
                silk_file = mp3_file.replace(".mp3", ".silk")
                # Load the MP3 file
                audio = AudioSegment.from_file(mp3_file, format="mp3")
                # Convert to WAV format
                audio = audio.set_frame_rate(24000).set_channels(1)
                wav_data = audio.raw_data
                sample_width = audio.sample_width
                # Encode to SILK format
                silk_data = pysilk.encode(wav_data, 24000)
                # Save the silk file
                with open(silk_file, "wb") as f:
                    f.write(silk_data)
                silk_file = os.path.splitext(mp3_file)[0] + '.sil'
                voiceLength = mp3_to_sil(mp3_file, silk_file)
                # 发送语音
                t = int(time.time())
                file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
                file_box = FileBox.from_file(silk_file, name=str(t) + '.sil')
                file_box.metadata = {'voiceLength': voiceLength}                
                await self.send(file_box, reply_user_id)
                # 清除缓存文件
                os.remove(mp3_file)
@@ -337,21 +296,12 @@ class WechatyChannel(Channel):
            reply_text = '@' + group_user_name + ' ' + reply_text.strip()
            # 转换 mp3 文件为 silk 格式
            mp3_file = super().build_text_to_voice(reply_text).content
            silk_file = mp3_file.replace(".mp3", ".silk")
            # Load the MP3 file
            audio = AudioSegment.from_file(mp3_file, format="mp3")
            # Convert to WAV format
            audio = audio.set_frame_rate(24000).set_channels(1)
            wav_data = audio.raw_data
            sample_width = audio.sample_width
            # Encode to SILK format
            silk_data = pysilk.encode(wav_data, 24000)
            # Save the silk file
            with open(silk_file, "wb") as f:
                f.write(silk_data)
            silk_file = os.path.splitext(mp3_file)[0] + '.sil'
            voiceLength = mp3_to_sil(mp3_file, silk_file)
            # 发送语音
            t = int(time.time())
            file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
            file_box.metadata = {'voiceLength': voiceLength}            
            await self.send_group(file_box, group_id)
            # 清除缓存文件
            os.remove(mp3_file)
--- a/voice/google/google_voice.py
+++ b/voice/google/google_voice.py
@@ -3,17 +3,14 @@
 google voice service
 """

 import pathlib
 import subprocess
 import time
 from bridge.reply import Reply, ReplyType
 import speech_recognition
 import pyttsx3
 from gtts import gTTS
 from bridge.reply import Reply, ReplyType
 from common.log import logger
 from common.tmp_dir import TmpDir
 from voice.voice import Voice
 from voice.audio_convert import mp3_to_wav


 class GoogleVoice(Voice):
@@ -30,11 +27,10 @@ class GoogleVoice(Voice):
        self.engine.setProperty('voice', voices[1].id)

    def voiceToText(self, voice_file):
        new_file = voice_file.replace('.mp3', '.wav')
        # new_file = voice_file.replace('.mp3', '.wav')
        # subprocess.call('ffmpeg -i ' + voice_file +
        #                 ' -acodec pcm_s16le -ac 1 -ar 16000 ' + new_file, shell=True)
        mp3_to_wav(voice_file, new_file)
        with speech_recognition.AudioFile(new_file) as source:
        with speech_recognition.AudioFile(voice_file) as source:
            audio = self.recognizer.record(source)
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')