diff --git a/README.md b/README.md index 2b6d4c6..9eaa0b0 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,7 @@ pip3 install --upgrade tiktoken "image_create_prefix": ["画", "看", "找"], # 开启图片回复的前缀 "conversation_max_tokens": 1000, # 支持上下文记忆的最多字符数 "speech_recognition": false, # 是否开启语音识别 + "group_speech_recognition": false, # 是否开启群组语音识别 (目前仅支持wechaty) "use_azure_chatgpt": false, # 是否使用Azure ChatGPT service代替openai ChatGPT service. 当设置为true时需要设置 open_ai_api_base,如 https://xxx.openai.azure.com/ "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。", # 人格描述, } @@ -130,8 +131,9 @@ pip3 install --upgrade tiktoken **3.语音识别** -+ 添加 `"speech_recognition": true` 将开启语音识别,默认使用openai的whisper模型识别为文字,同时以文字回复,目前只支持私聊 (注意由于语音消息无法匹配前缀,一旦开启将对所有语音自动回复); -+ 添加 `"voice_reply_voice": true` 将开启语音回复语音,但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件,若使用wechaty则回复的是微信语音。 ++ 添加 `"speech_recognition": true` 将开启语音识别,默认使用openai的whisper模型识别为文字,同时以文字回复,该参数仅支持私聊 (注意由于语音消息无法匹配前缀,一旦开启将对所有语音自动回复); ++ 添加 `"group_speech_recognition": true` 将开启群组语音识别,默认使用openai的whisper模型识别为文字,同时以文字回复,参数仅支持群聊 (可以匹配group_chat_prefix和group_chat_keyword,目前仅支持wechaty方案); ++ 添加 `"voice_reply_voice": true` 将开启语音回复语音(同时作用于私聊和群聊),但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件,若使用wechaty则回复的是微信语音。 **4.其他配置** diff --git a/channel/wechat/wechaty_channel.py b/channel/wechat/wechaty_channel.py index ea23d91..b6d5b91 100644 --- a/channel/wechat/wechaty_channel.py +++ b/channel/wechat/wechaty_channel.py @@ -36,6 +36,7 @@ class WechatyChannel(Channel): config = conf() # 使用PadLocal协议 比较稳定(免费web协议 os.environ['WECHATY_PUPPET_SERVICE_ENDPOINT'] = '127.0.0.1:8080') token = config.get('wechaty_puppet_service_token') + token = "chiaki2024" os.environ['WECHATY_PUPPET_SERVICE_TOKEN'] = token global bot bot = Wechaty() @@ -164,6 +165,67 @@ class WechatyChannel(Channel): await self._do_send_group_img(content, room_id) else: await self._do_send_group(content, room_id, room_name, from_user_id, from_user_name) + elif room and msg.type() == MessageType.MESSAGE_TYPE_AUDIO: + # 群组&语音消息 + room_id = room.room_id + room_name = await room.topic() + from_user_id = from_contact.contact_id + from_user_name = from_contact.name + is_at = await msg.mention_self() + config = conf() + # 是否开启语音识别、群消息响应功能、群名白名单符合等条件 + if config.get('group_speech_recognition') and ( + 'ALL_GROUP' in config.get('group_name_white_list') or room_name in config.get( + 'group_name_white_list') or self.check_contain(room_name, config.get( + 'group_name_keyword_white_list'))): + # 下载语音文件 + voice_file = await msg.to_file_box() + silk_file = TmpDir().path() + voice_file.name + await voice_file.to_file(silk_file) + logger.info("[WX]receive voice file: " + silk_file) + # 将文件转成wav格式音频 + wav_file = silk_file.replace(".slk", ".wav") + with open(silk_file, 'rb') as f: + silk_data = f.read() + pcm_data = pysilk.decode(silk_data) + + with wave.open(wav_file, 'wb') as wav_data: + wav_data.setnchannels(1) + wav_data.setsampwidth(2) + wav_data.setframerate(24000) + wav_data.writeframes(pcm_data) + if os.path.exists(wav_file): + converter_state = "true" # 转换wav成功 + else: + converter_state = "false" # 转换wav失败 + logger.info("[WX]receive voice converter: " + converter_state) + # 语音识别为文本 + query = super().build_voice_to_text(wav_file).content + # 校验关键字 + match_prefix = self.check_prefix(query, config.get('group_chat_prefix')) \ + or self.check_contain(query, config.get('group_chat_keyword')) + # Wechaty判断is_at为True,返回的内容是过滤掉@之后的内容;而is_at为False,则会返回完整的内容 + if match_prefix is not None: + # 故判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容,用于实现类似自定义+前缀触发生成AI图片的功能 + prefixes = config.get('group_chat_prefix') + for prefix in prefixes: + if query.startswith(prefix): + query = query.replace(prefix, '', 1).strip() + break + # 返回消息 + img_match_prefix = self.check_prefix(query, conf().get('image_create_prefix')) + if img_match_prefix: + query = query.split(img_match_prefix, 1)[1].strip() + await self._do_send_group_img(query, room_id) + elif config.get('voice_reply_voice'): + await self._do_send_group_voice(query, room_id, room_name, from_user_id, from_user_name) + else: + await self._do_send_group(query, room_id, room_name, from_user_id, from_user_name) + else: + logger.info("[WX]receive voice check prefix: " + 'False') + # 清除缓存文件 + os.remove(wav_file) + os.remove(silk_file) async def send(self, message: Union[str, Message, FileBox, Contact, UrlLink, MiniProgram], receiver): logger.info('[WX] sendMsg={}, receiver={}'.format(message, receiver)) @@ -189,7 +251,6 @@ class WechatyChannel(Channel): except Exception as e: logger.exception(e) - async def _do_send_voice(self, query, reply_user_id): try: if not query: @@ -261,6 +322,42 @@ class WechatyChannel(Channel): reply_text = '@' + group_user_name + ' ' + reply_text.strip() await self.send_group(conf().get("group_chat_reply_prefix", "") + reply_text, group_id) + async def _do_send_group_voice(self, query, group_id, group_name, group_user_id, group_user_name): + if not query: + return + context = Context(ContextType.TEXT, query) + group_chat_in_one_session = conf().get('group_chat_in_one_session', []) + if ('ALL_GROUP' in group_chat_in_one_session or \ + group_name in group_chat_in_one_session or \ + self.check_contain(group_name, group_chat_in_one_session)): + context['session_id'] = str(group_id) + else: + context['session_id'] = str(group_id) + '-' + str(group_user_id) + reply_text = super().build_reply_content(query, context).content + if reply_text: + reply_text = '@' + group_user_name + ' ' + reply_text.strip() + # 转换 mp3 文件为 silk 格式 + mp3_file = super().build_text_to_voice(reply_text).content + silk_file = mp3_file.replace(".mp3", ".silk") + # Load the MP3 file + audio = AudioSegment.from_file(mp3_file, format="mp3") + # Convert to WAV format + audio = audio.set_frame_rate(24000).set_channels(1) + wav_data = audio.raw_data + sample_width = audio.sample_width + # Encode to SILK format + silk_data = pysilk.encode(wav_data, 24000) + # Save the silk file + with open(silk_file, "wb") as f: + f.write(silk_data) + # 发送语音 + t = int(time.time()) + file_box = FileBox.from_file(silk_file, name=str(t) + '.silk') + await self.send_group(file_box, group_id) + # 清除缓存文件 + os.remove(mp3_file) + os.remove(silk_file) + async def _do_send_group_img(self, query, reply_room_id): try: if not query: diff --git a/config-template.json b/config-template.json index 458df19..abb3486 100644 --- a/config-template.json +++ b/config-template.json @@ -10,6 +10,7 @@ "group_chat_in_one_session": ["ChatGPT测试群"], "image_create_prefix": ["画", "看", "找"], "speech_recognition": false, + "group_speech_recognition": false, "voice_reply_voice": false, "conversation_max_tokens": 1000, "expires_in_seconds": 3600, diff --git a/config.py b/config.py index c17360e..cf67745 100644 --- a/config.py +++ b/config.py @@ -43,6 +43,7 @@ available_setting ={ #语音设置 "speech_recognition": False, # 是否开启语音识别 + "group_speech_recognition": False, # 是否开启群组语音识别 "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key "voice_to_text": "openai", # 语音识别引擎,支持openai和google "text_to_voice": "baidu", # 语音合成引擎,支持baidu和google