Browse Source

1.新增wechaty方案的语音识别、语音回复功能;2.更新README;

master
Chiaki 1 year ago
parent
commit
f9b0ad7697
4 changed files with 131 additions and 8 deletions
  1. +1
    -0
      .gitignore
  2. +43
    -5
      README.md
  3. +79
    -0
      channel/wechat/wechaty_channel.py
  4. +8
    -3
      config-template.json

+ 1
- 0
.gitignore View File

@@ -1,5 +1,6 @@
.DS_Store
.idea
.wechaty/
__pycache__/
venv*
*.pyc


+ 43
- 5
README.md View File

@@ -62,15 +62,14 @@
支持 Linux、MacOS、Windows 系统(可在Linux服务器上长期运行),同时需安装 `Python`。
> 建议Python版本在 3.7.1~3.9.X 之间,3.10及以上版本在 MacOS 可用,其他系统上不确定能否正常运行。


1.克隆项目代码:
**1.克隆项目代码:**

```bash
git clone https://github.com/zhayujie/chatgpt-on-wechat
cd chatgpt-on-wechat/
```

2.安装所需核心依赖:
**2.安装所需核心依赖:**

```bash
pip3 install itchat-uos==1.5.0.dev0
@@ -78,13 +77,45 @@ pip3 install --upgrade openai
```
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。

**3.安装所需拓展依赖(接收语音,回复语音):**
### wechaty 处理语音所需

```bash
pip3 install pysilk
pip3 install pysilk-mod
pip3 install pydub
```

### 百度

```bash
pip3 install baidu-aip chardet
```
### Google

(1) 安装 SpeechRecognition
```bash
pip3 install SpeechRecognition
```
(2) 安装 ffmpeg 和 espeak

MacOS:
```bash
brew install ffmpeg espeak
```
Windows:下载ffmpeg.exe

Linux:
```bash
apt-get install ffmpeg espeak
```

## 配置

配置文件的模板在根目录的`config-template.json`中,需复制该模板创建最终生效的 `config.json` 文件:

```bash
cp config-template.json config.json
cp config-template.json config.json
```

然后在`config.json`中填入配置,以下是对默认配置的说明,可根据需要进行自定义修改:
@@ -93,14 +124,21 @@ cp config-template.json config.json
# config.json文件内容示例
{
"open_ai_api_key": "YOUR API KEY", # 填入上面创建的 OpenAI API KEY
"open_ai_api_base": "https://api.openai.com/v1", # 自定义 OpenAI API 地址
"proxy": "127.0.0.1:7890", # 代理客户端的ip和端口
"baidu_app_id": "", # 百度AI的App Id
"baidu_api_key": "", # 百度AI的API KEY
"baidu_secret_key": "", # 百度AI的Secret KEY
"wechaty_puppet_service_token":"", # wechaty服务token
"single_chat_prefix": ["bot", "@bot"], # 私聊时文本需要包含该前缀才能触发机器人回复
"single_chat_reply_prefix": "[bot] ", # 私聊时自动回复的前缀,用于区分真人
"group_chat_prefix": ["@bot"], # 群聊时包含该前缀则会触发机器人回复
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], # 开启自动回复的群名称列表
"image_create_prefix": ["画", "看", "找"], # 开启图片回复的前缀
"conversation_max_tokens": 1000, # 支持上下文记忆的最多字符数
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。" # 人格描述
"speech_recognition": false, # 是否开启语音识别
"voice_reply_voice": false, # 是否开启语音回复
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。" # 人格描述
}
```
**配置说明:**


+ 79
- 0
channel/wechat/wechaty_channel.py View File

@@ -10,12 +10,16 @@ import json
import time
import asyncio
import requests
import pysilk
import wave
from pydub import AudioSegment
from typing import Optional, Union
from wechaty_puppet import MessageType, FileBox, ScanStatus # type: ignore
from wechaty import Wechaty, Contact
from wechaty.user import Message, Room, MiniProgram, UrlLink
from channel.channel import Channel
from common.log import logger
from common.tmp_dir import TmpDir
from config import conf


@@ -89,6 +93,48 @@ class WechatyChannel(Channel):
await self._do_send_img(content, to_user_id)
else:
await self._do_send(content, to_user_id)
elif room is None and msg.type() == MessageType.MESSAGE_TYPE_AUDIO:
if not msg.is_self(): # 接收语音消息
# 下载语音文件
voice_file = await msg.to_file_box()
silk_file = TmpDir().path() + voice_file.name
await voice_file.to_file(silk_file)
logger.info("[WX]receive voice file: " + silk_file)
# 将文件转成wav格式音频
wav_file = silk_file.replace(".slk", ".wav")
with open(silk_file, 'rb') as f:
silk_data = f.read()
pcm_data = pysilk.decode(silk_data)

with wave.open(wav_file, 'wb') as wav_data:
wav_data.setnchannels(1)
wav_data.setsampwidth(2)
wav_data.setframerate(24000)
wav_data.writeframes(pcm_data)
if os.path.exists(wav_file):
converter_state = "true" # 转换wav成功
else:
converter_state = "false" # 转换wav失败
logger.info("[WX]receive voice converter: " + converter_state)
# 语音识别为文本
query = super().build_voice_to_text(wav_file)
# 校验关键字
match_prefix = self.check_prefix(query, conf().get('single_chat_prefix'))
if match_prefix is not None:
if match_prefix != '':
str_list = query.split(match_prefix, 1)
if len(str_list) == 2:
query = str_list[1].strip()
# 返回消息
if conf().get('voice_reply_voice'):
await self._do_send_voice(query, from_user_id)
else:
await self._do_send(query, from_user_id)
else:
logger.info("[WX]receive voice check prefix: " + 'False')
# 清除缓存文件
os.remove(wav_file)
os.remove(silk_file)
elif room and msg.type() == MessageType.MESSAGE_TYPE_TEXT:
# 群组&文本消息
room_id = room.room_id
@@ -135,6 +181,39 @@ class WechatyChannel(Channel):
except Exception as e:
logger.exception(e)


async def _do_send_voice(self, query, reply_user_id):
    """Reply to ``query`` with a voice message instead of text.

    The reply text is built with ``build_reply_content``, synthesized to an
    mp3 file with ``build_text_to_voice``, re-sampled to 24 kHz mono,
    encoded with ``pysilk`` and sent to ``reply_user_id`` as a ``.silk``
    file box.

    Any exception is logged and swallowed (same best-effort contract as
    before), and the temporary mp3/silk files are removed whether or not
    the send succeeded.

    :param query: text to answer; nothing is sent when it is empty.
    :param reply_user_id: id of the receiving contact, also used as the
        conversation ``session_id``.
    """
    if not query:
        return
    # Tracked outside the try block so the finally clause can clean up
    # whatever was created before a failure.
    mp3_file = None
    silk_file = None
    try:
        context = dict()
        context['session_id'] = reply_user_id
        reply_text = super().build_reply_content(query, context)
        if not reply_text:
            return
        # Text -> mp3 (the result is used below as a path to an mp3 file).
        mp3_file = super().build_text_to_voice(reply_text)
        # Swap only the extension: str.replace(".mp3", ...) would also
        # rewrite a ".mp3" appearing elsewhere in the path, and would
        # return the same path (so the source gets overwritten) when the
        # file has no ".mp3" in its name.
        silk_file = os.path.splitext(mp3_file)[0] + ".silk"
        # Decode the mp3 and normalize to 24 kHz mono, matching the
        # sample rate passed to the encoder below.
        audio = AudioSegment.from_file(mp3_file, format="mp3")
        audio = audio.set_frame_rate(24000).set_channels(1)
        # Encode the raw PCM samples to SILK and store them on disk.
        silk_data = pysilk.encode(audio.raw_data, 24000)
        with open(silk_file, "wb") as f:
            f.write(silk_data)
        # Send the voice file; the timestamp keeps the visible name unique.
        t = int(time.time())
        file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
        await self.send(file_box, reply_user_id)
    except Exception as e:
        logger.exception(e)
    finally:
        # Remove temporary files even when encoding or sending failed.
        for tmp_file in (mp3_file, silk_file):
            if tmp_file and os.path.exists(tmp_file):
                try:
                    os.remove(tmp_file)
                except OSError as e:
                    logger.exception(e)
async def _do_send_img(self, query, reply_user_id):
try:
if not query:


+ 8
- 3
config-template.json View File

@@ -1,13 +1,18 @@
{
"open_ai_api_key": "YOUR API KEY",
"proxy": "",
"wechaty_puppet_service_token":"",
"baidu_app_id": "",
"baidu_api_key": "",
"baidu_secret_key": "",
"single_chat_prefix": ["bot", "@bot"],
"single_chat_reply_prefix": "[bot] ",
"group_chat_prefix": ["@bot"],
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
"image_create_prefix": ["画", "看", "找"],
"conversation_max_tokens": 1000,
"speech_recognition": false,
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
"expires_in_seconds": 3600
"voice_reply_voice": false,
"conversation_max_tokens": 1000,
"expires_in_seconds": 3600,
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。"
}

Loading…
Cancel
Save