1.新增wechaty方案的语音识别、语音回复功能;2.更新README;master
@@ -1,5 +1,6 @@ | |||
.DS_Store | |||
.idea | |||
.wechaty/ | |||
__pycache__/ | |||
venv* | |||
*.pyc | |||
@@ -62,15 +62,14 @@ | |||
支持 Linux、MacOS、Windows 系统(可在Linux服务器上长期运行),同时需安装 `Python`。 | |||
> 建议Python版本在 3.7.1~3.9.X 之间,3.10及以上版本在 MacOS 可用,其他系统上不确定能否正常运行。 | |||
1.克隆项目代码: | |||
**1.克隆项目代码:** | |||
```bash | |||
git clone https://github.com/zhayujie/chatgpt-on-wechat | |||
cd chatgpt-on-wechat/ | |||
``` | |||
2.安装所需核心依赖: | |||
**2.安装所需核心依赖:** | |||
```bash | |||
pip3 install itchat-uos==1.5.0.dev0 | |||
@@ -78,13 +77,45 @@ pip3 install --upgrade openai | |||
``` | |||
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。 | |||
**3.安装所需拓展依赖(接收语音,回复语音):** | |||
### wechaty 处理语音所需 | |||
```bash | |||
pip3 install pysilk | |||
pip3 install pysilk-mod | |||
pip3 install pydub | |||
``` | |||
### 百度 | |||
```bash | |||
pip3 install baidu-aip chardet | |||
``` | |||
(1) 安装 SpeechRecognition | |||
```bash | |||
pip3 install SpeechRecognition | |||
``` | |||
(2) 安装 ffmpeg 和 espeak | |||
MacOS: | |||
```bash | |||
brew install ffmpeg espeak | |||
``` | |||
Windows:从 [FFmpeg 官网](https://ffmpeg.org/download.html) 下载 ffmpeg.exe,并将其所在目录加入系统环境变量 PATH
Linux: | |||
```bash | |||
apt-get install ffmpeg espeak | |||
``` | |||
## 配置 | |||
配置文件的模板在根目录的`config-template.json`中,需复制该模板创建最终生效的 `config.json` 文件: | |||
```bash | |||
cp config-template.json config.json | |||
cp config-template.json config.json | |||
``` | |||
然后在`config.json`中填入配置,以下是对默认配置的说明,可根据需要进行自定义修改: | |||
@@ -93,14 +124,21 @@ cp config-template.json config.json | |||
# config.json文件内容示例 | |||
{ | |||
"open_ai_api_key": "YOUR API KEY", # 填入上面创建的 OpenAI API KEY | |||
"open_ai_api_base": "https://api.openai.com/v1", # 自定义 OpenAI API 地址 | |||
"proxy": "127.0.0.1:7890", # 代理客户端的ip和端口 | |||
"baidu_app_id": "", # 百度AI的App Id | |||
"baidu_api_key": "", # 百度AI的API KEY | |||
"baidu_secret_key": "", # 百度AI的Secret KEY | |||
"wechaty_puppet_service_token":"", # wechaty服务token | |||
"single_chat_prefix": ["bot", "@bot"], # 私聊时文本需要包含该前缀才能触发机器人回复 | |||
"single_chat_reply_prefix": "[bot] ", # 私聊时自动回复的前缀,用于区分真人 | |||
"group_chat_prefix": ["@bot"], # 群聊时包含该前缀则会触发机器人回复 | |||
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], # 开启自动回复的群名称列表 | |||
"image_create_prefix": ["画", "看", "找"], # 开启图片回复的前缀 | |||
"conversation_max_tokens": 1000, # 支持上下文记忆的最多字符数 | |||
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。" # 人格描述 | |||
"speech_recognition": false, # 是否开启语音识别 | |||
"voice_reply_voice": false, # 是否开启语音回复 | |||
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。" # 人格描述
} | |||
``` | |||
**配置说明:** | |||
@@ -10,12 +10,16 @@ import json | |||
import time | |||
import asyncio | |||
import requests | |||
import pysilk | |||
import wave | |||
from pydub import AudioSegment | |||
from typing import Optional, Union | |||
from wechaty_puppet import MessageType, FileBox, ScanStatus # type: ignore | |||
from wechaty import Wechaty, Contact | |||
from wechaty.user import Message, Room, MiniProgram, UrlLink | |||
from channel.channel import Channel | |||
from common.log import logger | |||
from common.tmp_dir import TmpDir | |||
from config import conf | |||
@@ -89,6 +93,48 @@ class WechatyChannel(Channel): | |||
await self._do_send_img(content, to_user_id) | |||
else: | |||
await self._do_send(content, to_user_id) | |||
elif room is None and msg.type() == MessageType.MESSAGE_TYPE_AUDIO: | |||
if not msg.is_self(): # 接收语音消息 | |||
# 下载语音文件 | |||
voice_file = await msg.to_file_box() | |||
silk_file = TmpDir().path() + voice_file.name | |||
await voice_file.to_file(silk_file) | |||
logger.info("[WX]receive voice file: " + silk_file) | |||
# 将文件转成wav格式音频 | |||
wav_file = silk_file.replace(".slk", ".wav") | |||
with open(silk_file, 'rb') as f: | |||
silk_data = f.read() | |||
pcm_data = pysilk.decode(silk_data) | |||
with wave.open(wav_file, 'wb') as wav_data: | |||
wav_data.setnchannels(1) | |||
wav_data.setsampwidth(2) | |||
wav_data.setframerate(24000) | |||
wav_data.writeframes(pcm_data) | |||
if os.path.exists(wav_file): | |||
converter_state = "true" # 转换wav成功 | |||
else: | |||
converter_state = "false" # 转换wav失败 | |||
logger.info("[WX]receive voice converter: " + converter_state) | |||
# 语音识别为文本 | |||
query = super().build_voice_to_text(wav_file) | |||
# 交验关键字 | |||
match_prefix = self.check_prefix(query, conf().get('single_chat_prefix')) | |||
if match_prefix is not None: | |||
if match_prefix != '': | |||
str_list = query.split(match_prefix, 1) | |||
if len(str_list) == 2: | |||
query = str_list[1].strip() | |||
# 返回消息 | |||
if conf().get('voice_reply_voice'): | |||
await self._do_send_voice(query, from_user_id) | |||
else: | |||
await self._do_send(query, from_user_id) | |||
else: | |||
logger.info("[WX]receive voice check prefix: " + 'False') | |||
# 清除缓存文件 | |||
os.remove(wav_file) | |||
os.remove(silk_file) | |||
elif room and msg.type() == MessageType.MESSAGE_TYPE_TEXT: | |||
# 群组&文本消息 | |||
room_id = room.room_id | |||
@@ -135,6 +181,39 @@ class WechatyChannel(Channel): | |||
except Exception as e: | |||
logger.exception(e) | |||
async def _do_send_voice(self, query, reply_user_id):
    """Build a reply for ``query`` and send it to the user as a SILK voice message.

    The reply text is produced by ``build_reply_content``, turned into an mp3
    file by ``build_text_to_voice``, re-encoded to SILK at 24 kHz mono and
    sent through ``self.send`` as a ``FileBox``.

    :param query: text to answer; nothing is sent when it is empty.
    :param reply_user_id: id of the receiver, also used as the session id.
    :return: None. Any exception is logged and swallowed so that a failing
        voice reply does not break the caller.
    """
    if not query:
        return
    # Tracked outside the try block so that the finally clause can clean up
    # whatever was actually created, even when a later step fails.
    mp3_file = None
    silk_file = None
    try:
        context = {'session_id': reply_user_id}
        reply_text = super().build_reply_content(query, context)
        if not reply_text:
            return
        mp3_file = super().build_text_to_voice(reply_text)
        if not mp3_file:
            logger.info("[WX]text to voice returned no file")
            return
        # Swap only the extension; str.replace would also rewrite a ".mp3"
        # substring appearing elsewhere in the path.
        silk_file = os.path.splitext(mp3_file)[0] + ".silk"
        # Decode the mp3 and normalise it to 24 kHz, mono, 16-bit PCM, which
        # matches the sample rate passed to the encoder below.
        audio = AudioSegment.from_file(mp3_file, format="mp3")
        audio = audio.set_frame_rate(24000).set_channels(1).set_sample_width(2)
        # Encode the raw PCM frames to SILK and write them to disk.
        silk_data = pysilk.encode(audio.raw_data, 24000)
        with open(silk_file, "wb") as f:
            f.write(silk_data)
        # Send the voice file, named after the current unix timestamp.
        file_box = FileBox.from_file(silk_file, name=str(int(time.time())) + '.silk')
        await self.send(file_box, reply_user_id)
    except Exception as e:
        logger.exception(e)
    finally:
        # Always remove temporary files, including on failure paths.
        for tmp_path in (mp3_file, silk_file):
            if tmp_path and os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError as e:
                    logger.exception(e)
async def _do_send_img(self, query, reply_user_id): | |||
try: | |||
if not query: | |||
@@ -1,13 +1,18 @@ | |||
{ | |||
"open_ai_api_key": "YOUR API KEY", | |||
"proxy": "", | |||
"wechaty_puppet_service_token":"", | |||
"baidu_app_id": "", | |||
"baidu_api_key": "", | |||
"baidu_secret_key": "", | |||
"single_chat_prefix": ["bot", "@bot"], | |||
"single_chat_reply_prefix": "[bot] ", | |||
"group_chat_prefix": ["@bot"], | |||
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], | |||
"image_create_prefix": ["画", "看", "找"], | |||
"conversation_max_tokens": 1000, | |||
"speech_recognition": false, | |||
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。", | |||
"expires_in_seconds": 3600 | |||
"voice_reply_voice": false, | |||
"conversation_max_tokens": 1000, | |||
"expires_in_seconds": 3600, | |||
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。" | |||
} |