Browse Source

1.新增wechaty方案的语音识别、语音回复功能;2.更新README;

master
Chiaki 1 year ago
parent
commit
f9b0ad7697
4 changed files with 131 additions and 8 deletions
  1. +1
    -0
      .gitignore
  2. +43
    -5
      README.md
  3. +79
    -0
      channel/wechat/wechaty_channel.py
  4. +8
    -3
      config-template.json

+ 1
- 0
.gitignore View File

@@ -1,5 +1,6 @@
.DS_Store .DS_Store
.idea .idea
.wechaty/
__pycache__/ __pycache__/
venv* venv*
*.pyc *.pyc


+ 43
- 5
README.md View File

@@ -62,15 +62,14 @@
支持 Linux、MacOS、Windows 系统(可在Linux服务器上长期运行),同时需安装 `Python`。 支持 Linux、MacOS、Windows 系统(可在Linux服务器上长期运行),同时需安装 `Python`。
> 建议Python版本在 3.7.1~3.9.X 之间,3.10及以上版本在 MacOS 可用,其他系统上不确定能否正常运行。 > 建议Python版本在 3.7.1~3.9.X 之间,3.10及以上版本在 MacOS 可用,其他系统上不确定能否正常运行。



1.克隆项目代码:
**1.克隆项目代码:**


```bash ```bash
git clone https://github.com/zhayujie/chatgpt-on-wechat git clone https://github.com/zhayujie/chatgpt-on-wechat
cd chatgpt-on-wechat/ cd chatgpt-on-wechat/
``` ```


2.安装所需核心依赖:
**2.安装所需核心依赖:**


```bash ```bash
pip3 install itchat-uos==1.5.0.dev0 pip3 install itchat-uos==1.5.0.dev0
@@ -78,13 +77,45 @@ pip3 install --upgrade openai
``` ```
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。 注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。


**3.安装所需拓展依赖(接收语音,回复语音):**
### wechaty 处理语音所需

```bash
pip3 install pysilk
pip3 install pysilk-mod
pip3 install pydub
```

### 百度

```bash
pip3 install baidu-aip chardet
```
### Google

(1) 安装 SpeechRecognition
```bash
pip3 install SpeechRecognition
```
(2) 安装 ffmpeg 和 espeak

MacOS:
```bash
brew install ffmpeg espeak
```
Windows:下载 ffmpeg.exe,并将其所在目录加入系统 PATH 环境变量

Linux:
```bash
apt-get install ffmpeg espeak
```


## 配置 ## 配置


配置文件的模板在根目录的`config-template.json`中,需复制该模板创建最终生效的 `config.json` 文件: 配置文件的模板在根目录的`config-template.json`中,需复制该模板创建最终生效的 `config.json` 文件:


```bash ```bash
cp config-template.json config.json
cp config-template.json config.json
``` ```


然后在`config.json`中填入配置,以下是对默认配置的说明,可根据需要进行自定义修改: 然后在`config.json`中填入配置,以下是对默认配置的说明,可根据需要进行自定义修改:
@@ -93,14 +124,21 @@ cp config-template.json config.json
# config.json文件内容示例 # config.json文件内容示例
{ {
"open_ai_api_key": "YOUR API KEY", # 填入上面创建的 OpenAI API KEY "open_ai_api_key": "YOUR API KEY", # 填入上面创建的 OpenAI API KEY
"open_ai_api_base": "https://api.openai.com/v1", # 自定义 OpenAI API 地址
"proxy": "127.0.0.1:7890", # 代理客户端的ip和端口 "proxy": "127.0.0.1:7890", # 代理客户端的ip和端口
"baidu_app_id": "", # 百度AI的App Id
"baidu_api_key": "", # 百度AI的API KEY
"baidu_secret_key": "", # 百度AI的Secret KEY
"wechaty_puppet_service_token":"", # wechaty服务token
"single_chat_prefix": ["bot", "@bot"], # 私聊时文本需要包含该前缀才能触发机器人回复 "single_chat_prefix": ["bot", "@bot"], # 私聊时文本需要包含该前缀才能触发机器人回复
"single_chat_reply_prefix": "[bot] ", # 私聊时自动回复的前缀,用于区分真人 "single_chat_reply_prefix": "[bot] ", # 私聊时自动回复的前缀,用于区分真人
"group_chat_prefix": ["@bot"], # 群聊时包含该前缀则会触发机器人回复 "group_chat_prefix": ["@bot"], # 群聊时包含该前缀则会触发机器人回复
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], # 开启自动回复的群名称列表 "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], # 开启自动回复的群名称列表
"image_create_prefix": ["画", "看", "找"], # 开启图片回复的前缀 "image_create_prefix": ["画", "看", "找"], # 开启图片回复的前缀
"conversation_max_tokens": 1000, # 支持上下文记忆的最多字符数 "conversation_max_tokens": 1000, # 支持上下文记忆的最多字符数
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。" # 人格描述
"speech_recognition": false, # 是否开启语音识别
"voice_reply_voice": false, # 是否开启语音回复
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。", # 人格描述
} }
``` ```
**配置说明:** **配置说明:**


+ 79
- 0
channel/wechat/wechaty_channel.py View File

@@ -10,12 +10,16 @@ import json
import time import time
import asyncio import asyncio
import requests import requests
import pysilk
import wave
from pydub import AudioSegment
from typing import Optional, Union from typing import Optional, Union
from wechaty_puppet import MessageType, FileBox, ScanStatus # type: ignore from wechaty_puppet import MessageType, FileBox, ScanStatus # type: ignore
from wechaty import Wechaty, Contact from wechaty import Wechaty, Contact
from wechaty.user import Message, Room, MiniProgram, UrlLink from wechaty.user import Message, Room, MiniProgram, UrlLink
from channel.channel import Channel from channel.channel import Channel
from common.log import logger from common.log import logger
from common.tmp_dir import TmpDir
from config import conf from config import conf




@@ -89,6 +93,48 @@ class WechatyChannel(Channel):
await self._do_send_img(content, to_user_id) await self._do_send_img(content, to_user_id)
else: else:
await self._do_send(content, to_user_id) await self._do_send(content, to_user_id)
elif room is None and msg.type() == MessageType.MESSAGE_TYPE_AUDIO:
if not msg.is_self(): # 接收语音消息
# 下载语音文件
voice_file = await msg.to_file_box()
silk_file = TmpDir().path() + voice_file.name
await voice_file.to_file(silk_file)
logger.info("[WX]receive voice file: " + silk_file)
# 将文件转成wav格式音频
wav_file = silk_file.replace(".slk", ".wav")
with open(silk_file, 'rb') as f:
silk_data = f.read()
pcm_data = pysilk.decode(silk_data)

with wave.open(wav_file, 'wb') as wav_data:
wav_data.setnchannels(1)
wav_data.setsampwidth(2)
wav_data.setframerate(24000)
wav_data.writeframes(pcm_data)
if os.path.exists(wav_file):
converter_state = "true" # 转换wav成功
else:
converter_state = "false" # 转换wav失败
logger.info("[WX]receive voice converter: " + converter_state)
# 语音识别为文本
query = super().build_voice_to_text(wav_file)
# 校验关键字
match_prefix = self.check_prefix(query, conf().get('single_chat_prefix'))
if match_prefix is not None:
if match_prefix != '':
str_list = query.split(match_prefix, 1)
if len(str_list) == 2:
query = str_list[1].strip()
# 返回消息
if conf().get('voice_reply_voice'):
await self._do_send_voice(query, from_user_id)
else:
await self._do_send(query, from_user_id)
else:
logger.info("[WX]receive voice check prefix: " + 'False')
# 清除缓存文件
os.remove(wav_file)
os.remove(silk_file)
elif room and msg.type() == MessageType.MESSAGE_TYPE_TEXT: elif room and msg.type() == MessageType.MESSAGE_TYPE_TEXT:
# 群组&文本消息 # 群组&文本消息
room_id = room.room_id room_id = room.room_id
@@ -135,6 +181,39 @@ class WechatyChannel(Channel):
except Exception as e: except Exception as e:
logger.exception(e) logger.exception(e)



async def _do_send_voice(self, query, reply_user_id):
    """Generate a reply to ``query`` and send it to the user as a SILK voice message.

    The reply text is obtained from the parent channel's
    ``build_reply_content``, synthesized to an mp3 file via
    ``build_text_to_voice``, re-encoded to SILK, and sent with ``self.send``.

    Args:
        query: The recognized/parsed user request. Falsy values are ignored.
        reply_user_id: Recipient id; also used as the conversation
            ``session_id`` passed to ``build_reply_content``.

    Returns:
        None. Errors are logged and swallowed (best-effort delivery);
        temporary audio files are removed whether or not sending succeeded.
    """
    if not query:
        return
    mp3_file = None
    silk_file = None
    try:
        context = dict()
        context['session_id'] = reply_user_id
        reply_text = super().build_reply_content(query, context)
        if not reply_text:
            return
        # build_text_to_voice is used here as returning the path of an mp3 file.
        mp3_file = super().build_text_to_voice(reply_text)
        # Swap only the extension; str.replace would also rewrite any
        # ".mp3" occurring earlier in the path (e.g. in a directory name).
        silk_file = os.path.splitext(mp3_file)[0] + ".silk"
        # Load the mp3 and resample to 24 kHz mono, matching the rate
        # passed to the SILK encoder below.
        audio = AudioSegment.from_file(mp3_file, format="mp3")
        audio = audio.set_frame_rate(24000).set_channels(1)
        pcm_data = audio.raw_data
        # Encode the raw PCM samples to SILK and persist them.
        silk_data = pysilk.encode(pcm_data, 24000)
        with open(silk_file, "wb") as f:
            f.write(silk_data)
        # Send the voice file, named after the current timestamp.
        t = int(time.time())
        file_box = FileBox.from_file(silk_file, name=str(t) + '.silk')
        await self.send(file_box, reply_user_id)
    except Exception as e:
        logger.exception(e)
    finally:
        # Always clear the temporary files, including after a failed
        # conversion or send, so they do not accumulate on disk.
        for tmp_file in (mp3_file, silk_file):
            if tmp_file and os.path.exists(tmp_file):
                try:
                    os.remove(tmp_file)
                except OSError as e:
                    logger.exception(e)
async def _do_send_img(self, query, reply_user_id): async def _do_send_img(self, query, reply_user_id):
try: try:
if not query: if not query:


+ 8
- 3
config-template.json View File

@@ -1,13 +1,18 @@
{ {
"open_ai_api_key": "YOUR API KEY", "open_ai_api_key": "YOUR API KEY",
"proxy": "", "proxy": "",
"wechaty_puppet_service_token":"",
"baidu_app_id": "",
"baidu_api_key": "",
"baidu_secret_key": "",
"single_chat_prefix": ["bot", "@bot"], "single_chat_prefix": ["bot", "@bot"],
"single_chat_reply_prefix": "[bot] ", "single_chat_reply_prefix": "[bot] ",
"group_chat_prefix": ["@bot"], "group_chat_prefix": ["@bot"],
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
"image_create_prefix": ["画", "看", "找"], "image_create_prefix": ["画", "看", "找"],
"conversation_max_tokens": 1000,
"speech_recognition": false, "speech_recognition": false,
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
"expires_in_seconds": 3600
"voice_reply_voice": false,
"conversation_max_tokens": 1000,
"expires_in_seconds": 3600,
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。"
} }

Loading…
Cancel
Save