@@ -6,3 +6,4 @@ venv* | |||||
config.json | config.json | ||||
QR.png | QR.png | ||||
nohup.out | nohup.out | ||||
tmp |
@@ -71,6 +71,14 @@ cd chatgpt-on-wechat/ | |||||
```bash | ```bash | ||||
pip3 install itchat-uos==1.5.0.dev0 | pip3 install itchat-uos==1.5.0.dev0 | ||||
pip3 install --upgrade openai | pip3 install --upgrade openai | ||||
如果使用百度的语音识别,需要安装百度的pythonSDK | |||||
pip3 install baidu-aip chardet | |||||
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak | |||||
pip3 install SpeechRecognition | |||||
--在MacOS中安装ffmpeg,brew install ffmpeg espeak | |||||
--在Windows中安装ffmpeg,下载ffmpeg.exe | |||||
--在Linux中安装ffmpeg,apt-get install ffmpeg espeak | |||||
``` | ``` | ||||
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。 | 注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。 | ||||
@@ -112,7 +120,11 @@ cp config-template.json config.json | |||||
+ 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix` | + 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix` | ||||
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay)) | + 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay)) | ||||
**3.其他配置** | |||||
**3.语音识别** | |||||
+ 配置`speech_recognition=true`开启语音识别,默认使用openai的whisper模型 | |||||
+ 配置`voice_reply_voice=true`语音回复语音,但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件。使用wechaty则回复的是微信语音。 | |||||
**4.其他配置** | |||||
+ `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考 [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351) | + `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考 [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351) | ||||
+ 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix ` | + 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix ` | ||||
@@ -1,4 +1,5 @@ | |||||
from bot import bot_factory | from bot import bot_factory | ||||
from voice import voice_factory | |||||
class Bridge(object): | class Bridge(object): | ||||
@@ -7,3 +8,9 @@ class Bridge(object): | |||||
def fetch_reply_content(self, query, context): | def fetch_reply_content(self, query, context): | ||||
return bot_factory.create_bot("chatGPT").reply(query, context) | return bot_factory.create_bot("chatGPT").reply(query, context) | ||||
def fetch_voice_to_text(self, voiceFile): | |||||
return voice_factory.create_voice("openai").voiceToText(voiceFile) | |||||
def fetch_text_to_voice(self, text): | |||||
return voice_factory.create_voice("baidu").textToVoice(text) |
@@ -11,7 +11,7 @@ class Channel(object): | |||||
""" | """ | ||||
raise NotImplementedError | raise NotImplementedError | ||||
def handle(self, msg): | |||||
def handle_text(self, msg): | |||||
""" | """ | ||||
process received msg | process received msg | ||||
:param msg: message object | :param msg: message object | ||||
@@ -29,3 +29,9 @@ class Channel(object): | |||||
def build_reply_content(self, query, context=None): | def build_reply_content(self, query, context=None): | ||||
return Bridge().fetch_reply_content(query, context) | return Bridge().fetch_reply_content(query, context) | ||||
def build_voice_to_text(self, voice_file): | |||||
return Bridge().fetch_voice_to_text(voice_file) | |||||
def build_text_to_voice(self, text): | |||||
return Bridge().fetch_text_to_voice(text) |
@@ -3,12 +3,14 @@ | |||||
""" | """ | ||||
wechat channel | wechat channel | ||||
""" | """ | ||||
import itchat | import itchat | ||||
import json | import json | ||||
from itchat.content import * | from itchat.content import * | ||||
from channel.channel import Channel | from channel.channel import Channel | ||||
from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
from common.log import logger | from common.log import logger | ||||
from common.tmp_dir import TmpDir | |||||
from config import conf | from config import conf | ||||
import requests | import requests | ||||
import io | import io | ||||
@@ -18,7 +20,7 @@ thread_pool = ThreadPoolExecutor(max_workers=8) | |||||
@itchat.msg_register(TEXT) | @itchat.msg_register(TEXT) | ||||
def handler_single_msg(msg): | def handler_single_msg(msg): | ||||
WechatChannel().handle(msg) | |||||
WechatChannel().handle_text(msg) | |||||
return None | return None | ||||
@@ -28,6 +30,12 @@ def handler_group_msg(msg): | |||||
return None | return None | ||||
@itchat.msg_register(VOICE) | |||||
def handler_single_voice(msg): | |||||
WechatChannel().handle_voice(msg) | |||||
return None | |||||
class WechatChannel(Channel): | class WechatChannel(Channel): | ||||
def __init__(self): | def __init__(self): | ||||
pass | pass | ||||
@@ -39,12 +47,27 @@ class WechatChannel(Channel): | |||||
# start message listener | # start message listener | ||||
itchat.run() | itchat.run() | ||||
def handle(self, msg): | |||||
logger.debug("[WX]receive msg: " + json.dumps(msg, ensure_ascii=False)) | |||||
def handle_voice(self, msg): | |||||
if conf().get('speech_recognition') != True : | |||||
return | |||||
logger.debug("[WX]receive voice msg: " + msg['FileName']) | |||||
thread_pool.submit(self._do_handle_voice, msg) | |||||
def _do_handle_voice(self, msg): | |||||
fileName = TmpDir().path() + msg['FileName'] | |||||
msg.download(fileName) | |||||
content = super().build_voice_to_text(fileName) | |||||
self._handle_single_msg(msg, content, conf().get('voice_reply_voice')) | |||||
def handle_text(self, msg): | |||||
logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False)) | |||||
content = msg['Text'] | |||||
self._handle_single_msg(msg, content, False) | |||||
def _handle_single_msg(self, msg, content, reply_voice=False): | |||||
from_user_id = msg['FromUserName'] | from_user_id = msg['FromUserName'] | ||||
to_user_id = msg['ToUserName'] # 接收人id | to_user_id = msg['ToUserName'] # 接收人id | ||||
other_user_id = msg['User']['UserName'] # 对手方id | other_user_id = msg['User']['UserName'] # 对手方id | ||||
content = msg['Text'] | |||||
match_prefix = self.check_prefix(content, conf().get('single_chat_prefix')) | match_prefix = self.check_prefix(content, conf().get('single_chat_prefix')) | ||||
if "」\n- - - - - - - - - - - - - - -" in content: | if "」\n- - - - - - - - - - - - - - -" in content: | ||||
logger.debug("[WX]reference query skipped") | logger.debug("[WX]reference query skipped") | ||||
@@ -60,9 +83,10 @@ class WechatChannel(Channel): | |||||
if img_match_prefix: | if img_match_prefix: | ||||
content = content.split(img_match_prefix, 1)[1].strip() | content = content.split(img_match_prefix, 1)[1].strip() | ||||
thread_pool.submit(self._do_send_img, content, from_user_id) | thread_pool.submit(self._do_send_img, content, from_user_id) | ||||
else: | |||||
thread_pool.submit(self._do_send, content, from_user_id) | |||||
elif reply_voice: | |||||
thread_pool.submit(self._do_send_voice, content, from_user_id) | |||||
else : | |||||
thread_pool.submit(self._do_send_text, content, from_user_id) | |||||
elif to_user_id == other_user_id and match_prefix: | elif to_user_id == other_user_id and match_prefix: | ||||
# 自己给好友发送消息 | # 自己给好友发送消息 | ||||
str_list = content.split(match_prefix, 1) | str_list = content.split(match_prefix, 1) | ||||
@@ -72,8 +96,10 @@ class WechatChannel(Channel): | |||||
if img_match_prefix: | if img_match_prefix: | ||||
content = content.split(img_match_prefix, 1)[1].strip() | content = content.split(img_match_prefix, 1)[1].strip() | ||||
thread_pool.submit(self._do_send_img, content, to_user_id) | thread_pool.submit(self._do_send_img, content, to_user_id) | ||||
elif reply_voice: | |||||
thread_pool.submit(self._do_send_voice, content, to_user_id) | |||||
else: | else: | ||||
thread_pool.submit(self._do_send, content, to_user_id) | |||||
thread_pool.submit(self._do_send_text, content, to_user_id) | |||||
def handle_group(self, msg): | def handle_group(self, msg): | ||||
@@ -105,10 +131,24 @@ class WechatChannel(Channel): | |||||
thread_pool.submit(self._do_send_group, content, msg) | thread_pool.submit(self._do_send_group, content, msg) | ||||
def send(self, msg, receiver): | def send(self, msg, receiver): | ||||
logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver)) | |||||
itchat.send(msg, toUserName=receiver) | itchat.send(msg, toUserName=receiver) | ||||
logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver)) | |||||
def _do_send(self, query, reply_user_id): | |||||
def _do_send_voice(self, query, reply_user_id): | |||||
try: | |||||
if not query: | |||||
return | |||||
context = dict() | |||||
context['from_user_id'] = reply_user_id | |||||
reply_text = super().build_reply_content(query, context) | |||||
if reply_text: | |||||
replyFile = super().build_text_to_voice(reply_text) | |||||
itchat.send_file(replyFile, toUserName=reply_user_id) | |||||
logger.info('[WX] sendFile={}, receiver={}'.format(replyFile, reply_user_id)) | |||||
except Exception as e: | |||||
logger.exception(e) | |||||
def _do_send_text(self, query, reply_user_id): | |||||
try: | try: | ||||
if not query: | if not query: | ||||
return | return | ||||
@@ -138,8 +178,8 @@ class WechatChannel(Channel): | |||||
image_storage.seek(0) | image_storage.seek(0) | ||||
# 图片发送 | # 图片发送 | ||||
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id)) | |||||
itchat.send_image(image_storage, reply_user_id) | itchat.send_image(image_storage, reply_user_id) | ||||
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id)) | |||||
except Exception as e: | except Exception as e: | ||||
logger.exception(e) | logger.exception(e) | ||||
@@ -0,0 +1,20 @@ | |||||
import os | |||||
import pathlib | |||||
from config import conf | |||||
class TmpDir(object):
    """Helper for the shared './tmp/' working directory used for voice files.

    NOTE(review): despite the original wording, the directory is NOT deleted
    when the object is destroyed — it is only created on demand.
    """

    # Fixed location; every instance shares the same directory.
    tmpFilePath = pathlib.Path('./tmp/')

    def __init__(self):
        # Create the directory lazily, and only when voice features are
        # enabled via the 'speech_recognition' config flag.
        pathExists = os.path.exists(self.tmpFilePath)
        if not pathExists and conf().get('speech_recognition') == True:
            os.makedirs(self.tmpFilePath)

    def path(self):
        # Return the directory as a string with a trailing slash so callers
        # can append a bare file name directly.
        return str(self.tmpFilePath) + '/'
@@ -7,6 +7,11 @@ | |||||
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], | "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"], | ||||
"image_create_prefix": ["画", "看", "找"], | "image_create_prefix": ["画", "看", "找"], | ||||
"conversation_max_tokens": 1000, | "conversation_max_tokens": 1000, | ||||
"speech_recognition": false, | |||||
"voice_reply_voice": false, | |||||
"baidu_app_id": "YOUR BAIDU APP ID", | |||||
"baidu_api_key": "YOUR BAIDU API KEY", | |||||
"baidu_secret_key": "YOUR BAIDU SERVICE KEY", | |||||
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。", | "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。", | ||||
"expires_in_seconds": 3600 | "expires_in_seconds": 3600 | ||||
} | } |
@@ -0,0 +1,36 @@ | |||||
""" | |||||
baidu voice service | |||||
""" | |||||
import time | |||||
from aip import AipSpeech | |||||
from common.log import logger | |||||
from common.tmp_dir import TmpDir | |||||
from voice.voice import Voice | |||||
from config import conf | |||||
class BaiduVoice(Voice):
    """Baidu speech backend (text-to-speech only; recognition is a stub)."""

    # Credentials are read once from the global config when the class is
    # first imported; all instances share a single SDK client.
    APP_ID = conf().get('baidu_app_id')
    API_KEY = conf().get('baidu_api_key')
    SECRET_KEY = conf().get('baidu_secret_key')
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def __init__(self):
        pass

    def voiceToText(self, voice_file):
        # Speech recognition is not implemented for the Baidu backend.
        pass

    def textToVoice(self, text):
        # Request Mandarin TTS; the SDK returns raw MP3 bytes on success and
        # an error-describing dict on failure.
        audio = self.client.synthesis(text, 'zh', 1, {
            'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
        })
        if isinstance(audio, dict):
            logger.error('[Baidu] textToVoice error={}'.format(audio))
            return None
        file_name = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
        with open(file_name, 'wb') as out:
            out.write(audio)
        logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, file_name))
        return file_name
@@ -0,0 +1,51 @@ | |||||
""" | |||||
google voice service | |||||
""" | |||||
import pathlib | |||||
import subprocess | |||||
import time | |||||
import speech_recognition | |||||
import pyttsx3 | |||||
from common.log import logger | |||||
from common.tmp_dir import TmpDir | |||||
from voice.voice import Voice | |||||
class GoogleVoice(Voice):
    """Google voice backend: speech recognition via the Google Web Speech API
    and local text-to-speech via pyttsx3.
    """

    # Shared recognizer and TTS engine for all instances.
    recognizer = speech_recognition.Recognizer()
    engine = pyttsx3.init()

    def __init__(self):
        # 语速 (speech rate)
        self.engine.setProperty('rate', 125)
        # 音量 (volume, 0.0-1.0)
        self.engine.setProperty('volume', 1.0)
        # Prefer the second installed voice (conventionally female on most
        # platforms); fall back to the engine default when fewer voices
        # exist instead of crashing with IndexError.
        voices = self.engine.getProperty('voices')
        if len(voices) > 1:
            self.engine.setProperty('voice', voices[1].id)

    def voiceToText(self, voice_file):
        """Transcribe an MP3 voice file to Chinese text.

        Converts to 16 kHz mono PCM WAV with ffmpeg first, since the
        recognizer only accepts WAV/AIFF/FLAC input.
        """
        new_file = voice_file.replace('.mp3', '.wav')
        # Pass an argument list with shell=False (the default) so file names
        # containing spaces or shell metacharacters cannot break the command
        # or inject shell code (the original built a shell=True string).
        subprocess.call(['ffmpeg', '-i', voice_file,
                         '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000',
                         new_file])
        with speech_recognition.AudioFile(new_file) as source:
            audio = self.recognizer.record(source)
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')
            logger.info(
                '[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
            return text
        except speech_recognition.UnknownValueError:
            return "抱歉,我听不懂。"
        except speech_recognition.RequestError as e:
            return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)

    def textToVoice(self, text):
        """Render *text* to an MP3 file in the tmp dir and return its path."""
        textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
        self.engine.save_to_file(text, textFile)
        self.engine.runAndWait()
        logger.info(
            '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
        return textFile
@@ -0,0 +1,27 @@ | |||||
""" | |||||
openai voice service
""" | |||||
import json | |||||
import openai | |||||
from config import conf | |||||
from common.log import logger | |||||
from voice.voice import Voice | |||||
class OpenaiVoice(Voice):
    """OpenAI voice backend: speech-to-text via the Whisper API
    (text-to-speech is not implemented).
    """

    def __init__(self):
        openai.api_key = conf().get('open_ai_api_key')

    def voiceToText(self, voice_file):
        """Transcribe the audio file at *voice_file* and return its text."""
        logger.debug(
            '[Openai] voice file name={}'.format(voice_file))
        # Use a context manager so the file handle is closed even when the
        # API call raises (the original opened the file and never closed it).
        with open(voice_file, "rb") as file:
            reply = openai.Audio.transcribe("whisper-1", file)
        text = reply["text"]
        logger.info(
            '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
        return text

    def textToVoice(self, text):
        # Text-to-speech is not implemented for the OpenAI backend.
        pass
@@ -0,0 +1,16 @@ | |||||
""" | |||||
Voice service abstract class | |||||
""" | |||||
class Voice(object):
    """Abstract interface for pluggable speech services.

    Concrete backends (Baidu, Google, OpenAI, ...) override both methods;
    the base implementations simply signal "not supported".
    """

    def voiceToText(self, voice_file):
        """Transcribe the audio file at *voice_file* and return its text."""
        raise NotImplementedError

    def textToVoice(self, text):
        """Synthesize *text* to an audio file and return its path."""
        raise NotImplementedError
@@ -0,0 +1,20 @@ | |||||
""" | |||||
voice factory | |||||
""" | |||||
def create_voice(voice_type):
    """
    create a voice instance
    :param voice_type: voice type code ('baidu', 'google' or 'openai')
    :return: voice instance
    :raises RuntimeError: if voice_type is not a known backend
    """
    # Imports are kept inside each branch so only the selected backend's
    # third-party dependencies need to be installed.
    if voice_type == 'baidu':
        from voice.baidu.baidu_voice import BaiduVoice
        return BaiduVoice()
    elif voice_type == 'google':
        from voice.google.google_voice import GoogleVoice
        return GoogleVoice()
    elif voice_type == 'openai':
        from voice.openai.openai_voice import OpenaiVoice
        return OpenaiVoice()
    # A bare `raise RuntimeError` gave no hint which value was rejected.
    raise RuntimeError('unknown voice_type: {}'.format(voice_type))