@@ -72,9 +72,8 @@ cd chatgpt-on-wechat/ | |||||
pip3 install itchat-uos==1.5.0.dev0 | pip3 install itchat-uos==1.5.0.dev0 | ||||
pip3 install --upgrade openai | pip3 install --upgrade openai | ||||
默认使用openai的whisper-1模型 | |||||
如果使用百度的语音识别,需要安装百度的pythonSDK | 如果使用百度的语音识别,需要安装百度的pythonSDK | ||||
pip3 install baidu-aip | |||||
pip3 install baidu-aip chardet | |||||
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak | 如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak | ||||
pip3 install SpeechRecognition | pip3 install SpeechRecognition | ||||
--在MacOS中安装ffmpeg,brew install ffmpeg espeak | --在MacOS中安装ffmpeg,brew install ffmpeg espeak | ||||
@@ -122,7 +121,8 @@ cp config-template.json config.json | |||||
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay)) | + 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay)) | ||||
**3.语音识别** | **3.语音识别** | ||||
+ 配置`speech_recognition=true`开启语音识别 | |||||
+ 配置`speech_recognition=true`开启语音识别,默认使用openai的whisper模型 | |||||
+ 配置`voice_reply_voice=true`后,收到语音消息时会以语音回复,需要同时配置对应语音合成平台的key | |||||
**4.其他配置** | **4.其他配置** | ||||
@@ -4,14 +4,13 @@ | |||||
wechat channel | wechat channel | ||||
""" | """ | ||||
import os | |||||
import pathlib | |||||
import itchat | import itchat | ||||
import json | import json | ||||
from itchat.content import * | from itchat.content import * | ||||
from channel.channel import Channel | from channel.channel import Channel | ||||
from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
from common.log import logger | from common.log import logger | ||||
from common.tmp_dir import TmpDir | |||||
from config import conf | from config import conf | ||||
import requests | import requests | ||||
import io | import io | ||||
@@ -38,12 +37,8 @@ def handler_single_voice(msg): | |||||
class WechatChannel(Channel): | class WechatChannel(Channel): | ||||
tmpFilePath = pathlib.Path('./tmp/') | |||||
def __init__(self): | def __init__(self): | ||||
pathExists = os.path.exists(self.tmpFilePath) | |||||
if not pathExists and conf().get('speech_recognition') == True: | |||||
os.makedirs(self.tmpFilePath) | |||||
pass | |||||
def startup(self): | def startup(self): | ||||
# login by scan QRCode | # login by scan QRCode | ||||
@@ -59,17 +54,17 @@ class WechatChannel(Channel): | |||||
thread_pool.submit(self._do_handle_voice, msg) | thread_pool.submit(self._do_handle_voice, msg) | ||||
def _do_handle_voice(self, msg):
    """Download a voice message, transcribe it and dispatch the result.

    The audio is saved under the directory returned by ``TmpDir().path()``
    using the name found in ``msg['FileName']``. The transcription is then
    handed to ``_handle_single_msg`` together with the value of the
    ``voice_reply_voice`` configuration entry.
    """
    tmp_dir = TmpDir().path()
    voice_path = tmp_dir + msg['FileName']
    msg.download(voice_path)
    transcript = super().build_voice_to_text(voice_path)
    # Read the setting only after transcription, as the reply mode is
    # decided at dispatch time.
    reply_with_voice = conf().get('voice_reply_voice')
    self._handle_single_msg(msg, transcript, reply_with_voice)
def handle_text(self, msg):
    """Log an incoming text message and dispatch its text.

    The message is logged at debug level as JSON, then ``msg['Text']`` is
    passed to ``_handle_single_msg`` with the voice-reply flag set to False.
    """
    serialized = json.dumps(msg, ensure_ascii=False)
    logger.debug("[WX]receive text msg: " + serialized)
    self._handle_single_msg(msg, msg['Text'], False)
def _handle_single_msg(self, msg, content, is_voice): | |||||
def _handle_single_msg(self, msg, content, reply_voice=False): | |||||
from_user_id = msg['FromUserName'] | from_user_id = msg['FromUserName'] | ||||
to_user_id = msg['ToUserName'] # 接收人id | to_user_id = msg['ToUserName'] # 接收人id | ||||
other_user_id = msg['User']['UserName'] # 对手方id | other_user_id = msg['User']['UserName'] # 对手方id | ||||
@@ -88,7 +83,7 @@ class WechatChannel(Channel): | |||||
if img_match_prefix: | if img_match_prefix: | ||||
content = content.split(img_match_prefix, 1)[1].strip() | content = content.split(img_match_prefix, 1)[1].strip() | ||||
thread_pool.submit(self._do_send_img, content, from_user_id) | thread_pool.submit(self._do_send_img, content, from_user_id) | ||||
elif is_voice: | |||||
elif reply_voice: | |||||
thread_pool.submit(self._do_send_voice, content, from_user_id) | thread_pool.submit(self._do_send_voice, content, from_user_id) | ||||
else : | else : | ||||
thread_pool.submit(self._do_send_text, content, from_user_id) | thread_pool.submit(self._do_send_text, content, from_user_id) | ||||
@@ -101,7 +96,7 @@ class WechatChannel(Channel): | |||||
if img_match_prefix: | if img_match_prefix: | ||||
content = content.split(img_match_prefix, 1)[1].strip() | content = content.split(img_match_prefix, 1)[1].strip() | ||||
thread_pool.submit(self._do_send_img, content, to_user_id) | thread_pool.submit(self._do_send_img, content, to_user_id) | ||||
elif is_voice: | |||||
elif reply_voice: | |||||
thread_pool.submit(self._do_send_voice, content, to_user_id) | thread_pool.submit(self._do_send_voice, content, to_user_id) | ||||
else: | else: | ||||
thread_pool.submit(self._do_send_text, content, to_user_id) | thread_pool.submit(self._do_send_text, content, to_user_id) | ||||
@@ -0,0 +1,20 @@ | |||||
import os | |||||
import pathlib | |||||
from config import conf | |||||
class TmpDir(object):
    """Locate (and lazily create) the shared ``./tmp/`` working directory.

    Instantiating the class creates the directory when it is missing and the
    ``speech_recognition`` configuration entry equals True. The directory and
    its contents are never removed by this class.
    """

    # Relative to the current working directory of the process.
    tmpFilePath = pathlib.Path('./tmp/')

    def __init__(self):
        """Create the directory if it is missing and speech recognition is on."""
        if not os.path.exists(self.tmpFilePath) and conf().get('speech_recognition') == True:
            # exist_ok guards against another thread creating the directory
            # between the existence check above and this call.
            os.makedirs(self.tmpFilePath, exist_ok=True)

    def path(self):
        """Return the directory path as a string that ends with '/'."""
        return str(self.tmpFilePath) + '/'
@@ -8,6 +8,7 @@ | |||||
"image_create_prefix": ["画", "看", "找"], | "image_create_prefix": ["画", "看", "找"], | ||||
"conversation_max_tokens": 1000, | "conversation_max_tokens": 1000, | ||||
"speech_recognition": false, | "speech_recognition": false, | ||||
"voice_reply_voice": false, | |||||
"baidu_app_id": "YOUR BAIDU APP ID", | "baidu_app_id": "YOUR BAIDU APP ID", | ||||
"baidu_api_key": "YOUR BAIDU API KEY", | "baidu_api_key": "YOUR BAIDU API KEY", | ||||
"baidu_secret_key": "YOUR BAIDU SERVICE KEY", | "baidu_secret_key": "YOUR BAIDU SERVICE KEY", | ||||
@@ -2,7 +2,10 @@ | |||||
""" | """ | ||||
baidu voice service | baidu voice service | ||||
""" | """ | ||||
import time | |||||
from aip import AipSpeech | from aip import AipSpeech | ||||
from common.log import logger | |||||
from common.tmp_dir import TmpDir | |||||
from voice.voice import Voice | from voice.voice import Voice | ||||
from config import conf | from config import conf | ||||
@@ -19,4 +22,15 @@ class BaiduVoice(Voice): | |||||
pass | pass | ||||
def textToVoice(self, text):
    """Synthesize ``text`` with the Baidu client and save it as an mp3 file.

    Returns the path of the written file, or None when the client returns a
    dict, which this method logs as an error.
    """
    synthesis_options = {'spd': 5, 'pit': 5, 'vol': 5, 'per': 111}
    result = self.client.synthesis(text, 'zh', 1, synthesis_options)

    # A dict result is treated as the failure case; anything else is
    # written to disk as the audio payload.
    if isinstance(result, dict):
        logger.error('[Baidu] textToVoice error={}'.format(result))
        return None

    timestamp = str(int(time.time()))
    fileName = TmpDir().path() + '语音回复_' + timestamp + '.mp3'
    with open(fileName, 'wb') as audio_file:
        audio_file.write(result)
    logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
    return fileName
@@ -9,6 +9,7 @@ import time | |||||
import speech_recognition | import speech_recognition | ||||
import pyttsx3 | import pyttsx3 | ||||
from common.log import logger | from common.log import logger | ||||
from common.tmp_dir import TmpDir | |||||
from voice.voice import Voice | from voice.voice import Voice | ||||
@@ -42,7 +43,7 @@ class GoogleVoice(Voice): | |||||
return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e) | return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e) | ||||
def textToVoice(self, text): | def textToVoice(self, text): | ||||
textFile = self.tmpFilePath + '语音回复_' + str(int(time.time())) + '.mp3' | |||||
textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3' | |||||
self.engine.save_to_file(text, textFile) | self.engine.save_to_file(text, textFile) | ||||
self.engine.runAndWait() | self.engine.runAndWait() | ||||
logger.info( | logger.info( | ||||