@@ -72,9 +72,8 @@ cd chatgpt-on-wechat/ | |||
pip3 install itchat-uos==1.5.0.dev0 | |||
pip3 install --upgrade openai | |||
默认使用openai的whisper-1模型 | |||
如果使用百度的语音识别,需要安装百度的pythonSDK | |||
pip3 install baidu-aip | |||
pip3 install baidu-aip chardet | |||
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak | |||
pip3 install SpeechRecognition | |||
--在MacOS中安装ffmpeg,brew install ffmpeg espeak | |||
@@ -122,7 +121,8 @@ cp config-template.json config.json | |||
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay)) | |||
**3.语音识别** | |||
+ 配置`speech_recognition=true`开启语音识别 | |||
+ 配置`speech_recognition=true`开启语音识别,默认使用openai的whisper模型 | |||
+ 配置`voice_reply_voice=true`语音回复语音,但是需要配置对应语音合成平台的key | |||
**4.其他配置** | |||
@@ -4,14 +4,13 @@ | |||
wechat channel | |||
""" | |||
import os | |||
import pathlib | |||
import itchat | |||
import json | |||
from itchat.content import * | |||
from channel.channel import Channel | |||
from concurrent.futures import ThreadPoolExecutor | |||
from common.log import logger | |||
from common.tmp_dir import TmpDir | |||
from config import conf | |||
import requests | |||
import io | |||
@@ -38,12 +37,8 @@ def handler_single_voice(msg): | |||
class WechatChannel(Channel): | |||
tmpFilePath = pathlib.Path('./tmp/') | |||
def __init__(self):
    """Prepare the channel's temporary directory for downloaded voice files.

    The directory at ``self.tmpFilePath`` is created only when the
    ``speech_recognition`` config option equals ``True``.
    """
    # `== True` is kept on purpose so the accepted config values do not change.
    if conf().get('speech_recognition') == True:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test followed by os.makedirs().
        os.makedirs(self.tmpFilePath, exist_ok=True)
def startup(self): | |||
# login by scan QRCode | |||
@@ -59,17 +54,17 @@ class WechatChannel(Channel): | |||
thread_pool.submit(self._do_handle_voice, msg) | |||
def _do_handle_voice(self, msg):
    """Download a voice message, transcribe it and dispatch the text once.

    Args:
        msg: the incoming voice message; it must provide ``msg['FileName']``
            and a ``download(path)`` method.
    """
    # Build the target path from the shared temp directory. TmpDir().path()
    # returns a str ending in '/', so plain string concatenation is safe
    # (adding a pathlib.Path to a str raises TypeError).
    file_name = TmpDir().path() + msg['FileName']
    msg.download(file_name)
    content = super().build_voice_to_text(file_name)
    # Dispatch exactly once; whether the reply is spoken is controlled by the
    # `voice_reply_voice` config option.
    self._handle_single_msg(msg, content, conf().get('voice_reply_voice'))
def handle_text(self, msg):
    """Log an incoming text message and forward its text for handling.

    Args:
        msg: the incoming message; it must be JSON-serializable and provide
            ``msg['Text']``.
    """
    serialized = json.dumps(msg, ensure_ascii=False)
    logger.debug("[WX]receive text msg: " + serialized)
    # Text messages are always dispatched with the third argument False.
    self._handle_single_msg(msg, msg['Text'], False)
def _handle_single_msg(self, msg, content, is_voice): | |||
def _handle_single_msg(self, msg, content, reply_voice=False): | |||
from_user_id = msg['FromUserName'] | |||
to_user_id = msg['ToUserName'] # 接收人id | |||
other_user_id = msg['User']['UserName'] # 对手方id | |||
@@ -88,7 +83,7 @@ class WechatChannel(Channel): | |||
if img_match_prefix: | |||
content = content.split(img_match_prefix, 1)[1].strip() | |||
thread_pool.submit(self._do_send_img, content, from_user_id) | |||
elif is_voice: | |||
elif reply_voice: | |||
thread_pool.submit(self._do_send_voice, content, from_user_id) | |||
else : | |||
thread_pool.submit(self._do_send_text, content, from_user_id) | |||
@@ -101,7 +96,7 @@ class WechatChannel(Channel): | |||
if img_match_prefix: | |||
content = content.split(img_match_prefix, 1)[1].strip() | |||
thread_pool.submit(self._do_send_img, content, to_user_id) | |||
elif is_voice: | |||
elif reply_voice: | |||
thread_pool.submit(self._do_send_voice, content, to_user_id) | |||
else: | |||
thread_pool.submit(self._do_send_text, content, to_user_id) | |||
@@ -0,0 +1,20 @@ | |||
import os | |||
import pathlib | |||
from config import conf | |||
class TmpDir(object):
    """Accessor for the shared ``./tmp/`` directory used for voice files.

    Instantiating the class creates the directory when the
    ``speech_recognition`` config option equals ``True``. The directory is
    never removed by this class; files written into it persist until they
    are deleted by other means.
    """

    # Location of the temp directory, relative to the current working directory.
    tmpFilePath = pathlib.Path('./tmp/')

    def __init__(self):
        """Create the temp directory if speech recognition is enabled."""
        # `== True` is kept on purpose so the accepted config values do not change.
        if conf().get('speech_recognition') == True:
            # exist_ok makes creation safe when several threads construct
            # TmpDir() at the same time (no exists()/makedirs() race).
            os.makedirs(self.tmpFilePath, exist_ok=True)

    def path(self):
        """Return the temp directory as a string with a trailing '/'."""
        return str(self.tmpFilePath) + '/'
@@ -8,6 +8,7 @@ | |||
"image_create_prefix": ["画", "看", "找"], | |||
"conversation_max_tokens": 1000, | |||
"speech_recognition": false, | |||
"voice_reply_voice": false, | |||
"baidu_app_id": "YOUR BAIDU APP ID", | |||
"baidu_api_key": "YOUR BAIDU API KEY", | |||
"baidu_secret_key": "YOUR BAIDU SERVICE KEY", | |||
@@ -2,7 +2,10 @@ | |||
""" | |||
baidu voice service | |||
""" | |||
import time | |||
from aip import AipSpeech | |||
from common.log import logger | |||
from common.tmp_dir import TmpDir | |||
from voice.voice import Voice | |||
from config import conf | |||
@@ -19,4 +22,15 @@ class BaiduVoice(Voice): | |||
pass | |||
def textToVoice(self, text):
    """Synthesize ``text`` to an mp3 file through the Baidu speech client.

    Args:
        text: the text to convert to speech.

    Returns:
        The path of the written mp3 file, or ``None`` when the client
        returns a dict (which this method treats as an error result).
    """
    synth_options = {
        'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
    }
    audio = self.client.synthesis(text, 'zh', 1, synth_options)

    # A dict result is treated as a failure; anything else is written out
    # as raw audio bytes.
    if isinstance(audio, dict):
        logger.error('[Baidu] textToVoice error={}'.format(audio))
        return None

    # File name is derived from the current time in whole seconds.
    voice_path = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
    with open(voice_path, 'wb') as out:
        out.write(audio)
    logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, voice_path))
    return voice_path
@@ -9,6 +9,7 @@ import time | |||
import speech_recognition | |||
import pyttsx3 | |||
from common.log import logger | |||
from common.tmp_dir import TmpDir | |||
from voice.voice import Voice | |||
@@ -42,7 +43,7 @@ class GoogleVoice(Voice): | |||
return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e) | |||
def textToVoice(self, text): | |||
textFile = self.tmpFilePath + '语音回复_' + str(int(time.time())) + '.mp3' | |||
textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3' | |||
self.engine.save_to_file(text, textFile) | |||
self.engine.runAndWait() | |||
logger.info( | |||