ソースを参照

[voice] using baidu service to gen reply voice

master
wanggang 1年前
コミット
3db452ef71
6個のファイルの変更48行の追加17行の削除
  1. +3
    -3
      README.md
  2. +7
    -12
      channel/wechat/wechat_channel.py
  3. +20
    -0
      common/tmp_dir.py
  4. +1
    -0
      config-template.json
  5. +15
    -1
      voice/baidu/baidu_voice.py
  6. +2
    -1
      voice/google/google_voice.py

+ 3
- 3
README.md ファイルの表示

@@ -72,9 +72,8 @@ cd chatgpt-on-wechat/
pip3 install itchat-uos==1.5.0.dev0
pip3 install --upgrade openai

默认使用openai的whisper-1模型
如果使用百度的语音识别,需要安装百度的pythonSDK
pip3 install baidu-aip
pip3 install baidu-aip chardet
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak
pip3 install SpeechRecognition
--在MacOS中安装ffmpeg,brew install ffmpeg espeak
@@ -122,7 +121,8 @@ cp config-template.json config.json
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))

**3.语音识别**
+ 配置`speech_recognition=true`开启语音识别
+ 配置`speech_recognition=true`开启语音识别,默认使用openai的whisper模型
+ 配置`voice_reply_voice=true`语音回复语音,但是需要配置对应语音合成平台的key

**4.其他配置**



+ 7
- 12
channel/wechat/wechat_channel.py ファイルの表示

@@ -4,14 +4,13 @@
wechat channel
"""

import os
import pathlib
import itchat
import json
from itchat.content import *
from channel.channel import Channel
from concurrent.futures import ThreadPoolExecutor
from common.log import logger
from common.tmp_dir import TmpDir
from config import conf
import requests
import io
@@ -38,12 +37,8 @@ def handler_single_voice(msg):


class WechatChannel(Channel):
tmpFilePath = pathlib.Path('./tmp/')

def __init__(self):
pathExists = os.path.exists(self.tmpFilePath)
if not pathExists and conf().get('speech_recognition') == True:
os.makedirs(self.tmpFilePath)
pass

def startup(self):
# login by scan QRCode
@@ -59,17 +54,17 @@ class WechatChannel(Channel):
thread_pool.submit(self._do_handle_voice, msg)

def _do_handle_voice(self, msg):
fileName = self.tmpFilePath+msg['FileName']
fileName = TmpDir().path() + msg['FileName']
msg.download(fileName)
content = super().build_voice_to_text(fileName)
self._handle_single_msg(msg, content, False)
self._handle_single_msg(msg, content, conf().get('voice_reply_voice'))

def handle_text(self, msg):
logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
content = msg['Text']
self._handle_single_msg(msg, content, False)

def _handle_single_msg(self, msg, content, is_voice):
def _handle_single_msg(self, msg, content, reply_voice=False):
from_user_id = msg['FromUserName']
to_user_id = msg['ToUserName'] # 接收人id
other_user_id = msg['User']['UserName'] # 对手方id
@@ -88,7 +83,7 @@ class WechatChannel(Channel):
if img_match_prefix:
content = content.split(img_match_prefix, 1)[1].strip()
thread_pool.submit(self._do_send_img, content, from_user_id)
elif is_voice:
elif reply_voice:
thread_pool.submit(self._do_send_voice, content, from_user_id)
else :
thread_pool.submit(self._do_send_text, content, from_user_id)
@@ -101,7 +96,7 @@ class WechatChannel(Channel):
if img_match_prefix:
content = content.split(img_match_prefix, 1)[1].strip()
thread_pool.submit(self._do_send_img, content, to_user_id)
elif is_voice:
elif reply_voice:
thread_pool.submit(self._do_send_voice, content, to_user_id)
else:
thread_pool.submit(self._do_send_text, content, to_user_id)


+ 20
- 0
common/tmp_dir.py ファイルの表示

@@ -0,0 +1,20 @@

import os
import pathlib
from config import conf


class TmpDir(object):
    """Shared scratch directory (``./tmp/``) used for voice files.

    Instantiating the class creates the directory on demand, but only when
    ``speech_recognition`` is enabled in the configuration. Nothing in this
    class removes the directory or the files placed in it.
    """

    # Class-level location shared by every instance.
    tmpFilePath = pathlib.Path('./tmp/')

    def __init__(self):
        """Create the directory if it is missing and speech recognition is on."""
        pathExists = os.path.exists(self.tmpFilePath)
        if not pathExists and conf().get('speech_recognition') == True:
            # exist_ok=True: callers may construct TmpDir concurrently, so the
            # directory can appear between the existence check and this call.
            os.makedirs(self.tmpFilePath, exist_ok=True)

    def path(self):
        """Return the directory as a string ending in '/', ready to be
        concatenated with a file name."""
        return str(self.tmpFilePath) + '/'

+ 1
- 0
config-template.json ファイルの表示

@@ -8,6 +8,7 @@
"image_create_prefix": ["画", "看", "找"],
"conversation_max_tokens": 1000,
"speech_recognition": false,
"voice_reply_voice": false,
"baidu_app_id": "YOUR BAIDU APP ID",
"baidu_api_key": "YOUR BAIDU API KEY",
"baidu_secret_key": "YOUR BAIDU SERVICE KEY",


+ 15
- 1
voice/baidu/baidu_voice.py ファイルの表示

@@ -2,7 +2,10 @@
"""
baidu voice service
"""
import time
from aip import AipSpeech
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice
from config import conf

@@ -19,4 +22,15 @@ class BaiduVoice(Voice):
pass

def textToVoice(self, text):
pass
result = self.client.synthesis(text, 'zh', 1, {
'spd': 5, 'pit': 5, 'vol': 5, 'per': 111
})
if not isinstance(result, dict):
fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
with open(fileName, 'wb') as f:
f.write(result)
logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
return fileName
else:
logger.error('[Baidu] textToVoice error={}'.format(result))
return None

+ 2
- 1
voice/google/google_voice.py ファイルの表示

@@ -9,6 +9,7 @@ import time
import speech_recognition
import pyttsx3
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice


@@ -42,7 +43,7 @@ class GoogleVoice(Voice):
return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)

def textToVoice(self, text):
textFile = self.tmpFilePath + '语音回复_' + str(int(time.time())) + '.mp3'
textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
self.engine.save_to_file(text, textFile)
self.engine.runAndWait()
logger.info(


読み込み中…
キャンセル
保存