瀏覽代碼

[voice] add google voice support

master
wanggang 1 年之前
父節點
當前提交
d38fc61043
共有 10 個檔案被更改,包括 132 行新增7 行删除
  1. +1
    -0
      .gitignore
  2. +10
    -1
      README.md
  3. +4
    -0
      bridge/bridge.py
  4. +4
    -1
      channel/channel.py
  5. +29
    -5
      channel/wechat/wechat_channel.py
  6. +1
    -0
      config-template.json
  7. +21
    -0
      voice/google/google_voice.py
  8. +10
    -0
      voice/voice.py
  9. +17
    -0
      voice/voice_factory.py
  10. +35
    -0
      voice/xfyun/xfyun_voice.py

+ 1
- 0
.gitignore 查看文件

@@ -6,3 +6,4 @@ venv*
config.json
QR.png
nohup.out
tmp

+ 10
- 1
README.md 查看文件

@@ -71,6 +71,12 @@ cd chatgpt-on-wechat/
```bash
pip3 install itchat-uos==1.5.0.dev0
pip3 install --upgrade openai

# 如果使用google的语音识别,需要安装SpeechRecognition和依赖的ffmpeg
pip3 install SpeechRecognition
# 在MacOS中安装ffmpeg: brew install ffmpeg
# 在Windows中安装ffmpeg: 下载ffmpeg.exe
# 在Linux中安装ffmpeg: apt-get install ffmpeg
```
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。

@@ -112,7 +118,10 @@ cp config-template.json config.json
+ 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix`
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))

**3.其他配置**
**3.语音识别**
+ 在 `config.json` 中配置 `"speech_recognition": true` 开启语音识别(默认为 `false`)

**4.其他配置**

+ `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考 [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351)
+ 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix `


+ 4
- 0
bridge/bridge.py 查看文件

@@ -1,4 +1,5 @@
from bot import bot_factory
from voice import voice_factory


class Bridge(object):
@@ -7,3 +8,6 @@ class Bridge(object):

def fetch_reply_content(self, query, context):
    """
    Forward a query to the "chatGPT" bot and return its reply.
    :param query: text to send to the bot
    :param context: context object passed through to the bot unchanged
    :return: whatever the bot's reply() returns
    """
    bot = bot_factory.create_bot("chatGPT")
    return bot.reply(query, context)

def fetch_voice_to_text(self, voiceFile):
    """
    Transcribe a voice file with the "google" voice backend.
    :param voiceFile: path of the voice file to transcribe
    :return: whatever the backend's voiceToText() returns
    """
    voice_service = voice_factory.create_voice("google")
    return voice_service.voiceToText(voiceFile)

+ 4
- 1
channel/channel.py 查看文件

@@ -11,7 +11,7 @@ class Channel(object):
"""
raise NotImplementedError

def handle(self, msg):
def handle_text(self, msg):
"""
process received msg
:param msg: message object
@@ -29,3 +29,6 @@ class Channel(object):

def build_reply_content(self, query, context=None):
    """
    Build the reply for a query by delegating to a fresh Bridge.
    :param query: text to reply to
    :param context: optional context forwarded to the bridge
    :return: result of Bridge.fetch_reply_content
    """
    bridge = Bridge()
    return bridge.fetch_reply_content(query, context)

def build_void_text(self, voice_file):
    """
    Convert a voice file to text by delegating to a fresh Bridge.
    :param voice_file: path of the voice file to transcribe
    :return: result of Bridge.fetch_voice_to_text
    """
    bridge = Bridge()
    return bridge.fetch_voice_to_text(voice_file)

+ 29
- 5
channel/wechat/wechat_channel.py 查看文件

@@ -3,6 +3,8 @@
"""
wechat channel
"""

import os
import itchat
import json
from itchat.content import *
@@ -18,7 +20,7 @@ thread_pool = ThreadPoolExecutor(max_workers=8)

@itchat.msg_register(TEXT)
def handler_single_msg(msg):
WechatChannel().handle(msg)
WechatChannel().handle_text(msg)
return None


@@ -28,9 +30,19 @@ def handler_group_msg(msg):
return None


@itchat.msg_register(VOICE)
def handler_single_voice(msg):
    """
    Callback registered with itchat for VOICE messages.
    Hands the message to a new WechatChannel and always returns None.
    :param msg: message object delivered by itchat
    """
    channel = WechatChannel()
    channel.handle_voice(msg)
    return None


class WechatChannel(Channel):
tmpFilePath = './tmp/'

def __init__(self):
    """
    Make sure the temporary directory used for downloaded voice files exists.
    """
    # A channel object is created for every incoming message, so a separate
    # "exists?" check followed by makedirs can race between handlers;
    # exist_ok=True makes the creation a single idempotent call.
    os.makedirs(self.tmpFilePath, exist_ok=True)

def startup(self):
# login by scan QRCode
@@ -39,12 +51,24 @@ class WechatChannel(Channel):
# start message listener
itchat.run()

def handle(self, msg):
logger.debug("[WX]receive msg: " + json.dumps(msg, ensure_ascii=False))
def handle_voice(self, msg):
    """
    Handle a received voice message: download it, transcribe it and process
    the transcript like a single-chat text message.
    Does nothing unless the 'speech_recognition' config value equals True.
    :param msg: voice message object; must provide 'FileName' and download()
    """
    # Voice handling is opt-in; the comparison is kept as an equality test
    # against True so the set of accepted config values does not change.
    if conf().get('speech_recognition') != True:
        return
    file_name = msg['FileName']
    # Use a %s placeholder: passing the value as an extra argument without
    # one makes the log record fail to format.
    # NOTE(review): assumes logger follows the stdlib logging API - confirm.
    logger.debug("[WX]receive voice msg: %s", file_name)
    voice_path = self.tmpFilePath + file_name
    msg.download(voice_path)
    content = super().build_void_text(voice_path)
    self._handle_single_msg(msg, content)

def handle_text(self, msg):
    """
    Handle a received text message: log it as JSON and process its 'Text'
    field as a single-chat message.
    :param msg: text message object; must be JSON-serializable and have 'Text'
    """
    serialized = json.dumps(msg, ensure_ascii=False)
    logger.debug("[WX]receive text msg: " + serialized)
    self._handle_single_msg(msg, msg['Text'])

def _handle_single_msg(self, msg, content):
from_user_id = msg['FromUserName']
to_user_id = msg['ToUserName'] # 接收人id
other_user_id = msg['User']['UserName'] # 对手方id
content = msg['Text']
match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
if "」\n- - - - - - - - - - - - - - -" in content:
logger.debug("[WX]reference query skipped")


+ 1
- 0
config-template.json 查看文件

@@ -7,6 +7,7 @@
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
"image_create_prefix": ["画", "看", "找"],
"conversation_max_tokens": 1000,
"speech_recognition": false,
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
"expires_in_seconds": 3600
}

+ 21
- 0
voice/google/google_voice.py 查看文件

@@ -0,0 +1,21 @@

"""
google voice service
"""

import subprocess
import speech_recognition
from voice.voice import Voice

class GoogleVoice(Voice):
    """
    Voice service that converts audio with ffmpeg and transcribes it through
    speech_recognition's recognize_google().
    """

    # One recognizer object shared by all instances of this class.
    recognizer = speech_recognition.Recognizer()

    def __init__(self):
        pass

    def voiceToText(self, voice_file):
        """
        Convert a voice file to WAV and return the recognized text.
        :param voice_file: path of the input audio file (normally *.mp3)
        :return: result of recognize_google() for the audio, language 'zh-CN'
        :raises RuntimeError: if ffmpeg exits with a non-zero status
        """
        # Swap only the trailing extension; a plain str.replace would also
        # rewrite '.mp3' inside directory names and would leave the output
        # path equal to the input path when there is no '.mp3' at all.
        suffix = '.mp3'
        if voice_file.endswith(suffix):
            new_file = voice_file[:-len(suffix)] + '.wav'
        else:
            new_file = voice_file + '.wav'
        # Argument list + no shell: the file name comes from a remote message
        # and must never be interpreted by a shell. '-y' overwrites a stale
        # output file instead of waiting for confirmation on stdin.
        # Output format: 16-bit PCM, mono, 16 kHz.
        command = [
            'ffmpeg', '-y', '-i', voice_file,
            '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000',
            new_file,
        ]
        status = subprocess.call(command)
        if status != 0:
            # Fail here rather than transcribing a missing or partial file.
            raise RuntimeError(
                'ffmpeg failed with exit code {} for {!r}'.format(status, voice_file))
        with speech_recognition.AudioFile(new_file) as source:
            audio = self.recognizer.record(source)
        return self.recognizer.recognize_google(audio, language='zh-CN')

+ 10
- 0
voice/voice.py 查看文件

@@ -0,0 +1,10 @@
"""
Voice service abstract class
"""

class Voice(object):
    """Base class that every voice service implementation derives from."""

    def voiceToText(self, voice_file):
        """
        Send a voice file to the voice service and get the text back.
        Subclasses must override this; the base version always raises.
        :param voice_file: path of the voice file
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

+ 17
- 0
voice/voice_factory.py 查看文件

@@ -0,0 +1,17 @@
"""
voice factory
"""

def create_voice(voice_type):
    """
    create a voice instance
    :param voice_type: voice type code, either 'xfyun' or 'google'
    :return: voice instance
    :raises RuntimeError: if voice_type is not one of the supported codes
    """
    # Each backend is imported only when selected, so the dependencies of the
    # other backend are not needed at import time.
    if voice_type == 'xfyun':
        from voice.xfyun.xfyun_voice import XfyunVoice
        return XfyunVoice()
    elif voice_type == 'google':
        from voice.google.google_voice import GoogleVoice
        return GoogleVoice()
    # Same exception type as before, now with a message naming the bad value.
    raise RuntimeError('unsupported voice type: {!r}'.format(voice_type))

+ 35
- 0
voice/xfyun/xfyun_voice.py 查看文件

@@ -0,0 +1,35 @@

"""
科大讯飞 voice service
"""

from voice.voice import Voice

# iFLYTEK (Xfyun) speech recognition
lfasr_host = 'http://raasr.xfyun.cn/api'
# Names of the API endpoints to request
api_prepare = '/prepare'
api_upload = '/upload'
api_merge = '/merge'
api_get_progress = '/getProgress'
api_get_result = '/getResult'
# File chunk size: 10 MB
file_piece_sice = 10485760
# —————————————————— Configurable transcription parameters ————————————————
# The parameters are listed on the official site (https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E8%BD%AC%E5%86%99.html); add or modify them in the gene_params method as needed
# NOTE(review): no gene_params method is visible in this file - confirm where it lives
# Transcription type
lfasr_type = 0
# Whether to enable word segmentation
has_participle = 'false'
has_seperate = 'true'
# Number of alternative candidate words
max_alternatives = 0
# Sub-user identifier
suid = ''

class XfyunVoice(Voice):
    """iFLYTEK (Xfyun) voice service; recognition is not implemented yet."""

    def __init__(self):
        pass

    def voiceToText(self, voice_file):
        """
        Placeholder for Xfyun speech-to-text.
        :param voice_file: path of the voice file (currently unused)
        :return: None, because no recognition is performed
        """
        return None

Loading…
取消
儲存