From d38fc6104398810fe50282b56bebca043bdca34f Mon Sep 17 00:00:00 2001
From: wanggang
Date: Tue, 7 Mar 2023 14:29:59 +0800
Subject: [PATCH] [voice] add google voice support

---
 .gitignore                       |  1 +
 README.md                        | 11 +++++++++-
 bridge/bridge.py                 |  4 ++++
 channel/channel.py               |  5 ++++-
 channel/wechat/wechat_channel.py | 34 ++++++++++++++++++++++++++-----
 config-template.json             |  1 +
 voice/google/google_voice.py     | 21 +++++++++++++++++++
 voice/voice.py                   | 10 +++++++++
 voice/voice_factory.py           | 17 ++++++++++++++++
 voice/xfyun/xfyun_voice.py       | 35 ++++++++++++++++++++++++++++++++
 10 files changed, 132 insertions(+), 7 deletions(-)
 create mode 100644 voice/google/google_voice.py
 create mode 100644 voice/voice.py
 create mode 100644 voice/voice_factory.py
 create mode 100644 voice/xfyun/xfyun_voice.py

diff --git a/.gitignore b/.gitignore
index c4d7bdc..8bc62f3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ venv*
 config.json
 QR.png
 nohup.out
+tmp
diff --git a/README.md b/README.md
index a750378..4ff7cea 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,12 @@ cd chatgpt-on-wechat/
 ```bash
 pip3 install itchat-uos==1.5.0.dev0
 pip3 install --upgrade openai
+
+# 如果使用google的语音识别,需要安装SpeechRecognition(导入名为speech_recognition)和它依赖的ffmpeg
+pip3 install SpeechRecognition
+# 在MacOS中安装ffmpeg:brew install ffmpeg
+# 在Windows中安装ffmpeg:下载ffmpeg.exe
+# 在Linux中安装ffmpeg:apt-get install ffmpeg
 ```
 注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。
 
@@ -112,7 +118,10 @@ cp config-template.json config.json
 + 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix`
 + 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))
 
-**3.其他配置**
+**3.语音识别**
++ 在`config.json`中配置 `"speech_recognition": true` 开启语音识别(默认为`false`)
+
+**4.其他配置**
 
 + `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考 [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351)
 + 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix `
diff --git 
a/bridge/bridge.py b/bridge/bridge.py
index 6c164e8..78d950a 100644
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -1,4 +1,5 @@
 from bot import bot_factory
+from voice import voice_factory
 
 
 class Bridge(object):
@@ -7,3 +8,6 @@ class Bridge(object):
 
     def fetch_reply_content(self, query, context):
         return bot_factory.create_bot("chatGPT").reply(query, context)
+
+    def fetch_voice_to_text(self, voiceFile):  # transcribe the audio file at voiceFile and return the text
+        return voice_factory.create_voice("google").voiceToText(voiceFile)  # backend is hard-coded to "google"
diff --git a/channel/channel.py b/channel/channel.py
index e2617d1..d4c0fc5 100644
--- a/channel/channel.py
+++ b/channel/channel.py
@@ -11,7 +11,7 @@ class Channel(object):
         """
         raise NotImplementedError
 
-    def handle(self, msg):
+    def handle_text(self, msg):
         """
         process received msg
         :param msg: message object
@@ -29,3 +29,6 @@ class Channel(object):
 
     def build_reply_content(self, query, context=None):
         return Bridge().fetch_reply_content(query, context)
+
+    def build_void_text(self, voice_file):  # NOTE(review): "void" is presumably a typo for "voice"; name kept for callers
+        return Bridge().fetch_voice_to_text(voice_file)
diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py
index 66778f4..b773010 100644
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -3,6 +3,8 @@
 """
 wechat channel
 """
+
+import os
 import itchat
 import json
 from itchat.content import *
@@ -18,7 +20,7 @@ thread_pool = ThreadPoolExecutor(max_workers=8)
 
 @itchat.msg_register(TEXT)
 def handler_single_msg(msg):
-    WechatChannel().handle(msg)
+    WechatChannel().handle_text(msg)
     return None
 
 
@@ -28,9 +30,19 @@ def handler_group_msg(msg):
     return None
 
 
+@itchat.msg_register(VOICE)
+def handler_single_voice(msg):  # itchat callback registered for VOICE messages
+    WechatChannel().handle_voice(msg)
+    return None
+
+
 class WechatChannel(Channel):
+    tmpFilePath = './tmp/'
+
     def __init__(self):
-        pass
+        # Make sure the download directory exists. A WechatChannel() is built per
+        # incoming message, so use exist_ok instead of a check-then-create sequence.
+        os.makedirs(self.tmpFilePath, exist_ok=True)
 
     def startup(self):
         # login by scan QRCode
@@ -39,12 +51,24 @@ class WechatChannel(Channel):
         # start message listener
         itchat.run()
 
-    def handle(self, msg):
-        logger.debug("[WX]receive msg: " + json.dumps(msg, ensure_ascii=False))
+    def handle_voice(self, msg):  # download the voice message, transcribe it, then reuse the text pipeline
+        if conf().get('speech_recognition') != True:  # feature is opt-in via the config
+            return
+        logger.debug("[WX]receive voice msg: %s", msg['FileName'])
+        fileName = self.tmpFilePath + msg['FileName']  # local path the audio is saved to
+        msg.download(fileName)
+        content = super().build_void_text(fileName)
+        self._handle_single_msg(msg, content)
+
+    def handle_text(self, msg):
+        logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
+        content = msg['Text']
+        self._handle_single_msg(msg, content)
+
+    def _handle_single_msg(self, msg, content):  # shared by text and voice: content is the text to answer
         from_user_id = msg['FromUserName']
         to_user_id = msg['ToUserName']              # 接收人id
         other_user_id = msg['User']['UserName']     # 对手方id
-        content = msg['Text']
         match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
         if "」\n- - - - - - - - - - - - - - -" in content:
             logger.debug("[WX]reference query skipped")
diff --git a/config-template.json b/config-template.json
index fd6d46a..9ad9f5d 100644
--- a/config-template.json
+++ b/config-template.json
@@ -7,6 +7,7 @@
   "group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
   "image_create_prefix": ["画", "看", "找"],
   "conversation_max_tokens": 1000,
+  "speech_recognition": false,
   "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
   "expires_in_seconds": 3600
 }
diff --git a/voice/google/google_voice.py b/voice/google/google_voice.py
new file mode 100644
index 0000000..7af3880
--- /dev/null
+++ b/voice/google/google_voice.py
@@ -0,0 +1,21 @@
+
+"""
+google voice service
+"""
+
+import subprocess
+import speech_recognition
+from voice.voice import Voice
+
+class GoogleVoice(Voice):
+    recognizer = speech_recognition.Recognizer()
+
+    def __init__(self):
+        pass
+
+    def voiceToText(self, voice_file):  # convert the audio to 16 kHz mono PCM wav with ffmpeg, then transcribe it
+        new_file = voice_file.replace('.mp3', '.wav')  # NOTE(review): assumes an .mp3 input name - confirm against callers
+        subprocess.check_call(['ffmpeg', '-y', '-i', voice_file, '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000', new_file])
+        with speech_recognition.AudioFile(new_file) as source:
+            audio = self.recognizer.record(source)
+        return self.recognizer.recognize_google(audio, language='zh-CN')
diff --git a/voice/voice.py b/voice/voice.py
new file mode 100644
index 0000000..2f66dab
--- /dev/null
+++ b/voice/voice.py
@@ -0,0 +1,10 @@
+"""
+Voice service abstract class
+"""
+
+class Voice(object):
+    def voiceToText(self, voice_file):
+        """
+        Send the audio file at voice_file to the voice service and return the recognized text
+        """
+        raise NotImplementedError
diff --git a/voice/voice_factory.py b/voice/voice_factory.py
new file mode 100644
index 0000000..5457d14
--- /dev/null
+++ b/voice/voice_factory.py
@@ -0,0 +1,17 @@
+"""
+voice factory
+"""
+
+def create_voice(voice_type):
+    """
+    create a voice instance (raises RuntimeError for an unknown voice_type)
+    :param voice_type: voice type code, 'xfyun' or 'google'
+    :return: voice instance
+    """
+    if voice_type == 'xfyun':
+        from voice.xfyun.xfyun_voice import XfyunVoice
+        return XfyunVoice()
+    elif voice_type == 'google':
+        from voice.google.google_voice import GoogleVoice
+        return GoogleVoice()
+    raise RuntimeError('unsupported voice type: %s' % voice_type)
diff --git a/voice/xfyun/xfyun_voice.py b/voice/xfyun/xfyun_voice.py
new file mode 100644
index 0000000..74b27b2
--- /dev/null
+++ b/voice/xfyun/xfyun_voice.py
@@ -0,0 +1,35 @@
+
+"""
+iFLYTEK (xfyun) voice service
+"""
+
+from voice.voice import Voice
+
+# iFLYTEK speech recognition API host
+lfasr_host = 'http://raasr.xfyun.cn/api'
+# names of the API endpoints
+api_prepare = '/prepare'
+api_upload = '/upload'
+api_merge = '/merge'
+api_get_progress = '/getProgress'
+api_get_result = '/getResult'
+# file slice size: 10 MB
+file_piece_sice = 10485760
+# ------------------ configurable transcription parameters ------------------
+# Parameters are listed in the official docs (https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E8%BD%AC%E5%86%99.html); add or change them in the gene_params method as needed
+# transcription type
+lfasr_type = 0
+# whether to enable word segmentation
+has_participle = 'false'
+has_seperate = 'true'
+# number of alternative candidates
+max_alternatives = 0
+# sub-user identifier
+suid = ''
+
+class XfyunVoice(Voice):
+    def __init__(self):
+        pass
+
+    def voiceToText(self, voice_file):
+        pass  # TODO: not implemented yet - currently returns None
\ No newline at end of file