From 882e6c35762bd805a6bf9a320f78ee5fa8ec7362 Mon Sep 17 00:00:00 2001 From: wanggang Date: Wed, 8 Mar 2023 11:02:01 +0800 Subject: [PATCH] [voice] add support for wispper --- bridge/bridge.py | 4 ++-- channel/wechat/wechat_channel.py | 4 ++-- config-template.json | 3 +++ voice/baidu/baidu_voice.py | 22 ++++++++++++++++++++ voice/openai/openai_voice.py | 25 +++++++++++++++++++++++ voice/voice_factory.py | 9 +++++--- voice/xfyun/xfyun_voice.py | 35 -------------------------------- 7 files changed, 60 insertions(+), 42 deletions(-) create mode 100644 voice/baidu/baidu_voice.py create mode 100644 voice/openai/openai_voice.py delete mode 100644 voice/xfyun/xfyun_voice.py diff --git a/bridge/bridge.py b/bridge/bridge.py index 9d00bfe..e739a7f 100644 --- a/bridge/bridge.py +++ b/bridge/bridge.py @@ -10,7 +10,7 @@ class Bridge(object): return bot_factory.create_bot("chatGPT").reply(query, context) def fetch_voice_to_text(self, voiceFile): - return voice_factory.create_voice("google").voiceToText(voiceFile) + return voice_factory.create_voice("openai").voiceToText(voiceFile) def fetch_text_to_voice(self, text): - return voice_factory.create_voice("google").textToVoice(text) \ No newline at end of file + return voice_factory.create_voice("baidu").textToVoice(text) \ No newline at end of file diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py index 3fdc94f..2282455 100644 --- a/channel/wechat/wechat_channel.py +++ b/channel/wechat/wechat_channel.py @@ -54,14 +54,14 @@ class WechatChannel(Channel): def handle_voice(self, msg): if conf().get('speech_recognition') != True : return - logger.debug("[WX]receive voice msg: ", msg['FileName']) + logger.debug("[WX]receive voice msg: " + msg['FileName']) thread_pool.submit(self._do_handle_voice, msg) def _do_handle_voice(self, msg): fileName = self.tmpFilePath+msg['FileName'] msg.download(fileName) content = super().build_voice_to_text(fileName) - self._handle_single_msg(msg, content, True) + self._handle_single_msg(msg, content, False) def handle_text(self, msg): logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False)) diff --git a/config-template.json b/config-template.json index 9ad9f5d..f7549d4 100644 --- a/config-template.json +++ b/config-template.json @@ -8,6 +8,9 @@ "image_create_prefix": ["画", "看", "找"], "conversation_max_tokens": 1000, "speech_recognition": false, + "baidu_app_id": "YOUR BAIDU APP ID", + "baidu_api_key": "YOUR BAIDU API KEY", + "baidu_secret_key": "YOUR BAIDU SERVICE KEY", "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。", "expires_in_seconds": 3600 } diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py new file mode 100644 index 0000000..8534c2b --- /dev/null +++ b/voice/baidu/baidu_voice.py @@ -0,0 +1,22 @@ + +""" +baidu voice service +""" +from aip import AipSpeech +from voice.voice import Voice +from config import conf + +class BaiduVoice(Voice): + APP_ID = conf().get('baidu_app_id') + API_KEY = conf().get('baidu_api_key') + SECRET_KEY = conf().get('baidu_secret_key') + client = AipSpeech(APP_ID, API_KEY, SECRET_KEY) + + def __init__(self): + pass + + def voiceToText(self, voice_file): + pass + + def textToVoice(self, text): + pass diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py new file mode 100644 index 0000000..8cc28b7 --- /dev/null +++ b/voice/openai/openai_voice.py @@ -0,0 +1,25 @@ + +""" +google voice service +""" +import json +import openai +from common.log import logger +from voice.voice import Voice + + +class OpenaiVoice(Voice): + def __init__(self): + pass + + def voiceToText(self, voice_file): + file = open(voice_file, "rb") + reply = openai.Audio.transcribe("whisper-1", file) + json_dict = json.loads(reply) + text = json_dict['text'] + logger.info( + '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file)) + return text + + def textToVoice(self, text): + pass diff --git a/voice/voice_factory.py b/voice/voice_factory.py index 5457d14..053840e 100644 --- a/voice/voice_factory.py +++ b/voice/voice_factory.py @@ -8,10 +8,13 @@ def create_voice(voice_type): :param voice_type: voice type code :return: voice instance """ - if voice_type == 'xfyun': - from voice.xfyun.xfyun_voice import XfyunVoice - return XfyunVoice() + if voice_type == 'baidu': + from voice.baidu.baidu_voice import BaiduVoice + return BaiduVoice() elif voice_type == 'google': from voice.google.google_voice import GoogleVoice return GoogleVoice() + elif voice_type == 'openai': + from voice.openai.openai_voice import OpenaiVoice + return OpenaiVoice() raise RuntimeError diff --git a/voice/xfyun/xfyun_voice.py b/voice/xfyun/xfyun_voice.py deleted file mode 100644 index 74b27b2..0000000 --- a/voice/xfyun/xfyun_voice.py +++ /dev/null @@ -1,35 +0,0 @@ - -""" -科大讯飞 voice service -""" - -from voice.voice import Voice - -# 科大讯飞语音识别 -lfasr_host = 'http://raasr.xfyun.cn/api' -# 请求的接口名 -api_prepare = '/prepare' -api_upload = '/upload' -api_merge = '/merge' -api_get_progress = '/getProgress' -api_get_result = '/getResult' -# 文件分片大小10M -file_piece_sice = 10485760 -# ——————————————————转写可配置参数———————————————— -# 参数可在官网界面(https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E8%BD%AC%E5%86%99.html)查看,根据需求可自行在gene_params方法里添加修改 -# 转写类型 -lfasr_type = 0 -# 是否开启分词 -has_participle = 'false' -has_seperate = 'true' -# 多候选词个数 -max_alternatives = 0 -# 子用户标识 -suid = '' - -class XfyunVoice(Voice): - def __init__(self): - pass - - def voiceToText(self, voice_file): - pass \ No newline at end of file