diff --git a/README.md b/README.md index 1676395..93660e8 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,9 @@ cd chatgpt-on-wechat/ pip3 install itchat-uos==1.5.0.dev0 pip3 install --upgrade openai +默认使用openai的whisper-1模型 +如果使用百度的语音识别,需要安装百度的pythonSDK +pip3 install baidu-aip 如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak pip3 install SpeechRecognition --在MacOS中安装ffmpeg,brew install ffmpeg espeak diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py index 2282455..0f20613 100644 --- a/channel/wechat/wechat_channel.py +++ b/channel/wechat/wechat_channel.py @@ -5,6 +5,7 @@ wechat channel """ import os +import pathlib import itchat import json from itchat.content import * @@ -37,11 +38,11 @@ def handler_single_voice(msg): class WechatChannel(Channel): - tmpFilePath = './tmp/' + tmpFilePath = pathlib.Path('./tmp/') def __init__(self): - isExists = os.path.exists(self.tmpFilePath) - if not isExists: + pathExists = os.path.exists(self.tmpFilePath) + if not pathExists and conf().get('speech_recognition'): os.makedirs(self.tmpFilePath) def startup(self): diff --git a/voice/google/google_voice.py b/voice/google/google_voice.py index 58955f4..3fff9d7 100644 --- a/voice/google/google_voice.py +++ b/voice/google/google_voice.py @@ -3,6 +3,7 @@ google voice service """ +import pathlib import subprocess import time import speech_recognition @@ -12,7 +13,6 @@ from voice.voice import Voice class GoogleVoice(Voice): - tmpFilePath = './tmp/' recognizer = speech_recognition.Recognizer() engine = pyttsx3.init() diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py index 8cc28b7..475aac6 100644 --- a/voice/openai/openai_voice.py +++ b/voice/openai/openai_voice.py @@ -4,19 +4,21 @@ google voice service """ import json import openai +from config import conf from common.log import logger from voice.voice import Voice class OpenaiVoice(Voice): def __init__(self): - pass + openai.api_key = conf().get('open_ai_api_key') def 
voiceToText(self, voice_file): + logger.debug( + '[Openai] voice file name={}'.format(voice_file)) file = open(voice_file, "rb") reply = openai.Audio.transcribe("whisper-1", file) - json_dict = json.loads(reply) - text = json_dict['text'] + text = reply["text"] logger.info( '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file)) return text