浏览代码

[voice] add support for whisper-1 model

master
wanggang 2 年前
父节点
当前提交
d7a8854fa1
共有 4 个文件被更改,包括 13 次插入和 7 次删除
  1. +3
    -0
      README.md
  2. +4
    -3
      channel/wechat/wechat_channel.py
  3. +1
    -1
      voice/google/google_voice.py
  4. +5
    -3
      voice/openai/openai_voice.py

+ 3
- 0
README.md 查看文件

@@ -72,6 +72,9 @@ cd chatgpt-on-wechat/
pip3 install itchat-uos==1.5.0.dev0 pip3 install itchat-uos==1.5.0.dev0
pip3 install --upgrade openai pip3 install --upgrade openai


默认使用openai的whisper-1模型
如果使用百度的语音识别,需要安装百度的 Python SDK
pip3 install baidu-aip
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak 如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak
pip3 install SpeechRecognition pip3 install SpeechRecognition
--在MacOS中安装ffmpeg,brew install ffmpeg espeak --在MacOS中安装ffmpeg,brew install ffmpeg espeak


+ 4
- 3
channel/wechat/wechat_channel.py 查看文件

@@ -5,6 +5,7 @@ wechat channel
""" """


import os import os
import pathlib
import itchat import itchat
import json import json
from itchat.content import * from itchat.content import *
@@ -37,11 +38,11 @@ def handler_single_voice(msg):




class WechatChannel(Channel): class WechatChannel(Channel):
tmpFilePath = './tmp/'
tmpFilePath = pathlib.Path('./tmp/')


def __init__(self): def __init__(self):
isExists = os.path.exists(self.tmpFilePath)
if not isExists:
pathExists = os.path.exists(self.tmpFilePath)
if not pathExists and conf().get('speech_recognition') == True:
os.makedirs(self.tmpFilePath) os.makedirs(self.tmpFilePath)


def startup(self): def startup(self):


+ 1
- 1
voice/google/google_voice.py 查看文件

@@ -3,6 +3,7 @@
google voice service google voice service
""" """


import pathlib
import subprocess import subprocess
import time import time
import speech_recognition import speech_recognition
@@ -12,7 +13,6 @@ from voice.voice import Voice




class GoogleVoice(Voice): class GoogleVoice(Voice):
tmpFilePath = './tmp/'
recognizer = speech_recognition.Recognizer() recognizer = speech_recognition.Recognizer()
engine = pyttsx3.init() engine = pyttsx3.init()




+ 5
- 3
voice/openai/openai_voice.py 查看文件

@@ -4,19 +4,21 @@ google voice service
""" """
import json import json
import openai import openai
from config import conf
from common.log import logger from common.log import logger
from voice.voice import Voice from voice.voice import Voice




class OpenaiVoice(Voice): class OpenaiVoice(Voice):
def __init__(self): def __init__(self):
pass
openai.api_key = conf().get('open_ai_api_key')


def voiceToText(self, voice_file): def voiceToText(self, voice_file):
logger.debug(
'[Openai] voice file name={}'.format(voice_file))
file = open(voice_file, "rb") file = open(voice_file, "rb")
reply = openai.Audio.transcribe("whisper-1", file) reply = openai.Audio.transcribe("whisper-1", file)
json_dict = json.loads(reply)
text = json_dict['text']
text = reply["text"]
logger.info( logger.info(
'[Openai] voiceToText text={} voice file name={}'.format(text, voice_file)) '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
return text return text


正在加载...
取消
保存