Browse Source

[voice] add support for whisper-1 model

master
wanggang 1 year ago
parent
commit
d7a8854fa1
4 changed files with 13 additions and 7 deletions
  1. +3
    -0
      README.md
  2. +4
    -3
      channel/wechat/wechat_channel.py
  3. +1
    -1
      voice/google/google_voice.py
  4. +5
    -3
      voice/openai/openai_voice.py

+ 3
- 0
README.md View File

@@ -72,6 +72,9 @@ cd chatgpt-on-wechat/
pip3 install itchat-uos==1.5.0.dev0
pip3 install --upgrade openai


语音识别默认使用 OpenAI 的 whisper-1 模型
如果使用百度的语音识别，需要安装百度的 Python SDK
pip3 install baidu-aip
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak
pip3 install SpeechRecognition
--在MacOS中安装ffmpeg,brew install ffmpeg espeak


+ 4
- 3
channel/wechat/wechat_channel.py View File

@@ -5,6 +5,7 @@ wechat channel
""" """


import os import os
import pathlib
import itchat import itchat
import json import json
from itchat.content import * from itchat.content import *
@@ -37,11 +38,11 @@ def handler_single_voice(msg):




class WechatChannel(Channel): class WechatChannel(Channel):
tmpFilePath = './tmp/'
tmpFilePath = pathlib.Path('./tmp/')


def __init__(self): def __init__(self):
isExists = os.path.exists(self.tmpFilePath)
if not isExists:
pathExists = os.path.exists(self.tmpFilePath)
if not pathExists and conf().get('speech_recognition') == True:
os.makedirs(self.tmpFilePath) os.makedirs(self.tmpFilePath)


def startup(self): def startup(self):


+ 1
- 1
voice/google/google_voice.py View File

@@ -3,6 +3,7 @@
google voice service google voice service
""" """


import pathlib
import subprocess import subprocess
import time import time
import speech_recognition import speech_recognition
@@ -12,7 +13,6 @@ from voice.voice import Voice




class GoogleVoice(Voice): class GoogleVoice(Voice):
tmpFilePath = './tmp/'
recognizer = speech_recognition.Recognizer() recognizer = speech_recognition.Recognizer()
engine = pyttsx3.init() engine = pyttsx3.init()




+ 5
- 3
voice/openai/openai_voice.py View File

@@ -4,19 +4,21 @@ google voice service
""" """
import json import json
import openai import openai
from config import conf
from common.log import logger from common.log import logger
from voice.voice import Voice from voice.voice import Voice




class OpenaiVoice(Voice): class OpenaiVoice(Voice):
def __init__(self): def __init__(self):
pass
openai.api_key = conf().get('open_ai_api_key')


def voiceToText(self, voice_file): def voiceToText(self, voice_file):
logger.debug(
'[Openai] voice file name={}'.format(voice_file))
file = open(voice_file, "rb") file = open(voice_file, "rb")
reply = openai.Audio.transcribe("whisper-1", file) reply = openai.Audio.transcribe("whisper-1", file)
json_dict = json.loads(reply)
text = json_dict['text']
text = reply["text"]
logger.info( logger.info(
'[Openai] voiceToText text={} voice file name={}'.format(text, voice_file)) '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
return text return text


Loading…
Cancel
Save