From 882e6c35762bd805a6bf9a320f78ee5fa8ec7362 Mon Sep 17 00:00:00 2001
From: wanggang <mail@ender.wang>
Date: Wed, 8 Mar 2023 11:02:01 +0800
Subject: [PATCH] [voice] add support for whisper

---
 bridge/bridge.py                 |  4 ++--
 channel/wechat/wechat_channel.py |  4 ++--
 config-template.json             |  3 +++
 voice/baidu/baidu_voice.py       | 22 ++++++++++++++++++++
 voice/openai/openai_voice.py     | 25 +++++++++++++++++++++++
 voice/voice_factory.py           |  9 +++++---
 voice/xfyun/xfyun_voice.py       | 35 --------------------------------
 7 files changed, 60 insertions(+), 42 deletions(-)
 create mode 100644 voice/baidu/baidu_voice.py
 create mode 100644 voice/openai/openai_voice.py
 delete mode 100644 voice/xfyun/xfyun_voice.py

diff --git a/bridge/bridge.py b/bridge/bridge.py
index 9d00bfe..e739a7f 100644
--- a/bridge/bridge.py
+++ b/bridge/bridge.py
@@ -10,7 +10,7 @@ class Bridge(object):
         return bot_factory.create_bot("chatGPT").reply(query, context)
 
     def fetch_voice_to_text(self, voiceFile):
-        return voice_factory.create_voice("google").voiceToText(voiceFile)
+        return voice_factory.create_voice("openai").voiceToText(voiceFile)
 
     def fetch_text_to_voice(self, text):
-        return voice_factory.create_voice("google").textToVoice(text)
\ No newline at end of file
+        return voice_factory.create_voice("baidu").textToVoice(text)
\ No newline at end of file
diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py
index 3fdc94f..2282455 100644
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -54,14 +54,14 @@ class WechatChannel(Channel):
     def handle_voice(self, msg):
         if conf().get('speech_recognition') != True :
             return
-        logger.debug("[WX]receive voice msg: ", msg['FileName'])
+        logger.debug("[WX]receive voice msg: " + msg['FileName'])
         thread_pool.submit(self._do_handle_voice, msg)
 
     def _do_handle_voice(self, msg):
         fileName = self.tmpFilePath+msg['FileName']
         msg.download(fileName)
         content = super().build_voice_to_text(fileName)
-        self._handle_single_msg(msg, content, True)
+        self._handle_single_msg(msg, content, False)
 
     def handle_text(self, msg):
         logger.debug("[WX]receive text msg: " + json.dumps(msg, ensure_ascii=False))
diff --git a/config-template.json b/config-template.json
index 9ad9f5d..f7549d4 100644
--- a/config-template.json
+++ b/config-template.json
@@ -8,6 +8,9 @@
   "image_create_prefix": ["画", "看", "找"],
   "conversation_max_tokens": 1000,
   "speech_recognition": false,
+  "baidu_app_id": "YOUR BAIDU APP ID",
+  "baidu_api_key": "YOUR BAIDU API KEY",
+  "baidu_secret_key": "YOUR BAIDU SECRET KEY",
   "character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题，并且可以使用多种语言与人交流。",
   "expires_in_seconds": 3600
 }
diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py
new file mode 100644
index 0000000..8534c2b
--- /dev/null
+++ b/voice/baidu/baidu_voice.py
@@ -0,0 +1,22 @@
+
+"""
+baidu voice service
+"""
+from aip import AipSpeech
+from voice.voice import Voice
+from config import conf
+
+class BaiduVoice(Voice):  # Baidu speech backend; both conversions are still stubs
+    APP_ID = conf().get('baidu_app_id')  # credentials are read from config once,
+    API_KEY = conf().get('baidu_api_key')  # when this class body is executed
+    SECRET_KEY = conf().get('baidu_secret_key')
+    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)  # class attribute shared by all instances
+    
+    def __init__(self):
+        pass
+
+    def voiceToText(self, voice_file):
+        pass  # not implemented yet: returns None
+
+    def textToVoice(self, text):
+        pass  # not implemented yet: returns None
diff --git a/voice/openai/openai_voice.py b/voice/openai/openai_voice.py
new file mode 100644
index 0000000..8cc28b7
--- /dev/null
+++ b/voice/openai/openai_voice.py
@@ -0,0 +1,25 @@
+
+"""
+openai voice service
+"""
+import json
+import openai
+from common.log import logger
+from voice.voice import Voice
+
+
+class OpenaiVoice(Voice):  # speech-to-text backed by OpenAI's whisper-1 model
+    def __init__(self):
+        pass
+
+    def voiceToText(self, voice_file):
+        """Send the audio file at path voice_file to whisper-1 and return its text."""
+        with open(voice_file, "rb") as file:  # closed even if the request raises
+            reply = openai.Audio.transcribe("whisper-1", file)
+        text = reply['text']  # reply is already a dict-like object, not a JSON str
+        logger.info(
+            '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
+        return text
+
+    def textToVoice(self, text):
+        pass  # not implemented for this backend; returns None
diff --git a/voice/voice_factory.py b/voice/voice_factory.py
index 5457d14..053840e 100644
--- a/voice/voice_factory.py
+++ b/voice/voice_factory.py
@@ -8,10 +8,13 @@ def create_voice(voice_type):
     :param voice_type: voice type code
     :return: voice instance
     """
-    if voice_type == 'xfyun':
-        from voice.xfyun.xfyun_voice import XfyunVoice
-        return XfyunVoice()
+    if voice_type == 'baidu':
+        from voice.baidu.baidu_voice import BaiduVoice
+        return BaiduVoice()
     elif voice_type == 'google':
         from voice.google.google_voice import GoogleVoice
         return GoogleVoice()
+    elif voice_type == 'openai':
+        from voice.openai.openai_voice import OpenaiVoice
+        return OpenaiVoice()
     raise RuntimeError
diff --git a/voice/xfyun/xfyun_voice.py b/voice/xfyun/xfyun_voice.py
deleted file mode 100644
index 74b27b2..0000000
--- a/voice/xfyun/xfyun_voice.py
+++ /dev/null
@@ -1,35 +0,0 @@
-
-"""
-科大讯飞 voice service
-"""
-
-from voice.voice import Voice
-
-# 科大讯飞语音识别
-lfasr_host = 'http://raasr.xfyun.cn/api'
-# 请求的接口名
-api_prepare = '/prepare'
-api_upload = '/upload'
-api_merge = '/merge'
-api_get_progress = '/getProgress'
-api_get_result = '/getResult'
-# 文件分片大小10M
-file_piece_sice = 10485760
-# ——————————————————转写可配置参数————————————————
-# 参数可在官网界面（https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E8%BD%AC%E5%86%99.html）查看，根据需求可自行在gene_params方法里添加修改
-# 转写类型
-lfasr_type = 0
-# 是否开启分词
-has_participle = 'false'
-has_seperate = 'true'
-# 多候选词个数
-max_alternatives = 0
-# 子用户标识
-suid = ''
-
-class XfyunVoice(Voice):
-    def __init__(self):
-        pass
-
-    def voiceToText(self, voice_file):
-        pass
\ No newline at end of file