From e02c8bff819f01e2fccf335679eb8e7e06323c8d Mon Sep 17 00:00:00 2001
From: Clivia <132346501+Yanyutin753@users.noreply.github.com>
Date: Mon, 8 Jul 2024 17:58:59 +0800
Subject: [PATCH 1/4] =?UTF-8?q?PictureChange=E6=8F=92=E4=BB=B6=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD=E5=8D=87=E7=BA=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 plugins/source.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/source.json b/plugins/source.json
index c5c99d0..3e97bdd 100644
--- a/plugins/source.json
+++ b/plugins/source.json
@@ -22,7 +22,7 @@
     },
     "pictureChange": {
       "url": "https://github.com/Yanyutin753/pictureChange.git",
-      "desc": "利用stable-diffusion和百度Ai进行图生图或者画图的插件"
+      "desc": "1. 支持百度AI和Stable Diffusion WebUI进行图像处理，提供多种模型选择，支持图生图、文生图自定义模板。2. 支持Suno音乐AI可将图像和文字转为音乐。3. 支持自定义模型进行文件、图片总结功能。4. 支持管理员控制群聊内容与参数和功能改变。"
     },
     "Blackroom": {
       "url": "https://github.com/dividduang/blackroom.git",

From 993853757b1f9f7f8619a815449e527c6d7938ee Mon Sep 17 00:00:00 2001
From: 6vision <vision_wangpc@sina.com>
Date: Mon, 15 Jul 2024 18:57:58 +0800
Subject: [PATCH 2/4] Linkai bot supports more file types.

---
 bot/linkai/link_ai_bot.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bot/linkai/link_ai_bot.py b/bot/linkai/link_ai_bot.py
index 3fe8131..95c514d 100644
--- a/bot/linkai/link_ai_bot.py
+++ b/bot/linkai/link_ai_bot.py
@@ -399,6 +399,7 @@ class LinkAIBot(Bot):
             return
         max_send_num = conf().get("max_media_send_count")
         send_interval = conf().get("media_send_interval")
+        file_type = (".pdf", ".doc", ".docx", ".csv", ".xls", ".xlsx", ".txt", ".rtf", ".ppt", ".pptx")
         try:
             i = 0
             for url in image_urls:
@@ -407,7 +408,7 @@ class LinkAIBot(Bot):
                 i += 1
                 if url.endswith(".mp4"):
                     reply_type = ReplyType.VIDEO_URL
-                elif url.endswith(".pdf") or url.endswith(".doc") or url.endswith(".docx") or url.endswith(".csv"):
+                elif url.endswith(file_type):
                     reply_type = ReplyType.FILE
                     url = _download_file(url)
                     if not url:

From f0e416455ff53c444630caebde7960d532190c2c Mon Sep 17 00:00:00 2001
From: kody <tupobwq@foxmail.com>
Date: Mon, 15 Jul 2024 22:03:31 +0800
Subject: [PATCH 3/4] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E4=BD=BF?=
 =?UTF-8?q?=E7=94=A8=E9=98=BF=E9=87=8C=E4=BA=91=E8=BF=9B=E8=A1=8C=E8=AF=AD?=
 =?UTF-8?q?=E9=9F=B3=E8=AF=86=E5=88=AB=E7=9A=84=E5=BC=95=E6=93=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                      |  2 +-
 voice/ali/ali_api.py           | 64 ++++++++++++++++++++++++++++++++++
 voice/ali/ali_voice.py         | 28 ++++++++++++---
 voice/ali/config.json.template |  3 +-
 4 files changed, 91 insertions(+), 6 deletions(-)

diff --git a/config.py b/config.py
index 26f073d..c15b70f 100644
--- a/config.py
+++ b/config.py
@@ -95,7 +95,7 @@ available_setting = {
     "group_speech_recognition": False,  # 是否开启群组语音识别
     "voice_reply_voice": False,  # 是否使用语音回复语音，需要设置对应语音合成引擎的api key
     "always_reply_voice": False,  # 是否一直使用语音回复
-    "voice_to_text": "openai",  # 语音识别引擎，支持openai,baidu,google,azure
+    "voice_to_text": "openai",  # 语音识别引擎，支持openai,baidu,google,ali,azure
     "text_to_voice": "openai",  # 语音合成引擎，支持openai,baidu,google,pytts(offline),ali,azure,elevenlabs,edge(online)
     "text_to_voice_model": "tts-1",
     "tts_voice_id": "alloy",
diff --git a/voice/ali/ali_api.py b/voice/ali/ali_api.py
index cac0c8c..def5c7a 100644
--- a/voice/ali/ali_api.py
+++ b/voice/ali/ali_api.py
@@ -8,6 +8,7 @@ Description:
 
 """
 
+import http.client
 import json
 import time
 import requests
@@ -61,6 +62,69 @@ def text_to_speech_aliyun(url, text, appkey, token):
 
     return output_file
 
+def speech_to_text_aliyun(url, audioContent, appkey, token):
+    """
+    Recognize speech in PCM audio with the Aliyun speech recognition service.
+
+    Args:
+        url (str): Endpoint URL of the Aliyun speech recognition service.
+        audioContent (bytes): PCM audio data (declared as 16000 Hz 'pcm').
+        appkey (str): Your Aliyun appkey.
+        token (str): Authentication token for the Aliyun API.
+
+    Returns:
+        str | None: The recognized text on success, otherwise None.
+    """
+    audio_format = 'pcm'  # named audio_format so the builtin `format` is not shadowed
+    sample_rate = 16000
+    enablePunctuationPrediction = True
+    enableInverseTextNormalization = True
+    enableVoiceDetection = False
+
+    # Build the RESTful request URL together with its query parameters.
+    request = url + '?appkey=' + appkey
+    request = request + '&format=' + audio_format
+    request = request + '&sample_rate=' + str(sample_rate)
+
+    if enablePunctuationPrediction:
+        request = request + '&enable_punctuation_prediction=' + 'true'
+
+    if enableInverseTextNormalization:
+        request = request + '&enable_inverse_text_normalization=' + 'true'
+
+    if enableVoiceDetection:
+        request = request + '&enable_voice_detection=' + 'true'
+
+    host = 'nls-gateway-cn-shanghai.aliyuncs.com'  # NOTE: fixed host, whatever host `url` names
+
+    # HTTPS request headers.
+    httpHeaders = {
+        'X-NLS-Token': token,
+        'Content-type': 'application/octet-stream',
+        'Content-Length': len(audioContent)
+    }
+
+    conn = http.client.HTTPSConnection(host)
+    try:  # the finally below closes the connection on every path, including errors
+        conn.request(method='POST', url=request, body=audioContent, headers=httpHeaders)
+        body = conn.getresponse().read()
+        try:
+            body = json.loads(body)
+            status = body['status']
+            if status == 20000000:
+                result = body['result']
+                if result:
+                    logger.info(f"阿里云语音识别到了：{result}")
+                return result
+            logger.error(f"语音识别失败，状态码: {status}")
+        except ValueError:
+            logger.error(f"语音识别失败，收到非JSON格式的数据: {body}")
+        except (KeyError, TypeError):  # valid JSON, but not the expected object shape
+            logger.error(f"语音识别失败，响应缺少预期字段: {body}")
+    finally:
+        conn.close()
+    return None
+
 
 class AliyunTokenGenerator:
     """
diff --git a/voice/ali/ali_voice.py b/voice/ali/ali_voice.py
index 79a9aaa..43ea0b4 100644
--- a/voice/ali/ali_voice.py
+++ b/voice/ali/ali_voice.py
@@ -15,9 +15,9 @@ import time
 
 from bridge.reply import Reply, ReplyType
 from common.log import logger
+from voice.audio_convert import get_pcm_from_wav
 from voice.voice import Voice
-from voice.ali.ali_api import AliyunTokenGenerator
-from voice.ali.ali_api import text_to_speech_aliyun
+from voice.ali.ali_api import AliyunTokenGenerator, speech_to_text_aliyun, text_to_speech_aliyun
 from config import conf
 
 
@@ -34,7 +34,8 @@ class AliVoice(Voice):
             self.token = None
             self.token_expire_time = 0
             # 默认复用阿里云千问的 access_key 和 access_secret
-            self.api_url = config.get("api_url")
+            self.api_url_voice_to_text = config.get("api_url_voice_to_text")
+            self.api_url_text_to_voice = config.get("api_url_text_to_voice")
             self.app_key = config.get("app_key")
             self.access_key_id = conf().get("qwen_access_key_id") or config.get("access_key_id")
             self.access_key_secret = conf().get("qwen_access_key_secret") or config.get("access_key_secret")
@@ -53,7 +54,7 @@ class AliVoice(Voice):
                       r'äöüÄÖÜáéíóúÁÉÍÓÚàèìòùÀÈÌÒÙâêîôûÂÊÎÔÛçÇñÑ，。！？,.]', '', text)
         # 提取有效的token
         token_id = self.get_valid_token()
-        fileName = text_to_speech_aliyun(self.api_url, text, self.app_key, token_id)
+        fileName = text_to_speech_aliyun(self.api_url_text_to_voice, text, self.app_key, token_id)
         if fileName:
             logger.info("[Ali] textToVoice text={} voice file name={}".format(text, fileName))
             reply = Reply(ReplyType.VOICE, fileName)
@@ -61,6 +62,25 @@ class AliVoice(Voice):
             reply = Reply(ReplyType.ERROR, "抱歉，语音合成失败")
         return reply
 
+    def voiceToText(self, voice_file):
+        """
+        Transcribe a voice file to text through speech_to_text_aliyun.
+
+        :param voice_file: The voice file to transcribe.
+        :return: A Reply holding the recognized text, or an error Reply on failure.
+        """
+        # Obtain a valid token first, exactly as the synthesis path does.
+        access_token = self.get_valid_token()
+        logger.debug("[Ali] voice file name={}".format(voice_file))
+        pcm_data = get_pcm_from_wav(voice_file)
+        recognized = speech_to_text_aliyun(self.api_url_voice_to_text, pcm_data, self.app_key, access_token)
+        # Guard clause: an empty or None result is reported as a recognition failure.
+        if not recognized:
+            return Reply(ReplyType.ERROR, "抱歉，语音识别失败")
+
+        logger.info("[Ali] VoicetoText = {}".format(recognized))
+        return Reply(ReplyType.TEXT, recognized)
+
     def get_valid_token(self):
         """
         获取有效的阿里云token。
diff --git a/voice/ali/config.json.template b/voice/ali/config.json.template
index 6a4aaa9..563c57f 100644
--- a/voice/ali/config.json.template
+++ b/voice/ali/config.json.template
@@ -1,5 +1,6 @@
 {
-    "api_url": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts",
+    "api_url_text_to_voice": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts",
+    "api_url_voice_to_text": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/asr",
     "app_key": "",
     "access_key_id": "",
     "access_key_secret": ""

From 52a490c87ec3aa5780859994aca190f3dd189f0c Mon Sep 17 00:00:00 2001
From: 6vision <vision_wangpc@sina.com>
Date: Fri, 19 Jul 2024 11:04:45 +0800
Subject: [PATCH 4/4] Support gpt-4o-mini model

---
 bot/chatgpt/chat_gpt_session.py | 2 +-
 common/const.py                 | 3 ++-
 config.py                       | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/bot/chatgpt/chat_gpt_session.py b/bot/chatgpt/chat_gpt_session.py
index d39d769..f7ff12f 100644
--- a/bot/chatgpt/chat_gpt_session.py
+++ b/bot/chatgpt/chat_gpt_session.py
@@ -67,7 +67,7 @@ def num_tokens_from_messages(messages, model):
     elif model in ["gpt-4-0314", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613", "gpt-3.5-turbo-0613",
                    "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-35-turbo-16k", "gpt-4-turbo-preview",
                    "gpt-4-1106-preview", const.GPT4_TURBO_PREVIEW, const.GPT4_VISION_PREVIEW, const.GPT4_TURBO_01_25,
-                   const.GPT_4o, const.LINKAI_4o, const.LINKAI_4_TURBO]:
+                   const.GPT_4o, const.GPT_4o_MINI, const.LINKAI_4o, const.LINKAI_4_TURBO]:
         return num_tokens_from_messages(messages, model="gpt-4")
     elif model.startswith("claude-3"):
         return num_tokens_from_messages(messages, model="gpt-3.5-turbo")
diff --git a/common/const.py b/common/const.py
index 45e72e7..68d3795 100644
--- a/common/const.py
+++ b/common/const.py
@@ -32,6 +32,7 @@ GPT4_TURBO_11_06 = "gpt-4-1106-preview"
 GPT4_VISION_PREVIEW = "gpt-4-vision-preview"
 
 GPT4 = "gpt-4"
+GPT_4o_MINI = "gpt-4o-mini"
 GPT4_32k = "gpt-4-32k"
 GPT4_06_13 = "gpt-4-0613"
 GPT4_32k_06_13 = "gpt-4-32k-0613"
@@ -57,7 +58,7 @@ GEMINI_15_PRO = "gemini-1.5-pro"
 
 MODEL_LIST = [
               GPT35, GPT35_0125, GPT35_1106, "gpt-3.5-turbo-16k",
-              GPT_4o, GPT4_TURBO, GPT4_TURBO_PREVIEW, GPT4_TURBO_01_25, GPT4_TURBO_11_06, GPT4, GPT4_32k, GPT4_06_13, GPT4_32k_06_13,
+              GPT_4o, GPT_4o_MINI, GPT4_TURBO, GPT4_TURBO_PREVIEW, GPT4_TURBO_01_25, GPT4_TURBO_11_06, GPT4, GPT4_32k, GPT4_06_13, GPT4_32k_06_13,
               WEN_XIN, WEN_XIN_4,
               XUNFEI, ZHIPU_AI, MOONSHOT, MiniMax,
               GEMINI, GEMINI_PRO, GEMINI_15_flash, GEMINI_15_PRO,
diff --git a/config.py b/config.py
index 26f073d..a246ce3 100644
--- a/config.py
+++ b/config.py
@@ -17,7 +17,7 @@ available_setting = {
     "open_ai_api_base": "https://api.openai.com/v1",
     "proxy": "",  # openai使用的代理
     # chatgpt模型， 当use_azure_chatgpt为true时，其名称为Azure上model deployment名称
-    "model": "gpt-3.5-turbo",  # 可选择: gpt-4o, gpt-4-turbo, claude-3-sonnet, wenxin, moonshot, qwen-turbo, xunfei, glm-4, minimax, gemini等模型，全部可选模型详见common/const.py文件
+    "model": "gpt-3.5-turbo",  # 可选择: gpt-4o, gpt-4o-mini, gpt-4-turbo, claude-3-sonnet, wenxin, moonshot, qwen-turbo, xunfei, glm-4, minimax, gemini等模型，全部可选模型详见common/const.py文件
     "bot_type": "",  # 可选配置，使用兼容openai格式的三方服务时候，需填"chatGPT"。bot具体名称详见common/const.py文件列出的bot_type，如不填根据model名称判断，
     "use_azure_chatgpt": False,  # 是否使用azure的chatgpt
     "azure_deployment_id": "",  # azure 模型部署名称