From e02c8bff819f01e2fccf335679eb8e7e06323c8d Mon Sep 17 00:00:00 2001 From: Clivia <132346501+Yanyutin753@users.noreply.github.com> Date: Mon, 8 Jul 2024 17:58:59 +0800 Subject: [PATCH 1/4] =?UTF-8?q?PictureChange=E6=8F=92=E4=BB=B6=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=E5=8D=87=E7=BA=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugins/source.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/source.json b/plugins/source.json index c5c99d0..3e97bdd 100644 --- a/plugins/source.json +++ b/plugins/source.json @@ -22,7 +22,7 @@ }, "pictureChange": { "url": "https://github.com/Yanyutin753/pictureChange.git", - "desc": "利用stable-diffusion和百度Ai进行图生图或者画图的插件" + "desc": "1. 支持百度AI和Stable Diffusion WebUI进行图像处理,提供多种模型选择,支持图生图、文生图自定义模板。2. 支持Suno音乐AI可将图像和文字转为音乐。3. 支持自定义模型进行文件、图片总结功能。4. 支持管理员控制群聊内容与参数和功能改变。" }, "Blackroom": { "url": "https://github.com/dividduang/blackroom.git", From 993853757b1f9f7f8619a815449e527c6d7938ee Mon Sep 17 00:00:00 2001 From: 6vision Date: Mon, 15 Jul 2024 18:57:58 +0800 Subject: [PATCH 2/4] Linkai bot supports more file types. 
--- bot/linkai/link_ai_bot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bot/linkai/link_ai_bot.py b/bot/linkai/link_ai_bot.py index 3fe8131..95c514d 100644 --- a/bot/linkai/link_ai_bot.py +++ b/bot/linkai/link_ai_bot.py @@ -399,6 +399,7 @@ class LinkAIBot(Bot): return max_send_num = conf().get("max_media_send_count") send_interval = conf().get("media_send_interval") + file_type = (".pdf", ".doc", ".docx", ".csv", ".xls", ".xlsx", ".txt", ".rtf", ".ppt", ".pptx") try: i = 0 for url in image_urls: @@ -407,7 +408,7 @@ class LinkAIBot(Bot): i += 1 if url.endswith(".mp4"): reply_type = ReplyType.VIDEO_URL - elif url.endswith(".pdf") or url.endswith(".doc") or url.endswith(".docx") or url.endswith(".csv"): + elif url.endswith(file_type): reply_type = ReplyType.FILE url = _download_file(url) if not url: From f0e416455ff53c444630caebde7960d532190c2c Mon Sep 17 00:00:00 2001 From: kody Date: Mon, 15 Jul 2024 22:03:31 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E9=98=BF=E9=87=8C=E4=BA=91=E8=BF=9B=E8=A1=8C=E8=AF=AD?= =?UTF-8?q?=E9=9F=B3=E8=AF=86=E5=88=AB=E7=9A=84=E5=BC=95=E6=93=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 +- voice/ali/ali_api.py | 64 ++++++++++++++++++++++++++++++++++ voice/ali/ali_voice.py | 28 ++++++++++++--- voice/ali/config.json.template | 3 +- 4 files changed, 91 insertions(+), 6 deletions(-) diff --git a/config.py b/config.py index 26f073d..c15b70f 100644 --- a/config.py +++ b/config.py @@ -95,7 +95,7 @@ available_setting = { "group_speech_recognition": False, # 是否开启群组语音识别 "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key "always_reply_voice": False, # 是否一直使用语音回复 - "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure + "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,ali,azure "text_to_voice": "openai", # 
语音合成引擎,支持openai,baidu,google,pytts(offline),ali,azure,elevenlabs,edge(online) "text_to_voice_model": "tts-1", "tts_voice_id": "alloy", diff --git a/voice/ali/ali_api.py b/voice/ali/ali_api.py index cac0c8c..def5c7a 100644 --- a/voice/ali/ali_api.py +++ b/voice/ali/ali_api.py @@ -8,6 +8,7 @@ Description: """ +import http.client import json import time import requests @@ -61,6 +62,69 @@ def text_to_speech_aliyun(url, text, appkey, token): return output_file +def speech_to_text_aliyun(url, audioContent, appkey, token): + """ + 使用阿里云的语音识别服务识别音频文件中的语音。 + + 参数: + - url (str): 阿里云语音识别服务的端点URL。 + - audioContent (byte): pcm音频数据。 + - appkey (str): 您的阿里云appkey。 + - token (str): 阿里云API的认证令牌。 + + 返回值: + - str: 成功时输出识别到的文本,否则为None。 + """ + format = 'pcm' + sample_rate = 16000 + enablePunctuationPrediction = True + enableInverseTextNormalization = True + enableVoiceDetection = False + + # 设置RESTful请求参数 + request = url + '?appkey=' + appkey + request = request + '&format=' + format + request = request + '&sample_rate=' + str(sample_rate) + + if enablePunctuationPrediction : + request = request + '&enable_punctuation_prediction=' + 'true' + + if enableInverseTextNormalization : + request = request + '&enable_inverse_text_normalization=' + 'true' + + if enableVoiceDetection : + request = request + '&enable_voice_detection=' + 'true' + + host = 'nls-gateway-cn-shanghai.aliyuncs.com' + + # 设置HTTPS请求头部 + httpHeaders = { + 'X-NLS-Token': token, + 'Content-type': 'application/octet-stream', + 'Content-Length': len(audioContent) + } + + conn = http.client.HTTPSConnection(host) + conn.request(method='POST', url=request, body=audioContent, headers=httpHeaders) + + response = conn.getresponse() + body = response.read() + try: + body = json.loads(body) + status = body['status'] + if status == 20000000 : + result = body['result'] + if result : + logger.info(f"阿里云语音识别到了:{result}") + conn.close() + return result + else : + logger.error(f"语音识别失败,状态码: {status}") + except ValueError: + 
logger.error(f"语音识别失败,收到非JSON格式的数据: {body}") + conn.close() + return None + class AliyunTokenGenerator: """ diff --git a/voice/ali/ali_voice.py b/voice/ali/ali_voice.py index 79a9aaa..43ea0b4 100644 --- a/voice/ali/ali_voice.py +++ b/voice/ali/ali_voice.py @@ -15,9 +15,9 @@ import time from bridge.reply import Reply, ReplyType from common.log import logger +from voice.audio_convert import get_pcm_from_wav from voice.voice import Voice -from voice.ali.ali_api import AliyunTokenGenerator -from voice.ali.ali_api import text_to_speech_aliyun +from voice.ali.ali_api import AliyunTokenGenerator, speech_to_text_aliyun, text_to_speech_aliyun from config import conf @@ -34,7 +34,8 @@ class AliVoice(Voice): self.token = None self.token_expire_time = 0 # 默认复用阿里云千问的 access_key 和 access_secret - self.api_url = config.get("api_url") + self.api_url_voice_to_text = config.get("api_url_voice_to_text") + self.api_url_text_to_voice = config.get("api_url_text_to_voice") self.app_key = config.get("app_key") self.access_key_id = conf().get("qwen_access_key_id") or config.get("access_key_id") self.access_key_secret = conf().get("qwen_access_key_secret") or config.get("access_key_secret") @@ -53,7 +54,7 @@ class AliVoice(Voice): r'äöüÄÖÜáéíóúÁÉÍÓÚàèìòùÀÈÌÒÙâêîôûÂÊÎÔÛçÇñÑ,。!?,.]', '', text) # 提取有效的token token_id = self.get_valid_token() - fileName = text_to_speech_aliyun(self.api_url, text, self.app_key, token_id) + fileName = text_to_speech_aliyun(self.api_url_text_to_voice, text, self.app_key, token_id) if fileName: logger.info("[Ali] textToVoice text={} voice file name={}".format(text, fileName)) reply = Reply(ReplyType.VOICE, fileName) @@ -61,6 +62,25 @@ class AliVoice(Voice): reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败") return reply + def voiceToText(self, voice_file): + """ + 将语音文件转换为文本。 + + :param voice_file: 要转换的语音文件。 + :return: 返回一个Reply对象,其中包含转换得到的文本或错误信息。 + """ + # 提取有效的token + token_id = self.get_valid_token() + logger.debug("[Ali] voice file name={}".format(voice_file)) + pcm = 
get_pcm_from_wav(voice_file) + text = speech_to_text_aliyun(self.api_url_voice_to_text, pcm, self.app_key, token_id) + if text: + logger.info("[Ali] VoicetoText = {}".format(text)) + reply = Reply(ReplyType.TEXT, text) + else: + reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败") + return reply + def get_valid_token(self): """ 获取有效的阿里云token。 diff --git a/voice/ali/config.json.template b/voice/ali/config.json.template index 6a4aaa9..563c57f 100644 --- a/voice/ali/config.json.template +++ b/voice/ali/config.json.template @@ -1,5 +1,6 @@ { - "api_url": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts", + "api_url_text_to_voice": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts", + "api_url_voice_to_text": "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/asr", "app_key": "", "access_key_id": "", "access_key_secret": "" From 52a490c87ec3aa5780859994aca190f3dd189f0c Mon Sep 17 00:00:00 2001 From: 6vision Date: Fri, 19 Jul 2024 11:04:45 +0800 Subject: [PATCH 4/4] Support gpt-4o-mini model --- bot/chatgpt/chat_gpt_session.py | 2 +- common/const.py | 3 ++- config.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bot/chatgpt/chat_gpt_session.py b/bot/chatgpt/chat_gpt_session.py index d39d769..f7ff12f 100644 --- a/bot/chatgpt/chat_gpt_session.py +++ b/bot/chatgpt/chat_gpt_session.py @@ -67,7 +67,7 @@ def num_tokens_from_messages(messages, model): elif model in ["gpt-4-0314", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613", "gpt-35-turbo-16k", "gpt-4-turbo-preview", "gpt-4-1106-preview", const.GPT4_TURBO_PREVIEW, const.GPT4_VISION_PREVIEW, const.GPT4_TURBO_01_25, - const.GPT_4o, const.LINKAI_4o, const.LINKAI_4_TURBO]: + const.GPT_4o, const.GPT_4o_MINI, const.LINKAI_4o, const.LINKAI_4_TURBO]: return num_tokens_from_messages(messages, model="gpt-4") elif model.startswith("claude-3"): return num_tokens_from_messages(messages, model="gpt-3.5-turbo") diff --git 
a/common/const.py b/common/const.py index 45e72e7..68d3795 100644 --- a/common/const.py +++ b/common/const.py @@ -32,6 +32,7 @@ GPT4_TURBO_11_06 = "gpt-4-1106-preview" GPT4_VISION_PREVIEW = "gpt-4-vision-preview" GPT4 = "gpt-4" +GPT_4o_MINI = "gpt-4o-mini" GPT4_32k = "gpt-4-32k" GPT4_06_13 = "gpt-4-0613" GPT4_32k_06_13 = "gpt-4-32k-0613" @@ -57,7 +58,7 @@ GEMINI_15_PRO = "gemini-1.5-pro" MODEL_LIST = [ GPT35, GPT35_0125, GPT35_1106, "gpt-3.5-turbo-16k", - GPT_4o, GPT4_TURBO, GPT4_TURBO_PREVIEW, GPT4_TURBO_01_25, GPT4_TURBO_11_06, GPT4, GPT4_32k, GPT4_06_13, GPT4_32k_06_13, + GPT_4o, GPT_4o_MINI, GPT4_TURBO, GPT4_TURBO_PREVIEW, GPT4_TURBO_01_25, GPT4_TURBO_11_06, GPT4, GPT4_32k, GPT4_06_13, GPT4_32k_06_13, WEN_XIN, WEN_XIN_4, XUNFEI, ZHIPU_AI, MOONSHOT, MiniMax, GEMINI, GEMINI_PRO, GEMINI_15_flash, GEMINI_15_PRO, diff --git a/config.py b/config.py index 26f073d..a246ce3 100644 --- a/config.py +++ b/config.py @@ -17,7 +17,7 @@ available_setting = { "open_ai_api_base": "https://api.openai.com/v1", "proxy": "", # openai使用的代理 # chatgpt模型, 当use_azure_chatgpt为true时,其名称为Azure上model deployment名称 - "model": "gpt-3.5-turbo", # 可选择: gpt-4o, gpt-4-turbo, claude-3-sonnet, wenxin, moonshot, qwen-turbo, xunfei, glm-4, minimax, gemini等模型,全部可选模型详见common/const.py文件 + "model": "gpt-3.5-turbo", # 可选择: gpt-4o, gpt-4o-mini, gpt-4-turbo, claude-3-sonnet, wenxin, moonshot, qwen-turbo, xunfei, glm-4, minimax, gemini等模型,全部可选模型详见common/const.py文件 "bot_type": "", # 可选配置,使用兼容openai格式的三方服务时候,需填"chatGPT"。bot具体名称详见common/const.py文件列出的bot_type,如不填根据model名称判断, "use_azure_chatgpt": False, # 是否使用azure的chatgpt "azure_deployment_id": "", # azure 模型部署名称