|
- import requests
- import json
- import plugins
- from bridge.reply import Reply, ReplyType
- from bridge.context import ContextType
- from channel.chat_message import ChatMessage
- from plugins import *
- from common.log import logger
- from common.expired_dict import ExpiredDict
- import os
- import base64
- from pathlib import Path
- from PIL import Image
- import oss2
-
-
# Map each supported file extension to its canonical document-type label.
# Expressed as the inverse table (type -> extensions) and flattened, so
# extensions that share a type are grouped in one place.
EXTENSION_TO_TYPE = {
    extension: doc_type
    for doc_type, extensions in {
        'pdf': ('pdf',),
        'docx': ('doc', 'docx'),
        'md': ('md',),
        'txt': ('txt',),
        'excel': ('xls', 'xlsx'),
        'csv': ('csv',),
        'html': ('html', 'htm'),
        'ppt': ('ppt', 'pptx'),
    }.items()
    for extension in extensions
}
-
@plugins.register(
    name="kimi4upload",
    desire_priority=-1,
    desc="A plugin for upload",
    version="0.0.01",
    author="",
)


class file4upload(Plugin):
    """Chat plugin that accepts file/image uploads and answers follow-up questions about them.

    Flow (as implemented in on_handle_context):
      * FILE/IMAGE context: content is text-extracted via the Moonshot (Kimi)
        files API; if an image yields no text it is uploaded to Aliyun OSS
        instead. Either result is cached per user in ``params_cache``
        (entries expire after 300 seconds).
      * TEXT context: the message is remembered as ``previous_prompt``; the
        cached content plus that prompt is then sent to a chat-completions
        endpoint and the model's answer is returned as the reply.
    """

    def __init__(self):
        """Load config.json (plugin dir first, then the parent loader), register the
        ON_HANDLE_CONTEXT hook, and copy key/feature settings onto attributes."""
        super().__init__()
        try:
            curdir = os.path.dirname(__file__)
            config_path = os.path.join(curdir, "config.json")
            if os.path.exists(config_path):
                with open(config_path, "r", encoding="utf-8") as f:
                    self.config = json.load(f)
            else:
                # Fall back to the parent class's config loader
                self.config = super().load_config()

            if not self.config:
                raise Exception("config.json not found")
            # Register the context-handling event hook
            self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
            # Per-user cache of uploaded content; entries expire after 300 seconds
            self.params_cache = ExpiredDict(300)

            # Pull the required settings out of the config
            self.keys = self.config.get("keys", {})
            self.url_sum = self.config.get("url_sum", {})
            self.search_sum = self.config.get("search_sum", {})
            self.file_sum = self.config.get("file_sum", {})
            self.image_sum = self.config.get("image_sum", {})
            self.note = self.config.get("note", {})

            self.sum4all_key = self.keys.get("sum4all_key", "")
            self.search1api_key = self.keys.get("search1api_key", "")
            self.gemini_key = self.keys.get("gemini_key", "")
            self.bibigpt_key = self.keys.get("bibigpt_key", "")
            self.outputLanguage = self.keys.get("outputLanguage", "zh-CN")
            self.opensum_key = self.keys.get("opensum_key", "")
            self.open_ai_api_key = self.keys.get("open_ai_api_key", "")
            self.model = self.keys.get("model", "gpt-3.5-turbo")
            self.open_ai_api_base = self.keys.get("open_ai_api_base", "https://api.openai.com/v1")
            self.xunfei_app_id = self.keys.get("xunfei_app_id", "")
            self.xunfei_api_key = self.keys.get("xunfei_api_key", "")
            self.xunfei_api_secret = self.keys.get("xunfei_api_secret", "")
            self.perplexity_key = self.keys.get("perplexity_key", "")
            self.flomo_key = self.keys.get("flomo_key", "")
            # Last text prompt seen from a user.
            # NOTE(review): this is a single attribute shared across ALL users —
            # concurrent users can see each other's prompt; consider keying it
            # by user_id (e.g. inside params_cache).
            self.previous_prompt=''

            self.file_sum_enabled = self.file_sum.get("enabled", False)
            self.file_sum_service = self.file_sum.get("service", "")
            self.max_file_size = self.file_sum.get("max_file_size", 15000)
            self.file_sum_group = self.file_sum.get("group", True)
            self.file_sum_qa_prefix = self.file_sum.get("qa_prefix", "问")
            self.file_sum_prompt = self.file_sum.get("prompt", "")

            self.image_sum_enabled = self.image_sum.get("enabled", False)
            self.image_sum_service = self.image_sum.get("service", "")
            self.image_sum_group = self.image_sum.get("group", True)
            self.image_sum_qa_prefix = self.image_sum.get("qa_prefix", "问")
            self.image_sum_prompt = self.image_sum.get("prompt", "")

            # Successful initialization
            logger.info("[file4upload] inited.")
        except Exception as e:
            # Initialization failed; the plugin stays loaded but unconfigured
            logger.warn(f"file4upload init failed: {e}")

    def on_handle_context(self, e_context: EventContext):
        """Main event hook.

        Caches freshly uploaded files/images for the sending user, and — when a
        prior text prompt exists — forwards cached content plus that prompt to
        the LLM, replying via ``e_context``.
        """
        context = e_context["context"]
        if context.type not in [ContextType.TEXT, ContextType.SHARING,ContextType.FILE,ContextType.IMAGE]:
            return
        msg: ChatMessage = e_context["context"]["msg"]
        user_id = msg.from_user_id
        content = context.content
        isgroup = e_context["context"].get("isgroup", False)
        # logger.info(f"user_id:{user_id},content:{content},isgroup:{isgroup}")
        # logger.info(f'上下文参数缓存键字典:{self.params_cache.keys}')
        # logger.info(f'user_id in self.params_cache: {user_id in self.params_cache}')
        # Remember the latest text message as the pending prompt for any
        # cached upload (see NOTE in __init__ about cross-user sharing).
        if context.type == ContextType.TEXT:
            self.previous_prompt=msg.content

        if isgroup and not self.file_sum_group:
            # File handling is disabled for group chats
            logger.info("群聊消息,文件处理功能已禁用")
            return
        logger.info("on_handle_context: 处理上下文开始")
        context.get("msg").prepare()
        # file_path = context.content
        # logger.info(f"on_handle_context: 获取到文件路径 {file_path}")

        # NOTE(review): hard-coded Moonshot API key checked into source —
        # move to config/env and rotate this credential.
        api_key='sk-5z2L4zy9T1w90j6e3T90ANZdyN2zLWClRwFnBzWgzdrG4onx'

        if context.type == ContextType.IMAGE:
            file_path = context.content
            logger.info(f"on_handle_context: 获取到文件路径 {file_path}")
            print(f'处理首次上次的图片,准备抽取文字')
            file_content=self.extract_content_by_llm(file_path,api_key)
            self.params_cache[user_id] = {}
            if file_content is not None:
                # Text extraction succeeded: cache the extracted text as
                # system messages for the follow-up question.
                logger.info('图片中抽取文字,使用使用图片的文字请求LLM')

                messages = [{
                    "role": "system",
                    "content": "你是 Kimi,由 Moonshot AI 提供的人工智能助手,你更擅长中文和英文的对话。你会为用户提供安全,有帮助,准确的回答。同时,你会拒绝一切涉及恐怖主义,种族歧视,黄色暴力等问题的回答。Moonshot AI 为专有名词,不可翻译成其他语言。",
                },{
                    "role": "system",
                    "content": file_content,
                }]
                self.params_cache[user_id]['last_word_messages']=messages
                self.params_cache[user_id]['last_image_oss']=None
            else:
                # No text extracted: upload the raw image to OSS and cache its URL.
                logger.info('不能抽取文字,使用图片oss请求LLM')
                # logger.info(f"on_handle_context: 获取到图片路径 {file_path}")
                # base64_image=self.encode_image_to_base64(file_path)

                # self.params_cache[user_id]['last_image_oss']=base64_image
                # self.params_cache[user_id]['last_word_messages']=None
                # NOTE(review): hard-coded Aliyun OSS credentials checked into
                # source — move to config/env and rotate.
                access_key_id = 'LTAI5tRTG6pLhTpKACJYoPR5'
                access_key_secret = 'E7dMzeeMxq4VQvLg7Tq7uKf3XWpYfN'
                # Endpoint for the OSS region (choose per your region)
                endpoint = 'http://oss-cn-shanghai.aliyuncs.com'
                # Bucket name
                bucket_name = 'cow-agent'
                local_file_path=file_path
                oss_file_name=f'cow/{os.path.basename(file_path)}'
                logger.info(f'oss_file_name:{oss_file_name}\n local_file_path :{local_file_path}')
                file_url = upload_oss(access_key_id, access_key_secret, endpoint, bucket_name, local_file_path, oss_file_name)
                logger.info(f'写入图片缓存oss 地址{file_url}')
                self.params_cache[user_id]['last_image_oss']=file_url
                self.params_cache[user_id]['last_word_messages']=None


            if self.previous_prompt == '':
                # No pending question yet: prompt the user and stop the pipeline.
                reply = Reply()
                reply.type = ReplyType.TEXT
                # reply.content = f"{remove_markdown(reply_content)}\n\n💬5min内输入{self.file_sum_qa_prefix}+问题,可继续追问"
                reply.content = f"您刚刚上传了一张图片,请问我有什么可以帮您的呢?"
                e_context["reply"] = reply
                e_context.action = EventAction.BREAK
                return
            else:
                # A prompt already exists: fall through to the cache handlers below.
                e_context.action = EventAction.CONTINUE

        if context.type == ContextType.FILE:
            file_path = context.content
            logger.info(f"on_handle_context: 获取到文件路径 {file_path}")
            print(f'处理首次上次的文件')
            file_content=self.extract_content_by_llm(file_path,api_key)
            if file_content is not None:
                self.params_cache[user_id] = {}
                messages = [{
                    "role": "system",
                    "content": "你是 Kimi,由 Moonshot AI 提供的人工智能助手,你更擅长中文和英文的对话。你会为用户提供安全,有帮助,准确的回答。同时,你会拒绝一切涉及恐怖主义,种族歧视,黄色暴力等问题的回答。Moonshot AI 为专有名词,不可翻译成其他语言。",
                },{
                    "role": "system",
                    "content": file_content,
                }]
                self.params_cache[user_id]['last_word_messages']=messages
                self.params_cache[user_id]['last_image_oss']=None

            if self.previous_prompt == '':
                # No pending question yet: prompt the user and stop the pipeline.
                reply = Reply()
                reply.type = ReplyType.TEXT
                # reply.content = f"{remove_markdown(reply_content)}\n\n💬5min内输入{self.file_sum_qa_prefix}+问题,可继续追问"
                reply.content = f"您刚刚上传了一个文件,请问我有什么可以帮您的呢?如可以问“请总结分析这份报告文件,同时,提供治疗和健康建议。”"
                e_context["reply"] = reply
                e_context.action = EventAction.BREAK
                return
            else:
                e_context.action = EventAction.CONTINUE

        # Follow-up path 1: a previous upload was cached as extracted text.
        # if user_id in self.params_cache and (self.params_cache[user_id]['last_word_messages']!=None):
        if user_id in self.params_cache:
            if 'last_word_messages' in self.params_cache[user_id] and self.params_cache[user_id]['last_word_messages'] is not None:
                print(f'缓存处理已经上传的文件')
                # last_word_messages=self.params_cache[user_id]['last_word_messages']
                # cache_messages=last_word_messages[:2]
                cache_messages=self.params_cache[user_id]['last_word_messages']
                messages = [
                    *cache_messages,
                    {
                        "role": "user",
                        "content": self.previous_prompt ,#msg.content,
                    },
                ]
                self.handle_file_upload(messages, e_context)

        # Follow-up path 2: a previous image upload was cached as an OSS URL.
        # if user_id in self.params_cache and ('last_image_oss' in self.params_cache[user_id] or self.params_cache[user_id]['last_image_oss']!=None):
        if user_id in self.params_cache:
            if 'last_image_oss' in self.params_cache[user_id] and self.params_cache[user_id]['last_image_oss'] is not None:
                print(f'缓存处理已经oss图片的文件')
                file_url=self.params_cache[user_id]['last_image_oss']
                messages = [{
                    "role": "system",
                    "content": "你是一个能能描述任何图片的智能助手",
                },
                {
                    "role": "user",
                    "content": f'{file_url}\n{self.previous_prompt}',
                }]
                # messages=[
                #     {
                #         "role": "user",
                #         "content": [
                #             {
                #                 "type": "image_url",
                #                 "image_url": {
                #                     "url": f"{file_url}"
                #                 }
                #             },
                #             {
                #                 "type": "text",
                #                 "text": f"{self.previous_prompt}"
                #             }
                #         ]
                #     }
                # ]


                self.handle_images_oos(messages, e_context)

    def handle_file_upload(self, messages, e_context):
        """Send cached file content + user prompt to the Moonshot chat API and reply.

        Clears the user's params_cache entry and, on success, resets
        ``previous_prompt``. Always sets a TEXT reply and BREAK_PASS.
        """
        logger.info("handle_file: 向LLM发送内容总结请求")

        msg: ChatMessage = e_context["context"]["msg"]
        user_id = msg.from_user_id
        user_params = self.params_cache.get(user_id, {})
        prompt = user_params.get('prompt', self.file_sum_prompt)

        # Consume the cached upload: one question per upload.
        self.params_cache[user_id] = {}


        try:
            # NOTE(review): hard-coded API key (duplicate of the one in
            # on_handle_context) — move to config/env and rotate.
            api_key = "sk-5z2L4zy9T1w90j6e3T90ANZdyN2zLWClRwFnBzWgzdrG4onx"
            # base_url = "https://api.moonshot.cn/v1",
            api_url = "https://api.moonshot.cn/v1/chat/completions"
            headers = {
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {api_key}'
            }

            data={
                "model": "moonshot-v1-128k",
                "messages":messages,
                # "temperature": 0.3
            }
            response = requests.post(url=api_url, headers=headers, data=json.dumps(data))
            logger.info(f'handle_file_upload: 请求文件内容{json.dumps(messages, ensure_ascii=False)}')
            response.raise_for_status()
            response_data = response.json()
            if "choices" in response_data and len(response_data["choices"]) > 0:
                first_choice = response_data["choices"][0]
                if "message" in first_choice and "content" in first_choice["message"]:
                    response_content = first_choice["message"]["content"].strip()  # extract the answer text
                    reply_content = response_content.replace("\\n", "\n")  # turn literal "\n" escapes into newlines
                    # self.params_cache[user_id]['last_word_messages']=messages
                    # if self.params_cache[user_id]['last_word_messages']!=None:
                    #     self.params_cache[user_id]['last_word_messages']=messages

                    self.previous_prompt =''
                else:
                    logger.error("Content not found in the response")
                    reply_content = "Content not found in the LLM API response"
            else:
                logger.error("No choices available in the response")
                reply_content = "No choices available in the LLM API response"
        except requests.exceptions.RequestException as e:
            logger.error(f"Error calling LLM API: {e}")
            reply_content = f"An error occurred while calling LLM API"

        reply = Reply()
        reply.type = ReplyType.TEXT
        reply.content = f"{remove_markdown(reply_content)}"
        e_context["reply"] = reply
        e_context.action = EventAction.BREAK_PASS


    def handle_images_base64(self, messages, e_context):
        """Send a base64-encoded image + the pending prompt to the chat API and reply.

        NOTE(review): appears to be leftover debug code — the image path is
        hard-coded ('tmp/240926-164856.png') and the ``messages`` argument is
        ignored; nothing in this file calls it. Verify before relying on it.
        """
        logger.info("handle_file: 向LLM发送内容总结请求")

        msg: ChatMessage = e_context["context"]["msg"]
        user_id = msg.from_user_id
        user_params = self.params_cache.get(user_id, {})
        prompt = user_params.get('prompt', self.file_sum_prompt)




        try:
            # api_key = "sk-5z2L4zy9T1w90j6e3T90ANZdyN2zLWClRwFnBzWgzdrG4onx"
            # # base_url = "https://api.moonshot.cn/v1",
            # api_url = "https://api.moonshot.cn/v1/chat/completions"



            # api_key = "sk-5dyg7PMUNeoSqHH807453eB06f434c34Ba6fB4764aC8358c"
            # api_url = "http://106.15.182.218:3001/v1/chat/completions"
            # headers = {
            #     'Content-Type': 'application/json',
            #     'Authorization': f'Bearer {api_key}'
            # }

            # data={
            #     "model": "moonshot-v1-128k",
            #     "messages":messages,
            #     # "temperature": 0.3
            # }
            # response = requests.post(url=api_url, headers=headers, json=data)

            base64_image=self.encode_image_to_base64('tmp/240926-164856.png')
            api_key = self.open_ai_api_key
            api_base = f"{self.open_ai_api_base}/chat/completions"
            logger.info(api_base)
            payload = {
                "model": "moonshot-v1-128k",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": self.previous_prompt
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ],
                "max_tokens": 3000
            }

            # payload = {
            #     "model": "moonshot-v1-128k",
            #     "messages": messages,
            #     "max_tokens": 3000
            # }
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}"
            }
            logger.info('开始')
            response = requests.post(api_base, headers=headers, json=payload)


            # logger.info(f'handle_file_upload: 请求文件内容{json.dumps(messages, ensure_ascii=False)}')
            response.raise_for_status()
            response_data = response.json()
            if "choices" in response_data and len(response_data["choices"]) > 0:
                first_choice = response_data["choices"][0]
                if "message" in first_choice and "content" in first_choice["message"]:
                    response_content = first_choice["message"]["content"].strip()  # extract the answer text
                    reply_content = response_content.replace("\\n", "\n")  # turn literal "\n" escapes into newlines
                    # self.params_cache[user_id]['last_word_messages']=messages
                    # if self.params_cache[user_id]['last_word_messages']!=None:
                    #     self.params_cache[user_id]['last_word_messages']=messages

                    self.previous_prompt =''
                else:
                    logger.error("Content not found in the response")
                    reply_content = "Content not found in the LLM API response"
            else:
                logger.error("No choices available in the response")
                reply_content = "No choices available in the LLM API response"
        except requests.exceptions.RequestException as e:
            logger.error(f"Error calling LLM API: {e}")
            reply_content = f"An error occurred while calling LLM API"

        reply = Reply()
        reply.type = ReplyType.TEXT
        reply.content = f"{remove_markdown(reply_content)}"
        e_context["reply"] = reply
        e_context.action = EventAction.BREAK_PASS

    def handle_images_oos(self, messages, e_context):
        """Send an OSS image URL + user prompt to the configured chat API and reply.

        Uses ``open_ai_api_key``/``open_ai_api_base`` from config with a
        hard-coded model id. Clears the user's params_cache entry; on success
        resets ``previous_prompt``. Always sets a TEXT reply and BREAK_PASS.
        """
        logger.info("handle_file: 向LLM发送内容总结请求")

        msg: ChatMessage = e_context["context"]["msg"]
        user_id = msg.from_user_id
        user_params = self.params_cache.get(user_id, {})
        prompt = user_params.get('prompt', self.file_sum_prompt)

        # Consume the cached upload: one question per upload.
        self.params_cache[user_id] = {}


        try:
            api_key = self.open_ai_api_key
            api_base = f"{self.open_ai_api_base}/chat/completions"
            logger.info(api_base)
            payload = {
                # NOTE(review): opaque hard-coded model id — presumably a
                # vendor bot/agent id; confirm and move to config.
                "model": "7374349217580056592",
                "messages":messages,
                "max_tokens": 3000
            }

            # payload = {
            #     "model": "moonshot-v1-128k",
            #     "messages": messages,
            #     "max_tokens": 3000
            # }
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}"
            }
            # logger.info('开始')
            response = requests.post(api_base, headers=headers, json=payload)
            logger.info(f'handle_file_upload: 请求文件内容{json.dumps(messages, ensure_ascii=False)}')
            response.raise_for_status()
            response_data = response.json()
            if "choices" in response_data and len(response_data["choices"]) > 0:
                first_choice = response_data["choices"][0]
                if "message" in first_choice and "content" in first_choice["message"]:
                    response_content = first_choice["message"]["content"].strip()  # extract the answer text
                    reply_content = response_content.replace("\\n", "\n")  # turn literal "\n" escapes into newlines
                    # self.params_cache[user_id]['last_word_messages']=messages
                    # if self.params_cache[user_id]['last_word_messages']!=None:
                    #     self.params_cache[user_id]['last_word_messages']=messages

                    self.previous_prompt =''
                else:
                    logger.error("Content not found in the response")
                    reply_content = "Content not found in the LLM API response"
            else:
                logger.error("No choices available in the response")
                reply_content = "No choices available in the LLM API response"
        except requests.exceptions.RequestException as e:
            logger.error(f"Error calling LLM API: {e}")
            reply_content = f"An error occurred while calling LLM API"

        reply = Reply()
        reply.type = ReplyType.TEXT
        reply.content = f"{remove_markdown(reply_content)}"
        e_context["reply"] = reply
        e_context.action = EventAction.BREAK_PASS

    def extract_content_by_llm(self, file_path: str, api_key: str) -> str:
        """Upload a file to the Moonshot files API and return its extracted text.

        Returns the extracted content string, or None on any request error.
        NOTE(review): the uploaded file handle is opened but never closed
        (use a ``with`` block), and neither HTTP response is checked with
        ``raise_for_status`` — a non-JSON error body would raise outside the
        caught exception type.
        """
        logger.info(f'大模型开始抽取文字')
        try:
            headers = {
                'Authorization': f'Bearer {api_key}'
            }
            data = {
                'purpose': 'file-extract',
            }
            file_name=os.path.basename(file_path)
            files = {
                'file': (file_name, open(Path(file_path), 'rb')),
            }
            print(files)
            api_url='https://api.moonshot.cn/v1/files'
            # Step 1: upload the file and obtain its id
            response = requests.post(api_url, headers=headers, files=files, data=data)
            # print(response.text)
            response_data = response.json()
            file_id = response_data.get('id')
            # print(f'文件id:{file_id}')
            # Step 2: fetch the extracted text for that file id
            response=requests.get(url=f"https://api.moonshot.cn/v1/files/{file_id}/content", headers=headers)
            print(response.text)
            response_data = response.json()
            content = response_data.get('content')
            return content
        except requests.exceptions.RequestException as e:
            logger.error(f"Error calling LLM API: {e}")
            return None

    def encode_image_to_base64(self, image_path):
        """Return the image at ``image_path`` as an ASCII base64 string.

        Images wider than 1024px are resized (preserving aspect ratio) and —
        NOTE(review) — saved back over the original file on disk before
        encoding. Re-raises any error after logging it.
        """
        logger.info(f"开始处理图片: {image_path}")
        try:
            with Image.open(image_path) as img:
                logger.info(f"成功打开图片. 原始大小: {img.size}")
                if img.width > 1024:
                    new_size = (1024, int(img.height*1024/img.width))
                    img = img.resize(new_size)
                    img.save(image_path)  # persist the downscaled image (overwrites original)
                    logger.info(f"调整图片大小至: {new_size}")

            with open(image_path, "rb") as image_file:
                img_byte_arr = image_file.read()
                logger.info(f"读取图片完成. 大小: {len(img_byte_arr)} 字节")

            encoded = base64.b64encode(img_byte_arr).decode('ascii')
            logger.info(f"Base64编码完成. 编码后长度: {len(encoded)}")
            return encoded
        except Exception as e:
            logger.error(f"图片编码过程中发生错误: {str(e)}", exc_info=True)
            raise
-
def remove_markdown(text):
    """Strip a handful of Markdown markers (bold and heading prefixes) from text."""
    # Bold marker first, then heading prefixes from most to least specific,
    # so "### " is removed before "## " or "# " could partially match it.
    for marker in ("**", "### ", "## ", "# "):
        text = text.replace(marker, "")
    return text
-
-
def upload_oss(access_key_id, access_key_secret, endpoint, bucket_name, local_file_path, oss_file_name, expiration_days=7):
    """Upload a local file to Aliyun OSS and return its public URL.

    Also (re)installs a bucket lifecycle rule so objects under the file's
    top-level prefix are deleted automatically after ``expiration_days`` days.

    :param access_key_id: Aliyun AccessKey ID
    :param access_key_secret: Aliyun AccessKey Secret
    :param endpoint: OSS endpoint for the bucket's region
    :param bucket_name: target OSS bucket
    :param local_file_path: path of the file to upload
    :param oss_file_name: object key (path) inside the bucket
    :param expiration_days: days before auto-deletion (default 7)
    :return: public HTTP URL of the uploaded object
    """
    # Authenticate and bind to the target bucket.
    auth = oss2.Auth(access_key_id, access_key_secret)
    bucket = oss2.Bucket(auth, endpoint, bucket_name)

    # 1) Lifecycle rule scoped to the object's top-level directory prefix.
    prefix = oss_file_name.split('/')[0] + '/'
    expiry_rule = oss2.models.LifecycleRule(
        f'delete_after_{expiration_days}_days',
        prefix,
        status=oss2.models.LifecycleRule.ENABLED,
        expiration=oss2.models.LifecycleExpiration(days=expiration_days),
    )
    bucket.put_bucket_lifecycle(oss2.models.BucketLifecycle([expiry_rule]))
    print(f"已设置生命周期规则:文件将在{expiration_days}天后自动删除")

    # 2) Upload the file.
    bucket.put_object_from_file(oss_file_name, local_file_path)

    # 3) Public URL: bucket as subdomain of the scheme-stripped endpoint.
    host = endpoint.replace('http://', '')
    file_url = f"http://{bucket_name}.{host}/{oss_file_name}"
    print(f"文件上传成功,公共访问地址:{file_url}")

    return file_url
-
|