"""kimi4upload plugin.

Intercepts uploaded files and images, extracts their text through the
Moonshot (Kimi) file-extract API, caches the extracted content per user for
five minutes, and answers follow-up questions by forwarding them to an LLM
chat-completion endpoint.  Images whose text cannot be extracted are uploaded
to Aliyun OSS and referenced by URL instead.
"""

import base64
import json
import os
from pathlib import Path
from typing import Optional

import oss2
import requests
from PIL import Image

import plugins
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from channel.chat_message import ChatMessage
from common.expired_dict import ExpiredDict
from common.log import logger
from plugins import *

# Extension -> document-type label used by summarization services.
# NOTE(review): not referenced inside this module; kept because other modules
# may import it from here.
EXTENSION_TO_TYPE = {
    'pdf': 'pdf',
    'doc': 'docx',
    'docx': 'docx',
    'md': 'md',
    'txt': 'txt',
    'xls': 'excel',
    'xlsx': 'excel',
    'csv': 'csv',
    'html': 'html',
    'htm': 'html',
    'ppt': 'ppt',
    'pptx': 'ppt'
}


@plugins.register(
    name="kimi4upload",
    desire_priority=-1,
    desc="A plugin for upload",
    version="0.0.01",
    author="",
)
class file4upload(Plugin):
    def __init__(self):
        super().__init__()
        try:
            curdir = os.path.dirname(__file__)
            config_path = os.path.join(curdir, "config.json")
            if os.path.exists(config_path):
                with open(config_path, "r", encoding="utf-8") as f:
                    self.config = json.load(f)
            else:
                # Fall back to the base-class config loader.
                self.config = super().load_config()
            if not self.config:
                raise Exception("config.json not found")

            # Register the event handler.
            self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
            # Per-user conversation state; entries expire after 300 seconds.
            self.params_cache = ExpiredDict(300)

            # Pull the relevant sections out of the config.
            self.keys = self.config.get("keys", {})
            self.url_sum = self.config.get("url_sum", {})
            self.search_sum = self.config.get("search_sum", {})
            self.file_sum = self.config.get("file_sum", {})
            self.image_sum = self.config.get("image_sum", {})
            self.note = self.config.get("note", {})

            self.sum4all_key = self.keys.get("sum4all_key", "")
            self.search1api_key = self.keys.get("search1api_key", "")
            self.gemini_key = self.keys.get("gemini_key", "")
            self.bibigpt_key = self.keys.get("bibigpt_key", "")
            self.outputLanguage = self.keys.get("outputLanguage", "zh-CN")
            self.opensum_key = self.keys.get("opensum_key", "")
            self.open_ai_api_key = self.keys.get("open_ai_api_key", "")
            self.model = self.keys.get("model", "gpt-3.5-turbo")
            self.open_ai_api_base = self.keys.get("open_ai_api_base", "https://api.openai.com/v1")
            self.xunfei_app_id = self.keys.get("xunfei_app_id", "")
            self.xunfei_api_key = self.keys.get("xunfei_api_key", "")
            self.xunfei_api_secret = self.keys.get("xunfei_api_secret", "")
            self.perplexity_key = self.keys.get("perplexity_key", "")
            self.flomo_key = self.keys.get("flomo_key", "")

            # SECURITY FIX: the Moonshot API key and the Aliyun OSS credentials
            # used to be hardcoded in this file.  They must now be supplied via
            # config.json ("keys.moonshot_api_key" and an "oss" section); the
            # previously committed literal values must be revoked.
            self.moonshot_api_key = self.keys.get("moonshot_api_key", "")
            self.oss_conf = self.config.get("oss", {})

            # NOTE(review): previous_prompt is shared by every user of this
            # plugin instance, so concurrent users can see each other's pending
            # question.  Kept for compatibility; consider moving it into
            # params_cache keyed by user_id.
            self.previous_prompt = ''

            self.file_sum_enabled = self.file_sum.get("enabled", False)
            self.file_sum_service = self.file_sum.get("service", "")
            self.max_file_size = self.file_sum.get("max_file_size", 15000)
            self.file_sum_group = self.file_sum.get("group", True)
            self.file_sum_qa_prefix = self.file_sum.get("qa_prefix", "问")
            self.file_sum_prompt = self.file_sum.get("prompt", "")

            self.image_sum_enabled = self.image_sum.get("enabled", False)
            self.image_sum_service = self.image_sum.get("service", "")
            self.image_sum_group = self.image_sum.get("group", True)
            self.image_sum_qa_prefix = self.image_sum.get("qa_prefix", "问")
            self.image_sum_prompt = self.image_sum.get("prompt", "")

            logger.info("[file4upload] inited.")
        except Exception as e:
            logger.warn(f"file4upload init failed: {e}")

    def on_handle_context(self, e_context: EventContext):
        """Entry point for ON_HANDLE_CONTEXT events.

        TEXT messages are remembered as the user's pending question.  IMAGE
        and FILE uploads have their text extracted (images fall back to an OSS
        URL) and are cached per user; a later TEXT message triggers the actual
        LLM request against the cached content.
        """
        context = e_context["context"]
        if context.type not in [ContextType.TEXT, ContextType.SHARING, ContextType.FILE, ContextType.IMAGE]:
            return
        msg: ChatMessage = e_context["context"]["msg"]
        user_id = msg.from_user_id
        content = context.content
        isgroup = e_context["context"].get("isgroup", False)

        # Remember the latest text message as the pending question.
        if context.type == ContextType.TEXT:
            self.previous_prompt = msg.content

        if isgroup and not self.file_sum_group:
            # File handling is disabled for group chats.
            logger.info("群聊消息,文件处理功能已禁用")
            return
        logger.info("on_handle_context: 处理上下文开始")
        context.get("msg").prepare()

        # SECURITY FIX: key comes from config, not a hardcoded literal.
        api_key = self.moonshot_api_key

        if context.type == ContextType.IMAGE:
            file_path = context.content
            logger.info(f"on_handle_context: 获取到文件路径 {file_path}")
            logger.info('处理首次上次的图片,准备抽取文字')
            file_content = self.extract_content_by_llm(file_path, api_key)
            self.params_cache[user_id] = {}
            if file_content is not None:
                logger.info('图片中抽取文字,使用使用图片的文字请求LLM')
                messages = [{
                    "role": "system",
                    "content": "你是 Kimi,由 Moonshot AI 提供的人工智能助手,你更擅长中文和英文的对话。你会为用户提供安全,有帮助,准确的回答。同时,你会拒绝一切涉及恐怖主义,种族歧视,黄色暴力等问题的回答。Moonshot AI 为专有名词,不可翻译成其他语言。",
                }, {
                    "role": "system",
                    "content": file_content,
                }]
                self.params_cache[user_id]['last_word_messages'] = messages
                self.params_cache[user_id]['last_image_oss'] = None
            else:
                logger.info('不能抽取文字,使用图片oss请求LLM')
                # SECURITY FIX: OSS credentials are read from the "oss" config
                # section instead of being hardcoded in source.
                access_key_id = self.oss_conf.get('access_key_id', '')
                access_key_secret = self.oss_conf.get('access_key_secret', '')
                # Endpoint of the OSS region holding the bucket.
                endpoint = self.oss_conf.get('endpoint', 'http://oss-cn-shanghai.aliyuncs.com')
                bucket_name = self.oss_conf.get('bucket_name', 'cow-agent')
                local_file_path = file_path
                oss_file_name = f'cow/{os.path.basename(file_path)}'
                logger.info(f'oss_file_name:{oss_file_name}\n local_file_path :{local_file_path}')
                file_url = upload_oss(access_key_id, access_key_secret, endpoint, bucket_name, local_file_path, oss_file_name)
                logger.info(f'写入图片缓存oss 地址{file_url}')
                self.params_cache[user_id]['last_image_oss'] = file_url
                self.params_cache[user_id]['last_word_messages'] = None

            if not self.previous_prompt:
                # No pending question yet: acknowledge the upload and wait.
                reply = Reply()
                reply.type = ReplyType.TEXT
                reply.content = f"您刚刚上传了一张图片,请问我有什么可以帮您的呢?"
                e_context["reply"] = reply
                e_context.action = EventAction.BREAK
                return
            else:
                e_context.action = EventAction.CONTINUE

        if context.type == ContextType.FILE:
            file_path = context.content
            logger.info(f"on_handle_context: 获取到文件路径 {file_path}")
            logger.info('处理首次上次的文件')
            file_content = self.extract_content_by_llm(file_path, api_key)
            if file_content is not None:
                self.params_cache[user_id] = {}
                messages = [{
                    "role": "system",
                    "content": "你是 Kimi,由 Moonshot AI 提供的人工智能助手,你更擅长中文和英文的对话。你会为用户提供安全,有帮助,准确的回答。同时,你会拒绝一切涉及恐怖主义,种族歧视,黄色暴力等问题的回答。Moonshot AI 为专有名词,不可翻译成其他语言。",
                }, {
                    "role": "system",
                    "content": file_content,
                }]
                self.params_cache[user_id]['last_word_messages'] = messages
                self.params_cache[user_id]['last_image_oss'] = None

            if not self.previous_prompt:
                reply = Reply()
                reply.type = ReplyType.TEXT
                reply.content = f"您刚刚上传了一个文件,请问我有什么可以帮您的呢?如可以问“请总结分析这份报告文件,同时,提供治疗和健康建议。”"
                e_context["reply"] = reply
                e_context.action = EventAction.BREAK
                return
            else:
                e_context.action = EventAction.CONTINUE

        # A question is pending and extracted file text is cached: answer it.
        if user_id in self.params_cache:
            cache = self.params_cache[user_id]
            if cache.get('last_word_messages') is not None:
                logger.info('缓存处理已经上传的文件')
                messages = [
                    *cache['last_word_messages'],
                    {
                        "role": "user",
                        "content": self.previous_prompt,
                    },
                ]
                self.handle_file_upload(messages, e_context)

        # Otherwise, if an OSS image URL is cached, answer with the vision path.
        # (Re-check the cache: handle_file_upload clears it.)
        if user_id in self.params_cache:
            cache = self.params_cache[user_id]
            if cache.get('last_image_oss') is not None:
                logger.info('缓存处理已经oss图片的文件')
                file_url = cache['last_image_oss']
                messages = [{
                    "role": "system",
                    "content": "你是一个能能描述任何图片的智能助手",
                }, {
                    "role": "user",
                    "content": f'{file_url}\n{self.previous_prompt}',
                }]
                self.handle_images_oos(messages, e_context)

    def _parse_llm_response(self, response_data):
        """Extract the assistant text from a chat-completion response body.

        :param response_data: parsed JSON dict from the LLM API.
        :return: (reply_content, ok) — ok is True when a message content was
            found; on success literal "\\n" sequences are unescaped to real
            newlines.  On failure reply_content is a diagnostic string.
        """
        if "choices" in response_data and len(response_data["choices"]) > 0:
            first_choice = response_data["choices"][0]
            if "message" in first_choice and "content" in first_choice["message"]:
                response_content = first_choice["message"]["content"].strip()
                return response_content.replace("\\n", "\n"), True
            logger.error("Content not found in the response")
            return "Content not found in the LLM API response", False
        logger.error("No choices available in the response")
        return "No choices available in the LLM API response", False

    def handle_file_upload(self, messages, e_context):
        """Send cached file text plus the user's question to the Moonshot chat
        API and place the (markdown-stripped) answer into e_context."""
        logger.info("handle_file: 向LLM发送内容总结请求")
        msg: ChatMessage = e_context["context"]["msg"]
        user_id = msg.from_user_id
        user_params = self.params_cache.get(user_id, {})
        prompt = user_params.get('prompt', self.file_sum_prompt)
        # Consume the cached state so it is not reused by later messages.
        self.params_cache[user_id] = {}
        try:
            # SECURITY FIX: key comes from config, not a hardcoded literal.
            api_key = self.moonshot_api_key
            api_url = "https://api.moonshot.cn/v1/chat/completions"
            headers = {
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {api_key}'
            }
            data = {
                "model": "moonshot-v1-128k",
                "messages": messages,
            }
            response = requests.post(url=api_url, headers=headers, data=json.dumps(data))
            logger.info(f'handle_file_upload: 请求文件内容{json.dumps(messages, ensure_ascii=False)}')
            response.raise_for_status()
            reply_content, ok = self._parse_llm_response(response.json())
            if ok:
                # Question answered: clear the pending prompt.
                self.previous_prompt = ''
        except requests.exceptions.RequestException as e:
            logger.error(f"Error calling LLM API: {e}")
            reply_content = f"An error occurred while calling LLM API"

        reply = Reply()
        reply.type = ReplyType.TEXT
        reply.content = f"{remove_markdown(reply_content)}"
        e_context["reply"] = reply
        e_context.action = EventAction.BREAK_PASS

    def handle_images_base64(self, messages, e_context):
        """Answer an image question by sending the image inline as base64.

        NOTE(review): appears to be debug/dead code — it ignores *messages*
        and always encodes the hardcoded path 'tmp/240926-164856.png'; no
        caller in this module uses it.  Kept for interface compatibility.
        """
        logger.info("handle_file: 向LLM发送内容总结请求")
        msg: ChatMessage = e_context["context"]["msg"]
        user_id = msg.from_user_id
        user_params = self.params_cache.get(user_id, {})
        prompt = user_params.get('prompt', self.file_sum_prompt)
        try:
            base64_image = self.encode_image_to_base64('tmp/240926-164856.png')
            api_key = self.open_ai_api_key
            api_base = f"{self.open_ai_api_base}/chat/completions"
            logger.info(api_base)
            payload = {
                "model": "moonshot-v1-128k",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": self.previous_prompt
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ],
                "max_tokens": 3000
            }
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}"
            }
            logger.info('开始')
            response = requests.post(api_base, headers=headers, json=payload)
            response.raise_for_status()
            reply_content, ok = self._parse_llm_response(response.json())
            if ok:
                self.previous_prompt = ''
        except requests.exceptions.RequestException as e:
            logger.error(f"Error calling LLM API: {e}")
            reply_content = f"An error occurred while calling LLM API"

        reply = Reply()
        reply.type = ReplyType.TEXT
        reply.content = f"{remove_markdown(reply_content)}"
        e_context["reply"] = reply
        e_context.action = EventAction.BREAK_PASS

    def handle_images_oos(self, messages, e_context):
        """Answer an image question by referencing its OSS URL in the prompt
        and calling the configured OpenAI-compatible endpoint."""
        logger.info("handle_file: 向LLM发送内容总结请求")
        msg: ChatMessage = e_context["context"]["msg"]
        user_id = msg.from_user_id
        user_params = self.params_cache.get(user_id, {})
        prompt = user_params.get('prompt', self.file_sum_prompt)
        # Consume the cached state so it is not reused by later messages.
        self.params_cache[user_id] = {}
        try:
            api_key = self.open_ai_api_key
            api_base = f"{self.open_ai_api_base}/chat/completions"
            logger.info(api_base)
            payload = {
                # NOTE(review): numeric model id looks provider-specific
                # (Coze/bot id?) — confirm and consider moving to config.
                "model": "7374349217580056592",
                "messages": messages,
                "max_tokens": 3000
            }
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}"
            }
            response = requests.post(api_base, headers=headers, json=payload)
            logger.info(f'handle_file_upload: 请求文件内容{json.dumps(messages, ensure_ascii=False)}')
            response.raise_for_status()
            reply_content, ok = self._parse_llm_response(response.json())
            if ok:
                self.previous_prompt = ''
        except requests.exceptions.RequestException as e:
            logger.error(f"Error calling LLM API: {e}")
            reply_content = f"An error occurred while calling LLM API"

        reply = Reply()
        reply.type = ReplyType.TEXT
        reply.content = f"{remove_markdown(reply_content)}"
        e_context["reply"] = reply
        e_context.action = EventAction.BREAK_PASS

    def extract_content_by_llm(self, file_path: str, api_key: str) -> Optional[str]:
        """Upload a file to the Moonshot files API and fetch its extracted text.

        :param file_path: local path of the file/image to extract.
        :param api_key: Moonshot API key.
        :return: extracted text, or None when any request fails.
        """
        logger.info('大模型开始抽取文字')
        try:
            headers = {
                'Authorization': f'Bearer {api_key}'
            }
            data = {
                'purpose': 'file-extract',
            }
            file_name = os.path.basename(file_path)
            api_url = 'https://api.moonshot.cn/v1/files'
            # FIX: the file handle was previously leaked; close it deterministically.
            with open(Path(file_path), 'rb') as fh:
                files = {
                    'file': (file_name, fh),
                }
                response = requests.post(api_url, headers=headers, files=files, data=data)
            # FIX: surface HTTP errors instead of trying to parse error bodies.
            response.raise_for_status()
            file_id = response.json().get('id')
            response = requests.get(
                url=f"https://api.moonshot.cn/v1/files/{file_id}/content",
                headers=headers)
            response.raise_for_status()
            return response.json().get('content')
        except requests.exceptions.RequestException as e:
            logger.error(f"Error calling LLM API: {e}")
            return None

    def encode_image_to_base64(self, image_path):
        """Return the base64 (ascii) encoding of the image at *image_path*.

        Images wider than 1024px are downscaled first.  NOTE(review): the
        resized image is written back over the original file on disk.
        """
        logger.info(f"开始处理图片: {image_path}")
        try:
            with Image.open(image_path) as img:
                logger.info(f"成功打开图片. 原始大小: {img.size}")
                if img.width > 1024:
                    new_size = (1024, int(img.height * 1024 / img.width))
                    img = img.resize(new_size)
                    img.save(image_path)  # persist the downscaled image
                    logger.info(f"调整图片大小至: {new_size}")

            with open(image_path, "rb") as image_file:
                img_byte_arr = image_file.read()
                logger.info(f"读取图片完成. 大小: {len(img_byte_arr)} 字节")

            encoded = base64.b64encode(img_byte_arr).decode('ascii')
            logger.info(f"Base64编码完成. 编码后长度: {len(encoded)}")
            return encoded
        except Exception as e:
            logger.error(f"图片编码过程中发生错误: {str(e)}", exc_info=True)
            raise


def remove_markdown(text):
    """Strip the markdown markers the LLM commonly emits (bold and headings).

    NOTE: intentionally minimal — other markdown (lists, links, code fences)
    is left untouched.
    """
    # Remove bold markers.
    text = text.replace("**", "")
    # Remove heading markers.
    text = text.replace("### ", "").replace("## ", "").replace("# ", "")
    return text


def upload_oss(access_key_id, access_key_secret, endpoint, bucket_name, local_file_path, oss_file_name, expiration_days=7):
    """Upload a file to Aliyun OSS, set a lifecycle rule, and return its URL.

    :param access_key_id: Aliyun AccessKey ID.
    :param access_key_secret: Aliyun AccessKey Secret.
    :param endpoint: endpoint of the bucket's OSS region.
    :param bucket_name: OSS bucket name.
    :param local_file_path: local path of the file to upload.
    :param oss_file_name: destination object key inside the bucket.
    :param expiration_days: days after which the object is auto-deleted (default 7).
    :return: public access URL of the uploaded object.
    """
    auth = oss2.Auth(access_key_id, access_key_secret)
    bucket = oss2.Bucket(auth, endpoint, bucket_name)

    # 1. Lifecycle rule: auto-delete objects under this prefix after N days.
    # NOTE(review): this rewrites the bucket lifecycle on EVERY upload —
    # consider configuring it once out-of-band.
    rule_id = f'delete_after_{expiration_days}_days'
    prefix = oss_file_name.split('/')[0] + '/'  # rule applies to the object's top-level folder
    rule = oss2.models.LifecycleRule(
        rule_id, prefix,
        status=oss2.models.LifecycleRule.ENABLED,
        expiration=oss2.models.LifecycleExpiration(days=expiration_days))
    bucket.put_bucket_lifecycle(oss2.models.BucketLifecycle([rule]))
    logger.info(f"已设置生命周期规则:文件将在{expiration_days}天后自动删除")

    # 2. Upload the file.
    bucket.put_object_from_file(oss_file_name, local_file_path)

    # 3. Build the public access URL.
    file_url = f"http://{bucket_name}.{endpoint.replace('http://', '')}/{oss_file_name}"
    logger.info(f"文件上传成功,公共访问地址:{file_url}")
    return file_url