No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

431 líneas
18KB

  1. import requests
  2. import json
  3. import plugins
  4. from bridge.reply import Reply, ReplyType
  5. from bridge.context import ContextType
  6. from channel.chat_message import ChatMessage
  7. from plugins import *
  8. from common.log import logger
  9. from common.expired_dict import ExpiredDict
  10. import os
  11. import base64
  12. from pathlib import Path
  13. from PIL import Image
  14. import oss2
  15. from lib import itchat
  16. from lib.itchat.content import *
  17. import re
  18. from bot.session_manager import Session
  19. from bot.session_manager import SessionManager
  20. from bot.chatgpt.chat_gpt_session import ChatGPTSession
  21. from common import kafka_helper
  22. import time
  23. @plugins.register(
  24. name="healthai",
  25. desire_priority=-1,
  26. desc="A plugin for upload",
  27. version="0.0.01",
  28. author="",
  29. )
  30. class healthai(Plugin):
  31. def __init__(self):
  32. super().__init__()
  33. try:
  34. curdir = os.path.dirname(__file__)
  35. config_path = os.path.join(curdir, "config.json")
  36. if os.path.exists(config_path):
  37. with open(config_path, "r", encoding="utf-8") as f:
  38. self.config = json.load(f)
  39. else:
  40. # 使用父类的方法来加载配置
  41. self.config = super().load_config()
  42. if not self.config:
  43. raise Exception("config.json not found")
  44. # 设置事件处理函数
  45. self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
  46. self.params_cache = ExpiredDict(300)
  47. # 从配置中提取所需的设置
  48. self.oss = self.config.get("oss", {})
  49. self.oss_access_key_id=self.oss.get("access_key_id","LTAI5tRTG6pLhTpKACJYoPR5")
  50. self.oss_access_key_secret=self.oss.get("access_key_secret","E7dMzeeMxq4VQvLg7Tq7uKf3XWpYfN")
  51. self.oss_endpoint=self.oss.get("endpoint","http://oss-cn-shanghai.aliyuncs.com")
  52. self.oss_bucket_name=self.oss.get("bucket_name","cow-agent")
  53. # 之前提示
  54. self.previous_prompt=''
  55. self.sessions = SessionManager(ChatGPTSession, model=conf().get("model") or "gpt-3.5-turbo")
  56. # 初始化成功日志
  57. logger.info("[healthai] inited.")
  58. except Exception as e:
  59. # 初始化失败日志
  60. logger.warn(f"healthai init failed: {e}")
  61. def on_handle_context(self, e_context: EventContext):
  62. context = e_context["context"]
  63. if context.type not in [ContextType.TEXT, ContextType.SHARING,ContextType.FILE,ContextType.IMAGE]:
  64. return
  65. msg: ChatMessage = e_context["context"]["msg"]
  66. user_id = msg.from_user_id
  67. content = context.content
  68. isgroup = e_context["context"].get("isgroup", False)
  69. context.get("msg").prepare()
  70. logger.info(f'当前缓存:self.params_cache:{self.params_cache}')
  71. print(f'输入内容:{content}')
  72. print(f'类型:{context.type}')
  73. if user_id not in self.params_cache:
  74. self.params_cache[user_id] = {}
  75. logger.info(f'初始化缓存:{self.params_cache}')
  76. if context.type == ContextType.TEXT and user_id in self.params_cache:
  77. self.params_cache[user_id]['previous_prompt']=content
  78. logger.info(f'上次提示缓存:{self.params_cache}')
  79. # if context.type == ContextType.TEXT and user_id in self.params_cache and contains_keywords(content):
  80. # self.params_cache[user_id]['previous_prompt']=content
  81. # logger.info(f'上次提示缓存:{self.params_cache}')
  82. # session_id = context["session_id"]
  83. # session = self.sessions.session_query(content, session_id)
  84. # print(f'session 消息{session.messages}')
  85. # if 'last_content' not in self.params_cache[user_id]:
  86. # reply = Reply()
  87. # reply.type = ReplyType.TEXT
  88. # reply.content = f"请上传相关报告或图片"
  89. # e_context["reply"] = reply
  90. # e_context.action = EventAction.BREAK_PASS
  91. session_id = context["session_id"]
  92. print(f'会话id:{session_id}')
  93. # friends=itchat.get_friends(update=True)[1:]
  94. # # logger.info(f'好友列表{friends}')
  95. # # 提取所有好友的 NickName
  96. # nicknames = [friend['NickName'] for friend in friends]
  97. # print(nicknames)
  98. # 打印所有 NickName
  99. # for nickname in nicknames:
  100. # print(nickname)
  101. session = self.sessions.build_session(session_id)
  102. print(f'session 消息{session.messages}')
  103. # if context.type == ContextType.TEXT and user_id in self.params_cache and contains_keywords(content):
  104. # self.params_cache[user_id]['previous_prompt']=content
  105. # logger.info(f'上次提示缓存:{self.params_cache}')
  106. # session_id = context["session_id"]
  107. # session = self.sessions.session_query(content, session_id)
  108. # print(f'session 消息{session.messages}')
  109. # if 'last_content' not in self.params_cache[user_id]:
  110. # reply = Reply()
  111. # reply.type = ReplyType.TEXT
  112. # reply.content = f"请上传相关报告或图片"
  113. # e_context["reply"] = reply
  114. # e_context.action = EventAction.BREAK_PASS
  115. if context.type in [ContextType.IMAGE]:
  116. logger.info('处理上传')
  117. file_path = context.content
  118. logger.info(f"on_handle_context: 获取到图片路径 {file_path},{user_id in self.params_cache}")
  119. if user_id in self.params_cache:
  120. if 'previous_prompt' not in self.params_cache[user_id] and not e_context['context']['isgroup']:
  121. reply = Reply()
  122. reply.type = ReplyType.TEXT
  123. reply.content = f"您刚刚上传图片,请问我有什么可以帮您的呢?"
  124. e_context["reply"] = reply
  125. e_context.action = EventAction.BREAK
  126. file_content = upload_oss(self.oss_access_key_id, self.oss_access_key_secret, self.oss_endpoint, self.oss_bucket_name, file_path, f'cow/{os.path.basename(file_path)}')
  127. # 确保 'last_content' 键存在,并且是一个列表
  128. if 'last_content' not in self.params_cache[user_id]:
  129. self.params_cache[user_id]['last_content'] = []
  130. # 添加文件内容到 'urls' 列表
  131. self.params_cache[user_id]['last_content'].append(file_content)
  132. logger.info('删除图片')
  133. os.remove(file_path)
  134. input_content = file_content
  135. input_from_user_nickname = msg.from_user_nickname
  136. input_to_user_nickname = msg.to_user_nickname
  137. input_wx_content_dialogue_message=[{"type": "image_url", "image_url": {"url": input_content}}]
  138. input_message=dialogue_message(input_from_user_nickname,input_to_user_nickname,input_wx_content_dialogue_message)
  139. kafka_helper.kafka_client.produce_message(input_message)
  140. logger.info("发送对话 %s",input_message)
  141. if context.type == ContextType.FILE:
  142. logger.info('处理文件')
  143. file_path = context.content
  144. logger.info(f"on_handle_context: 获取到文件路径 {file_path}")
  145. if user_id in self.params_cache:
  146. if 'previous_prompt' not in self.params_cache[user_id] and not e_context['context']['isgroup']:
  147. reply = Reply()
  148. reply.type = ReplyType.TEXT
  149. reply.content = f"您刚刚上传了一份文件,请问我有什么可以帮您的呢?"
  150. e_context["reply"] = reply
  151. e_context.action = EventAction.BREAK
  152. # else:
  153. print(f'准备抽取文字')
  154. file_content=extract_content_by_llm(file_path,"sk-5z2L4zy9T1w90j6e3T90ANZdyN2zLWClRwFnBzWgzdrG4onx")
  155. if file_content is None:
  156. reply = Reply()
  157. reply.type = ReplyType.TEXT
  158. reply.content = f"不能处理这份文件"
  159. e_context["reply"] = reply
  160. e_context.action = EventAction.BREAK
  161. return
  162. else:
  163. self.params_cache[user_id]['last_content']=file_content
  164. logger.info('删除文件')
  165. os.remove(file_path)
  166. # input_content = file_content
  167. # input_from_user_nickname = msg.from_user_nickname
  168. # input_to_user_nickname = msg.to_user_nickname
  169. # input_wx_content_dialogue_message=[{"type": "image_url", "image_url": {"url": input_content}}]
  170. # input_message=dialogue_message(input_from_user_nickname,input_to_user_nickname,input_wx_content_dialogue_message)
  171. # kafka_helper.kafka_client.produce_message(input_message)
  172. # logger.info("发送对话 %s",input_message)
  173. # 先回应
  174. if 'previous_prompt' in self.params_cache[user_id] and 'last_content' in self.params_cache[user_id] and contains_keywords(self.params_cache[user_id]['previous_prompt']):
  175. logger.info('先回应')
  176. receiver=user_id
  177. print(receiver)
  178. text=self.params_cache[user_id]['previous_prompt']
  179. logger.info(f'{text},{contains_keywords(text)}')
  180. itchat_content= f'@{msg.actual_user_nickname}' if e_context['context']['isgroup'] else ''
  181. itchat_content+="已经收到,立刻为您服务"
  182. flag=contains_keywords(text)
  183. if flag==True:
  184. print('发送'+itchat_content)
  185. itchat.send(itchat_content, toUserName=receiver)
  186. e_context.action = EventAction.BREAK
  187. # 图片和提示次齐全
  188. if 'previous_prompt' in self.params_cache[user_id] and 'last_content' in self.params_cache[user_id]:
  189. if contains_keywords(self.params_cache[user_id]['previous_prompt']):
  190. e_context["context"].type = ContextType.TEXT
  191. last_content=self.params_cache[user_id]['last_content']
  192. prompt=self.params_cache[user_id]['previous_prompt']
  193. # if isinstance(last_content, list):
  194. # e_context["context"].content =self.generate_openai_messages_content(last_content,prompt)
  195. # elif isinstance(last_content, str):
  196. # e_context["context"].content ="<content>"+last_content+"</content>"+'\n\t'+"<ask>"+prompt+"</ask>"
  197. # else:
  198. # return "urls is neither a list nor a string"
  199. e_context["context"].content =self.generate_openai_messages_content(last_content,prompt)
  200. logger.info(f'插件处理上传文件或图片')
  201. e_context.action = EventAction.CONTINUE
  202. # 清空清空缓存
  203. self.params_cache.clear()
  204. logger.info(f'清空缓存后:{self.params_cache}')
  205. else:
  206. if not e_context['context']['isgroup']:
  207. reply = Reply()
  208. reply.type = ReplyType.TEXT
  209. # reply.content = f"{remove_markdown(reply_content)}\n\n💬5min内输入{self.file_sum_qa_prefix}+问题,可继续追问"
  210. reply.content = f"您刚刚上传了,请问我有什么可以帮您的呢?"
  211. e_context["reply"] = reply
  212. e_context.action = EventAction.BREAK
  213. return
  214. def generate_openai_messages_content(self, last_content,prompt):
  215. content = []
  216. if isinstance(last_content, list):
  217. # 遍历每个 URL,生成对应的消息结构
  218. for url in last_content:
  219. if url.endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
  220. # 对于图片,生成 "image_url" 类型的消息
  221. content.append({
  222. "type": "image_url",
  223. "image_url": {
  224. "url": url
  225. }
  226. })
  227. else:
  228. # 对于其他文件,生成 "file_url" 或类似的处理方式
  229. content.append({
  230. "type": "file_url",
  231. "file_url": {
  232. "url": url
  233. }
  234. })
  235. else:
  236. prompt="<content>"+last_content+"</content>"+'\n\t'+"<ask>"+prompt+"</ask>"
  237. # 遍历每个 URL,生成对应的消息结构
  238. # for url in urls:
  239. # if url.endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
  240. # # 对于图片,生成 "image_url" 类型的消息
  241. # content.append({
  242. # "type": "image_url",
  243. # "image_url": {
  244. # "url": url
  245. # }
  246. # })
  247. # else:
  248. # # 对于其他文件,生成 "file_url" 或类似的处理方式
  249. # content.append({
  250. # "type": "file_url",
  251. # "file_url": {
  252. # "url": url
  253. # }
  254. # })
  255. # 添加额外的文本说明
  256. content.append({
  257. "type": "text",
  258. "text": prompt
  259. })
  260. return json.dumps(content, ensure_ascii=False)
  261. def remove_markdown(text):
  262. # 替换Markdown的粗体标记
  263. text = text.replace("**", "")
  264. # 替换Markdown的标题标记
  265. text = text.replace("### ", "").replace("## ", "").replace("# ", "")
  266. return text
  267. def extract_content_by_llm(file_path: str, api_key: str) -> str:
  268. logger.info(f'大模型开始抽取文字')
  269. try:
  270. headers = {
  271. 'Authorization': f'Bearer {api_key}'
  272. }
  273. data = {
  274. 'purpose': 'file-extract',
  275. }
  276. file_name=os.path.basename(file_path)
  277. files = {
  278. 'file': (file_name, open(Path(file_path), 'rb')),
  279. }
  280. # print(files)
  281. api_url='https://api.moonshot.cn/v1/files'
  282. response = requests.post(api_url, headers=headers, files=files, data=data)
  283. response_data = response.json()
  284. file_id = response_data.get('id')
  285. response=requests.get(url=f"https://api.moonshot.cn/v1/files/{file_id}/content", headers=headers)
  286. print(response.text)
  287. response_data = response.json()
  288. content = response_data.get('content')
  289. return content
  290. except requests.exceptions.RequestException as e:
  291. logger.error(f"Error calling LLM API: {e}")
  292. return None
  293. def upload_oss(access_key_id, access_key_secret, endpoint, bucket_name, local_file_path, oss_file_name, expiration_days=7):
  294. """
  295. 上传文件到阿里云OSS并设置生命周期规则,同时返回文件的公共访问地址。
  296. :param access_key_id: 阿里云AccessKey ID
  297. :param access_key_secret: 阿里云AccessKey Secret
  298. :param endpoint: OSS区域对应的Endpoint
  299. :param bucket_name: OSS中的Bucket名称
  300. :param local_file_path: 本地文件路径
  301. :param oss_file_name: OSS中的文件存储路径
  302. :param expiration_days: 文件保存天数,默认7天后删除
  303. :return: 文件的公共访问地址
  304. """
  305. # 创建Bucket实例
  306. auth = oss2.Auth(access_key_id, access_key_secret)
  307. bucket = oss2.Bucket(auth, endpoint, bucket_name)
  308. ### 1. 设置生命周期规则 ###
  309. rule_id = f'delete_after_{expiration_days}_days' # 规则ID
  310. prefix = oss_file_name.split('/')[0] + '/' # 设置规则应用的前缀为文件所在目录
  311. # 定义生命周期规则
  312. rule = oss2.models.LifecycleRule(rule_id, prefix, status=oss2.models.LifecycleRule.ENABLED,
  313. expiration=oss2.models.LifecycleExpiration(days=expiration_days))
  314. # 设置Bucket的生命周期
  315. lifecycle = oss2.models.BucketLifecycle([rule])
  316. bucket.put_bucket_lifecycle(lifecycle)
  317. print(f"已设置生命周期规则:文件将在{expiration_days}天后自动删除")
  318. ### 2. 上传文件到OSS ###
  319. bucket.put_object_from_file(oss_file_name, local_file_path)
  320. ### 3. 构建公共访问URL ###
  321. file_url = f"http://{bucket_name}.{endpoint.replace('http://', '')}/{oss_file_name}"
  322. print(f"文件上传成功,公共访问地址:{file_url}")
  323. return file_url
  324. def contains_keywords_by_re(text):
  325. # 匹配<ask>标签中的内容
  326. # match = re.search(r'<ask>(.*?)</ask>', text)
  327. match = re.search(r'(.*?)', text)
  328. if match:
  329. content = match.group(1)
  330. # 检查关键词
  331. keywords = ['分析', '总结', '报告', '描述']
  332. for keyword in keywords:
  333. if keyword in content:
  334. return True
  335. return False
  336. def contains_keywords(text):
  337. keywords = ["分析", "总结", "报告", "描述","说说","讲述","讲讲","讲一下","图片"]
  338. return any(keyword in text for keyword in keywords)
  339. def dialogue_message(nickname_from,nickname_to,wx_content):
  340. """
  341. 构造消息的 JSON 数据
  342. :param contents: list,包含多个消息内容,每个内容为字典,如:
  343. [{"type": "text", "text": "AAAAAAA"},
  344. {"type": "image_url", "image_url": {"url": "https://AAAAA.jpg"}},
  345. {"type":"file","file_url":{"url":"https://AAAAA.pdf"}}
  346. ]
  347. :return: JSON 字符串
  348. """
  349. # 获取当前时间戳,精确到毫秒
  350. current_timestamp = int(time.time() * 1000)
  351. # 获取当前时间,格式化为 "YYYY-MM-DD HH:MM:SS"
  352. current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  353. # 构造 JSON 数据
  354. data = {
  355. "messageId": str(current_timestamp),
  356. "topic": "topic.aiops.wx",
  357. "time": current_time,
  358. "data": {
  359. "msg_type": "dialogue",
  360. "content": {
  361. "nickname_from": nickname_from,
  362. "nickname_to": nickname_to,
  363. "wx_content":wx_content
  364. }
  365. }
  366. }
  367. return json.dumps(data, separators=(',', ':'), ensure_ascii=False)