You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

healthai.txt 18KB

2 weeks ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. import requests
  2. import json
  3. import plugins
  4. from bridge.reply import Reply, ReplyType
  5. from bridge.context import ContextType
  6. from channel.chat_message import ChatMessage
  7. from plugins import *
  8. from common.log import logger
  9. from common.expired_dict import ExpiredDict
  10. import os
  11. import base64
  12. from pathlib import Path
  13. from PIL import Image
  14. import oss2
  15. from lib import itchat
  16. from lib.itchat.content import *
  17. import re
  @plugins.register(
      name="healthai",
      desire_priority=-1,
      desc="A plugin for upload",
      version="0.0.01",
      author="",
  )
  class healthai(Plugin):
      """Pair an uploaded image/file with a keyword prompt and forward both.

      Per sender, the plugin caches the last keyword-bearing text message
      (``previous_prompt``) and the uploaded material (``last_content``: a list
      of OSS URLs for images, or the extracted text for a file).  Once both are
      present the message context is rewritten into a single TEXT context whose
      content is a JSON message list, and processing continues downstream.

      Credentials are never stored in source.  They are read from
      ``config.json`` (``oss.access_key_id``, ``oss.access_key_secret``,
      ``moonshot_api_key``) and fall back to the environment variables
      ``OSS_ACCESS_KEY_ID``, ``OSS_ACCESS_KEY_SECRET`` and ``MOONSHOT_API_KEY``.
      """

      # Extensions rendered as "image_url" entries; everything else is "file_url".
      _IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

      def __init__(self):
          """Load configuration, register the context handler, set up the cache.

          Any failure is logged as a warning and leaves the handler unregistered.
          """
          super().__init__()
          try:
              curdir = os.path.dirname(__file__)
              config_path = os.path.join(curdir, "config.json")
              if os.path.exists(config_path):
                  with open(config_path, "r", encoding="utf-8") as f:
                      self.config = json.load(f)
              else:
                  # Fall back to the parent class's configuration loader.
                  self.config = super().load_config()
                  if not self.config:
                      raise Exception("config.json not found")

              # Register the event handler.
              self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
              # Per-sender state; ExpiredDict is constructed with 300
              # (presumably seconds — confirm against common.expired_dict).
              self.params_cache = ExpiredDict(300)

              # Pull the settings we need out of the configuration.  Secrets
              # come from config or environment only — never from literals.
              self.oss = self.config.get("oss", {})
              self.oss_access_key_id = (
                  self.oss.get("access_key_id") or os.environ.get("OSS_ACCESS_KEY_ID", "")
              )
              self.oss_access_key_secret = (
                  self.oss.get("access_key_secret") or os.environ.get("OSS_ACCESS_KEY_SECRET", "")
              )
              self.oss_endpoint = self.oss.get("endpoint", "http://oss-cn-shanghai.aliyuncs.com")
              self.oss_bucket_name = self.oss.get("bucket_name", "cow-agent")
              self.llm_api_key = (
                  self.config.get("moonshot_api_key") or os.environ.get("MOONSHOT_API_KEY", "")
              )
              if not (self.oss_access_key_id and self.oss_access_key_secret):
                  logger.warning("[file4upload] OSS credentials are not configured; image uploads will fail.")
              if not self.llm_api_key:
                  logger.warning("[file4upload] moonshot_api_key is not configured; file extraction will fail.")

              # Previous prompt (kept for compatibility; per-user state lives in params_cache).
              self.previous_prompt = ''
              logger.info("[file4upload] inited.")
          except Exception as e:
              # Logger.warn is a deprecated alias (removed in Python 3.13).
              logger.warning(f"file4upload init failed: {e}")

      @staticmethod
      def _reply_text(e_context, text):
          """Attach a TEXT reply to the event and stop further plugin handling."""
          reply = Reply()
          reply.type = ReplyType.TEXT
          reply.content = text
          e_context["reply"] = reply
          e_context.action = EventAction.BREAK

      def on_handle_context(self, e_context: EventContext):
          """Handle TEXT/SHARING/FILE/IMAGE contexts for the upload workflow.

          * TEXT containing a keyword is cached as the sender's prompt.
          * IMAGE is uploaded to OSS and its URL appended to the sender's
            pending content; the local file is then deleted.
          * FILE is sent to the text-extraction API and the extracted text
            becomes the pending content; the local file is then deleted.
          * When both a prompt and pending content exist, an acknowledgement is
            sent via itchat, the context is rewritten to a TEXT context holding
            the JSON message list, the action is set to CONTINUE and only this
            sender's cached state is cleared.
          """
          context = e_context["context"]
          if context.type not in (ContextType.TEXT, ContextType.SHARING, ContextType.FILE, ContextType.IMAGE):
              return

          msg: ChatMessage = context["msg"]
          user_id = msg.from_user_id
          content = context.content
          is_group = context.get("isgroup", False)
          msg.prepare()

          logger.info(f'当前缓存:self.params_cache:{self.params_cache}')
          if user_id not in self.params_cache:
              self.params_cache[user_id] = {}
              logger.info(f'初始化缓存:{self.params_cache}')
          # The cached value is a plain dict that is mutated in place.
          user_cache = self.params_cache[user_id]

          if context.type == ContextType.TEXT and contains_keywords(content):
              user_cache['previous_prompt'] = msg.content
              logger.info(f'上次提示缓存:{self.params_cache}')

          if context.type == ContextType.IMAGE:
              logger.info('处理上传')
              file_path = context.content
              logger.info(f"on_handle_context: 获取到图片路径 {file_path},{user_id in self.params_cache}")
              if 'previous_prompt' not in user_cache and not is_group:
                  self._reply_text(e_context, "您刚刚上传图片,请问我有什么可以帮您的呢?")
              file_url = upload_oss(
                  self.oss_access_key_id,
                  self.oss_access_key_secret,
                  self.oss_endpoint,
                  self.oss_bucket_name,
                  file_path,
                  f'cow/{os.path.basename(file_path)}',
              )
              # 'last_content' may hold extracted file text (a str) from an
              # earlier FILE message; appending to that would raise, so start
              # a fresh URL list unless one already exists.
              pending = user_cache.get('last_content')
              if not isinstance(pending, list):
                  pending = []
                  user_cache['last_content'] = pending
              pending.append(file_url)
              logger.info('删除图片')
              os.remove(file_path)

          if context.type == ContextType.FILE:
              logger.info('处理文件')
              file_path = context.content
              logger.info(f"on_handle_context: 获取到文件路径 {file_path}")
              if 'previous_prompt' not in user_cache and not is_group:
                  self._reply_text(e_context, "您刚刚上传了一份文件,请问我有什么可以帮您的呢?")
              logger.info('准备抽取文字')
              file_content = extract_content_by_llm(file_path, self.llm_api_key)
              if file_content is None:
                  self._reply_text(e_context, "不能处理这份文件")
                  return
              user_cache['last_content'] = file_content
              logger.info('删除文件')
              os.remove(file_path)

          if 'previous_prompt' not in user_cache or 'last_content' not in user_cache:
              return

          prompt = user_cache['previous_prompt']
          if contains_keywords(prompt):
              logger.info('先回应')
              ack = f'@{msg.actual_user_nickname}' if is_group else '[小蕴]'
              ack += "已经收到,立刻为您服务"
              logger.info(f'发送{ack} -> {user_id}')
              itchat.send(ack, toUserName=user_id)

              # Hand the combined prompt + material to the downstream handler.
              context.type = ContextType.TEXT
              context.content = self.generate_openai_messages_content(user_cache['last_content'], prompt)
              logger.info('插件处理上传文件或图片')
              e_context.action = EventAction.CONTINUE

              # Clear only this sender's state; wiping the whole cache would
              # drop other users' pending uploads and prompts.
              user_cache.clear()
              logger.info(f'清空缓存后:{self.params_cache}')
          elif not is_group:
              # NOTE(review): the source indentation was lost; this branch is
              # attached to the keyword check. Prompts are only cached when they
              # contain a keyword, so it is normally not reached — confirm.
              self._reply_text(e_context, "您刚刚上传了,请问我有什么可以帮您的呢?")

      def on_handle_context2(self, e_context: EventContext):
          """Alternative draft of ``on_handle_context``; not registered in ``__init__``.

          NOTE(review): this looks unfinished — uploads are cached under
          ``'urls'`` while the reply logic only reads ``'last_content'``, and the
          prompt is only refreshed when one is already cached.  The nesting of
          the ``else`` branch was reconstructed from flattened source; confirm
          before wiring this method up.
          """
          context = e_context["context"]
          # Only these context types are handled.
          if context.type not in {ContextType.TEXT, ContextType.SHARING, ContextType.FILE, ContextType.IMAGE}:
              return

          msg: ChatMessage = context["msg"]
          user_id = msg.from_user_id
          is_group = context.get("isgroup", False)
          # Prepare the message.
          context.get("msg").prepare()
          logger.info(f'当前缓存:self.params_cache:{self.params_cache}')

          # Initialise the per-user cache with the same operations the
          # registered handler uses.
          if user_id not in self.params_cache:
              self.params_cache[user_id] = {}
          user_cache = self.params_cache[user_id]
          if not user_cache:
              logger.info(f'初始化缓存:{self.params_cache}')

          previous_prompt = user_cache.get('previous_prompt')
          last_content = user_cache.get('last_content')

          # Refresh previous_prompt.
          if context.type == ContextType.TEXT and previous_prompt and contains_keywords(previous_prompt):
              user_cache['previous_prompt'] = msg.content

          # Act on previous_prompt and last_content.
          if previous_prompt and last_content and contains_keywords(previous_prompt):
              logger.info('先回应')
              itchat_content = f'@{msg.actual_user_nickname}' if is_group else '[小蕴]'
              itchat_content += "已经收到,立刻为您服务"
              logger.info(f'发送消息: {itchat_content}')
              itchat.send(itchat_content, toUserName=user_id)
              e_context.action = EventAction.BREAK
              # Clear only this sender's cached state.
              user_cache.clear()
              logger.info(f'清空缓存后:{self.params_cache}')
          elif not is_group:
              self._reply_text(e_context, "您刚刚上传了,请问我有什么可以帮您的呢?")

          if context.type in (ContextType.FILE, ContextType.IMAGE):
              logger.info('处理上传')
              file_path = context.content
              logger.info(f"on_handle_context: 获取到图片路径 {file_path},{user_id in self.params_cache}")
              if 'previous_prompt' not in user_cache and not is_group:
                  if context.type == ContextType.FILE:
                      self._reply_text(e_context, "您刚刚上传文件,请问我有什么可以帮您的呢?")
                  else:
                      self._reply_text(e_context, "您刚刚上传图片,请问我有什么可以帮您的呢?")
              file_url = upload_oss(
                  self.oss_access_key_id,
                  self.oss_access_key_secret,
                  self.oss_endpoint,
                  self.oss_bucket_name,
                  file_path,
                  f'cow/{os.path.basename(file_path)}',
              )
              # Make sure 'urls' exists and is a list, then record the upload.
              user_cache.setdefault('urls', []).append(file_url)
              logger.info('删除图片')
              os.remove(file_path)

      def generate_openai_messages_content(self, last_content, prompt):
          """Serialise pending content plus the prompt as a JSON message list.

          :param last_content: a list of URLs (images/files), or extracted text.
          :param prompt: the user's question.
          :return: JSON string (non-ASCII preserved).  For a URL list, one
              ``image_url``/``file_url`` entry per URL followed by a ``text``
              entry with the prompt; otherwise a single ``text`` entry wrapping
              the content and prompt in ``<content>``/``<ask>`` tags.
          """
          content = []
          if isinstance(last_content, list):
              for url in last_content:
                  # Compare case-insensitively so ".PNG"/".JPG" count as images.
                  is_image = str(url).lower().endswith(self._IMAGE_SUFFIXES)
                  kind = "image_url" if is_image else "file_url"
                  content.append({"type": kind, kind: {"url": url}})
          else:
              body = "" if last_content is None else str(last_content)
              prompt = "<content>" + body + "</content>" + '\n\t' + "<ask>" + prompt + "</ask>"
          # The prompt always goes last as a text entry.
          content.append({"type": "text", "text": prompt})
          return json.dumps(content, ensure_ascii=False)
  def remove_markdown(text):
      """Strip Markdown bold markers and heading prefixes from *text*.

      ``**`` is removed everywhere.  Heading markers (one to six ``#`` followed
      by whitespace) are removed only at the start of a line, so inline text
      such as ``"C# is"`` is left intact and deeper headings (``####``) are
      handled as well.

      :param text: Markdown text; empty/None input is returned unchanged.
      :return: the text without bold and heading markup.
      """
      if not text:
          return text
      # Bold markers.
      text = text.replace("**", "")
      # Heading markers, anchored to line starts.
      return re.sub(r"^#{1,6}[ \t]+", "", text, flags=re.MULTILINE)
  def extract_content_by_llm(file_path: str, api_key: str, timeout: float = 60) -> "str | None":
      """Upload a file to the Moonshot files API and return its extracted text.

      The file is POSTed to ``/v1/files`` with purpose ``file-extract``; the
      returned file id is then used to GET ``/v1/files/{id}/content``.

      :param file_path: path of the local file to upload.
      :param api_key: bearer token for the API.
      :param timeout: per-request timeout in seconds.
      :return: the ``content`` field of the response, or ``None`` when the file
          cannot be read, a request fails, the response is not valid JSON, or
          no file id is returned.
      """
      logger.info('大模型开始抽取文字')
      headers = {'Authorization': f'Bearer {api_key}'}
      api_url = 'https://api.moonshot.cn/v1/files'
      try:
          # Keep the handle inside a context manager so it is always closed.
          with open(Path(file_path), 'rb') as fh:
              response = requests.post(
                  api_url,
                  headers=headers,
                  files={'file': (os.path.basename(file_path), fh)},
                  data={'purpose': 'file-extract'},
                  timeout=timeout,
              )
          response.raise_for_status()
          file_id = response.json().get('id')
          if not file_id:
              # Without an id the follow-up request would target ".../None/content".
              logger.error("Error calling LLM API: upload response contained no file id")
              return None

          response = requests.get(url=f"{api_url}/{file_id}/content", headers=headers, timeout=timeout)
          response.raise_for_status()
          logger.debug(response.text)
          return response.json().get('content')
      except (requests.exceptions.RequestException, OSError, ValueError) as e:
          # OSError: unreadable file; ValueError: body that is not valid JSON.
          logger.error(f"Error calling LLM API: {e}")
          return None
  def upload_oss(access_key_id, access_key_secret, endpoint, bucket_name, local_file_path, oss_file_name, expiration_days=7):
      """Upload a file to Alibaba Cloud OSS, set an expiry rule, return its URL.

      :param access_key_id: Alibaba Cloud AccessKey ID.
      :param access_key_secret: Alibaba Cloud AccessKey Secret.
      :param endpoint: endpoint of the OSS region, with or without a scheme.
      :param bucket_name: name of the OSS bucket.
      :param local_file_path: path of the local file to upload.
      :param oss_file_name: object key (storage path) inside the bucket.
      :param expiration_days: days to keep objects under the key's top-level
          prefix; 7 by default.
      :return: the object's URL on the bucket's virtual-hosted domain.
      """
      from urllib.parse import quote

      # Create the bucket client.
      auth = oss2.Auth(access_key_id, access_key_secret)
      bucket = oss2.Bucket(auth, endpoint, bucket_name)

      # 1. Lifecycle rule: expire everything under the object's first path segment.
      # NOTE(review): this is issued on every upload and, per the OSS API,
      # replaces the bucket's existing lifecycle configuration — confirm that
      # no other rules on the bucket need to be preserved.
      rule_id = f'delete_after_{expiration_days}_days'
      prefix = oss_file_name.split('/')[0] + '/'
      rule = oss2.models.LifecycleRule(
          rule_id,
          prefix,
          status=oss2.models.LifecycleRule.ENABLED,
          expiration=oss2.models.LifecycleExpiration(days=expiration_days),
      )
      bucket.put_bucket_lifecycle(oss2.models.BucketLifecycle([rule]))
      logger.info(f"已设置生命周期规则:文件将在{expiration_days}天后自动删除")

      # 2. Upload the file.
      bucket.put_object_from_file(oss_file_name, local_file_path)

      # 3. Build the URL.  Keep the endpoint's own scheme (the old code only
      # stripped "http://", which broke https endpoints) and percent-encode the
      # key so spaces and non-ASCII names still form a valid URL.
      scheme, sep, host = endpoint.partition('://')
      if not sep:
          scheme, host = 'http', endpoint
      host = host.rstrip('/')
      file_url = f"{scheme}://{bucket_name}.{host}/{quote(oss_file_name, safe='/')}"
      logger.info(f"文件上传成功,公共访问地址:{file_url}")
      return file_url
  def contains_keywords_by_re(text):
      """Return True when the question part of *text* contains a trigger keyword.

      If *text* holds an ``<ask>...</ask>`` section, only that section is
      checked; otherwise the whole text is checked.  (The previous lazy pattern
      ``(.*?)`` always captured the empty string, so the function could never
      return True.)

      :param text: message text; empty/None yields False.
      :return: True if any of the keywords occurs in the checked text.
      """
      if not text:
          return False
      # DOTALL so a question spanning several lines is still captured.
      match = re.search(r'<ask>(.*?)</ask>', text, flags=re.DOTALL)
      content = match.group(1) if match else text
      keywords = ('分析', '总结', '报告', '描述')
      return any(keyword in content for keyword in keywords)
  def contains_keywords(text):
      """Return True when *text* contains any of the trigger keywords.

      :param text: message text; ``None`` or any non-string value yields False
          instead of raising ``TypeError``.
      :return: True if at least one keyword is a substring of *text*.
      """
      if not isinstance(text, str):
          return False
      keywords = ("分析", "总结", "报告", "描述", "说说", "讲述", "讲讲", "讲一下")
      return any(keyword in text for keyword in keywords)