You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

397 lines
20KB

  1. import os
  2. import re
  3. import threading
  4. import time
  5. from asyncio import CancelledError
  6. from concurrent.futures import Future, ThreadPoolExecutor
  7. from bridge.context import *
  8. from bridge.reply import *
  9. from channel.channel import Channel
  10. from common.dequeue import Dequeue
  11. from common import memory
  12. from plugins import *
  13. try:
  14. from voice.audio_convert import any_to_wav
  15. except Exception as e:
  16. pass
  17. handler_pool = ThreadPoolExecutor(max_workers=8) # 处理消息的线程池
  18. # 抽象类, 它包含了与消息通道无关的通用处理逻辑
  19. class ChatChannel(Channel):
  20. name = None # 登录的用户名
  21. user_id = None # 登录的用户id
  22. futures = {} # 记录每个session_id提交到线程池的future对象, 用于重置会话时把没执行的future取消掉,正在执行的不会被取消
  23. sessions = {} # 用于控制并发,每个session_id同时只能有一个context在处理
  24. lock = threading.Lock() # 用于控制对sessions的访问
  25. def __init__(self):
  26. _thread = threading.Thread(target=self.consume)
  27. _thread.setDaemon(True)
  28. _thread.start()
  29. # 根据消息构造context,消息内容相关的触发项写在这里
  30. def _compose_context(self, ctype: ContextType, content, **kwargs):
  31. context = Context(ctype, content)
  32. context.kwargs = kwargs
  33. # context首次传入时,origin_ctype是None,
  34. # 引入的起因是:当输入语音时,会嵌套生成两个context,第一步语音转文本,第二步通过文本生成文字回复。
  35. # origin_ctype用于第二步文本回复时,判断是否需要匹配前缀,如果是私聊的语音,就不需要匹配前缀
  36. if "origin_ctype" not in context:
  37. context["origin_ctype"] = ctype
  38. # context首次传入时,receiver是None,根据类型设置receiver
  39. first_in = "receiver" not in context
  40. # 群名匹配过程,设置session_id和receiver
  41. if first_in: # context首次传入时,receiver是None,根据类型设置receiver
  42. config = conf()
  43. cmsg = context["msg"]
  44. user_data = conf().get_user_data(cmsg.from_user_id)
  45. context["openai_api_key"] = user_data.get("openai_api_key")
  46. context["gpt_model"] = user_data.get("gpt_model")
  47. if context.get("isgroup", False):
  48. group_name = cmsg.other_user_nickname
  49. group_id = cmsg.other_user_id
  50. group_name_white_list = config.get("group_name_white_list", [])
  51. group_name_keyword_white_list = config.get("group_name_keyword_white_list", [])
  52. if any(
  53. [
  54. group_name in group_name_white_list,
  55. "ALL_GROUP" in group_name_white_list,
  56. check_contain(group_name, group_name_keyword_white_list),
  57. ]
  58. ):
  59. group_chat_in_one_session = conf().get("group_chat_in_one_session", [])
  60. session_id = cmsg.actual_user_id
  61. if any(
  62. [
  63. group_name in group_chat_in_one_session,
  64. "ALL_GROUP" in group_chat_in_one_session,
  65. ]
  66. ):
  67. session_id = group_id
  68. else:
  69. logger.debug(f"No need reply, groupName not in whitelist, group_name={group_name}")
  70. return None
  71. context["session_id"] = session_id
  72. context["receiver"] = group_id
  73. else:
  74. context["session_id"] = cmsg.other_user_id
  75. context["receiver"] = cmsg.other_user_id
  76. e_context = PluginManager().emit_event(EventContext(Event.ON_RECEIVE_MESSAGE, {"channel": self, "context": context}))
  77. context = e_context["context"]
  78. if e_context.is_pass() or context is None:
  79. return context
  80. if cmsg.from_user_id == self.user_id and not config.get("trigger_by_self", True):
  81. logger.debug("[chat_channel]self message skipped")
  82. return None
  83. # 消息内容匹配过程,并处理content
  84. if ctype == ContextType.TEXT:
  85. if first_in and "」\n- - - - - - -" in content: # 初次匹配 过滤引用消息
  86. logger.debug(content)
  87. logger.debug("[chat_channel]reference query skipped")
  88. return None
  89. nick_name_black_list = conf().get("nick_name_black_list", [])
  90. if context.get("isgroup", False): # 群聊
  91. # 校验关键字
  92. match_prefix = check_prefix(content, conf().get("group_chat_prefix"))
  93. match_contain = check_contain(content, conf().get("group_chat_keyword"))
  94. flag = False
  95. if context["msg"].to_user_id != context["msg"].actual_user_id:
  96. if match_prefix is not None or match_contain is not None:
  97. flag = True
  98. if match_prefix:
  99. content = content.replace(match_prefix, "", 1).strip()
  100. if context["msg"].is_at:
  101. nick_name = context["msg"].actual_user_nickname
  102. if nick_name and nick_name in nick_name_black_list:
  103. # 黑名单过滤
  104. logger.warning(f"[chat_channel] Nickname {nick_name} in In BlackList, ignore")
  105. return None
  106. logger.info("[chat_channel]receive group at")
  107. if not conf().get("group_at_off", False):
  108. flag = True
  109. self.name = self.name if self.name is not None else "" # 部分渠道self.name可能没有赋值
  110. pattern = f"@{re.escape(self.name)}(\u2005|\u0020)"
  111. subtract_res = re.sub(pattern, r"", content)
  112. if isinstance(context["msg"].at_list, list):
  113. for at in context["msg"].at_list:
  114. pattern = f"@{re.escape(at)}(\u2005|\u0020)"
  115. subtract_res = re.sub(pattern, r"", subtract_res)
  116. if subtract_res == content and context["msg"].self_display_name:
  117. # 前缀移除后没有变化,使用群昵称再次移除
  118. pattern = f"@{re.escape(context['msg'].self_display_name)}(\u2005|\u0020)"
  119. subtract_res = re.sub(pattern, r"", content)
  120. content = subtract_res
  121. if not flag:
  122. if context["origin_ctype"] == ContextType.VOICE:
  123. logger.info("[chat_channel]receive group voice, but checkprefix didn't match")
  124. return None
  125. else: # 单聊
  126. nick_name = context["msg"].from_user_nickname
  127. if nick_name and nick_name in nick_name_black_list:
  128. # 黑名单过滤
  129. logger.warning(f"[chat_channel] Nickname '{nick_name}' in In BlackList, ignore")
  130. return None
  131. match_prefix = check_prefix(content, conf().get("single_chat_prefix", [""]))
  132. if match_prefix is not None: # 判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容
  133. content = content.replace(match_prefix, "", 1).strip()
  134. elif context["origin_ctype"] == ContextType.VOICE: # 如果源消息是私聊的语音消息,允许不匹配前缀,放宽条件
  135. pass
  136. else:
  137. return None
  138. content = content.strip()
  139. img_match_prefix = check_prefix(content, conf().get("image_create_prefix",[""]))
  140. if img_match_prefix:
  141. content = content.replace(img_match_prefix, "", 1)
  142. context.type = ContextType.IMAGE_CREATE
  143. else:
  144. context.type = ContextType.TEXT
  145. context.content = content.strip()
  146. if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
  147. context["desire_rtype"] = ReplyType.VOICE
  148. elif context.type == ContextType.VOICE:
  149. if "desire_rtype" not in context and conf().get("voice_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
  150. context["desire_rtype"] = ReplyType.VOICE
  151. return context
  152. def _handle(self, context: Context):
  153. if context is None or not context.content:
  154. return
  155. logger.debug("[chat_channel] ready to handle context: {}".format(context))
  156. # reply的构建步骤
  157. reply = self._generate_reply(context)
  158. logger.debug("[chat_channel] ready to decorate reply: {}".format(reply))
  159. # reply的包装步骤
  160. if reply and reply.content:
  161. reply = self._decorate_reply(context, reply)
  162. # reply的发送步骤
  163. self._send_reply(context, reply)
  164. def _generate_reply(self, context: Context, reply: Reply = Reply()) -> Reply:
  165. e_context = PluginManager().emit_event(
  166. EventContext(
  167. Event.ON_HANDLE_CONTEXT,
  168. {"channel": self, "context": context, "reply": reply},
  169. )
  170. )
  171. reply = e_context["reply"]
  172. if not e_context.is_pass():
  173. logger.debug("[chat_channel] ready to handle context: type={}, content={}".format(context.type, context.content))
  174. if context.type == ContextType.TEXT or context.type == ContextType.IMAGE_CREATE: # 文字和图片消息
  175. context["channel"] = e_context["channel"]
  176. reply = super().build_reply_content(context.content, context)
  177. elif context.type == ContextType.VOICE: # 语音消息
  178. cmsg = context["msg"]
  179. cmsg.prepare()
  180. file_path = context.content
  181. wav_path = os.path.splitext(file_path)[0] + ".wav"
  182. try:
  183. any_to_wav(file_path, wav_path)
  184. except Exception as e: # 转换失败,直接使用mp3,对于某些api,mp3也可以识别
  185. logger.warning("[chat_channel]any to wav error, use raw path. " + str(e))
  186. wav_path = file_path
  187. # 语音识别
  188. reply = super().build_voice_to_text(wav_path)
  189. # 删除临时文件
  190. try:
  191. os.remove(file_path)
  192. if wav_path != file_path:
  193. os.remove(wav_path)
  194. except Exception as e:
  195. pass
  196. # logger.warning("[chat_channel]delete temp file error: " + str(e))
  197. if reply.type == ReplyType.TEXT:
  198. new_context = self._compose_context(ContextType.TEXT, reply.content, **context.kwargs)
  199. if new_context:
  200. reply = self._generate_reply(new_context)
  201. else:
  202. return
  203. elif context.type == ContextType.IMAGE: # 图片消息,当前仅做下载保存到本地的逻辑
  204. memory.USER_IMAGE_CACHE[context["session_id"]] = {
  205. "path": context.content,
  206. "msg": context.get("msg")
  207. }
  208. elif context.type == ContextType.SHARING: # 分享信息,当前无默认逻辑
  209. pass
  210. elif context.type == ContextType.FUNCTION or context.type == ContextType.FILE: # 文件消息及函数调用等,当前无默认逻辑
  211. pass
  212. else:
  213. logger.warning("[chat_channel] unknown context type: {}".format(context.type))
  214. return
  215. return reply
  216. def _decorate_reply(self, context: Context, reply: Reply) -> Reply:
  217. if reply and reply.type:
  218. e_context = PluginManager().emit_event(
  219. EventContext(
  220. Event.ON_DECORATE_REPLY,
  221. {"channel": self, "context": context, "reply": reply},
  222. )
  223. )
  224. reply = e_context["reply"]
  225. desire_rtype = context.get("desire_rtype")
  226. if not e_context.is_pass() and reply and reply.type:
  227. if reply.type in self.NOT_SUPPORT_REPLYTYPE:
  228. logger.error("[chat_channel]reply type not support: " + str(reply.type))
  229. reply.type = ReplyType.ERROR
  230. reply.content = "不支持发送的消息类型: " + str(reply.type)
  231. if reply.type == ReplyType.TEXT:
  232. reply_text = reply.content
  233. if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
  234. reply = super().build_text_to_voice(reply.content)
  235. return self._decorate_reply(context, reply)
  236. if context.get("isgroup", False):
  237. if not context.get("no_need_at", False):
  238. reply_text = "@" + context["msg"].actual_user_nickname + "\n" + reply_text.strip()
  239. reply_text = conf().get("group_chat_reply_prefix", "") + reply_text + conf().get("group_chat_reply_suffix", "")
  240. else:
  241. reply_text = conf().get("single_chat_reply_prefix", "") + reply_text + conf().get("single_chat_reply_suffix", "")
  242. reply.content = reply_text
  243. elif reply.type == ReplyType.ERROR or reply.type == ReplyType.INFO:
  244. reply.content = "[" + str(reply.type) + "]\n" + reply.content
  245. elif reply.type == ReplyType.IMAGE_URL or reply.type == ReplyType.VOICE or reply.type == ReplyType.IMAGE or reply.type == ReplyType.FILE or reply.type == ReplyType.VIDEO or reply.type == ReplyType.VIDEO_URL:
  246. pass
  247. else:
  248. logger.error("[chat_channel] unknown reply type: {}".format(reply.type))
  249. return
  250. if desire_rtype and desire_rtype != reply.type and reply.type not in [ReplyType.ERROR, ReplyType.INFO]:
  251. logger.warning("[chat_channel] desire_rtype: {}, but reply type: {}".format(context.get("desire_rtype"), reply.type))
  252. return reply
  253. def _send_reply(self, context: Context, reply: Reply):
  254. if reply and reply.type:
  255. e_context = PluginManager().emit_event(
  256. EventContext(
  257. Event.ON_SEND_REPLY,
  258. {"channel": self, "context": context, "reply": reply},
  259. )
  260. )
  261. reply = e_context["reply"]
  262. if not e_context.is_pass() and reply and reply.type:
  263. logger.debug("[chat_channel] ready to send reply: {}, context: {}".format(reply, context))
  264. self._send(reply, context)
  265. def _send(self, reply: Reply, context: Context, retry_cnt=0):
  266. try:
  267. self.send(reply, context)
  268. except Exception as e:
  269. logger.error("[chat_channel] sendMsg error: {}".format(str(e)))
  270. if isinstance(e, NotImplementedError):
  271. return
  272. logger.exception(e)
  273. if retry_cnt < 2:
  274. time.sleep(3 + 3 * retry_cnt)
  275. self._send(reply, context, retry_cnt + 1)
  276. def _success_callback(self, session_id, **kwargs): # 线程正常结束时的回调函数
  277. logger.debug("Worker return success, session_id = {}".format(session_id))
  278. def _fail_callback(self, session_id, exception, **kwargs): # 线程异常结束时的回调函数
  279. logger.exception("Worker return exception: {}".format(exception))
  280. def _thread_pool_callback(self, session_id, **kwargs):
  281. def func(worker: Future):
  282. try:
  283. worker_exception = worker.exception()
  284. if worker_exception:
  285. self._fail_callback(session_id, exception=worker_exception, **kwargs)
  286. else:
  287. self._success_callback(session_id, **kwargs)
  288. except CancelledError as e:
  289. logger.info("Worker cancelled, session_id = {}".format(session_id))
  290. except Exception as e:
  291. logger.exception("Worker raise exception: {}".format(e))
  292. with self.lock:
  293. self.sessions[session_id][1].release()
  294. return func
  295. def produce(self, context: Context):
  296. session_id = context["session_id"]
  297. with self.lock:
  298. if session_id not in self.sessions:
  299. self.sessions[session_id] = [
  300. Dequeue(),
  301. threading.BoundedSemaphore(conf().get("concurrency_in_session", 4)),
  302. ]
  303. if context.type == ContextType.TEXT and context.content.startswith("#"):
  304. self.sessions[session_id][0].putleft(context) # 优先处理管理命令
  305. else:
  306. self.sessions[session_id][0].put(context)
  307. # 消费者函数,单独线程,用于从消息队列中取出消息并处理
  308. def consume(self):
  309. while True:
  310. with self.lock:
  311. session_ids = list(self.sessions.keys())
  312. for session_id in session_ids:
  313. context_queue, semaphore = self.sessions[session_id]
  314. if semaphore.acquire(blocking=False): # 等线程处理完毕才能删除
  315. if not context_queue.empty():
  316. context = context_queue.get()
  317. logger.debug("[chat_channel] consume context: {}".format(context))
  318. future: Future = handler_pool.submit(self._handle, context)
  319. future.add_done_callback(self._thread_pool_callback(session_id, context=context))
  320. if session_id not in self.futures:
  321. self.futures[session_id] = []
  322. self.futures[session_id].append(future)
  323. elif semaphore._initial_value == semaphore._value + 1: # 除了当前,没有任务再申请到信号量,说明所有任务都处理完毕
  324. self.futures[session_id] = [t for t in self.futures[session_id] if not t.done()]
  325. assert len(self.futures[session_id]) == 0, "thread pool error"
  326. del self.sessions[session_id]
  327. else:
  328. semaphore.release()
  329. time.sleep(0.1)
  330. # 取消session_id对应的所有任务,只能取消排队的消息和已提交线程池但未执行的任务
  331. def cancel_session(self, session_id):
  332. with self.lock:
  333. if session_id in self.sessions:
  334. for future in self.futures[session_id]:
  335. future.cancel()
  336. cnt = self.sessions[session_id][0].qsize()
  337. if cnt > 0:
  338. logger.info("Cancel {} messages in session {}".format(cnt, session_id))
  339. self.sessions[session_id][0] = Dequeue()
  340. def cancel_all_session(self):
  341. with self.lock:
  342. for session_id in self.sessions:
  343. for future in self.futures[session_id]:
  344. future.cancel()
  345. cnt = self.sessions[session_id][0].qsize()
  346. if cnt > 0:
  347. logger.info("Cancel {} messages in session {}".format(cnt, session_id))
  348. self.sessions[session_id][0] = Dequeue()
  349. def check_prefix(content, prefix_list):
  350. if not prefix_list:
  351. return None
  352. for prefix in prefix_list:
  353. if content.startswith(prefix):
  354. return prefix
  355. return None
  356. def check_contain(content, keyword_list):
  357. if not keyword_list:
  358. return None
  359. for ky in keyword_list:
  360. if content.find(ky) != -1:
  361. return True
  362. return None