You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

chat_channel.py 24KB

1 year ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. import os
  2. import re
  3. import threading
  4. import time
  5. from asyncio import CancelledError
  6. from concurrent.futures import Future, ThreadPoolExecutor
  7. from bridge.context import *
  8. from bridge.reply import *
  9. from channel.channel import Channel
  10. from common.dequeue import Dequeue
  11. from common import memory
  12. from plugins import *
  13. try:
  14. from voice.audio_convert import any_to_wav
  15. except Exception as e:
  16. pass
  # Thread pool on which queued message contexts are handled.
  handler_pool = ThreadPoolExecutor(max_workers=8)
  # Abstract class: holds the generic message-handling logic that is independent of any concrete channel.
  class ChatChannel(Channel):
      """Channel base class that turns incoming messages into contexts and replies.

      Messages are queued per session by :meth:`produce`, picked up by the
      :meth:`consume` loop (started as a daemon thread in ``__init__``) and
      handled on ``handler_pool`` through :meth:`_handle`, which builds,
      decorates and sends the reply.
      """

      name = None  # user name of the logged-in account
      user_id = None  # user id of the logged-in account
      # session_id -> futures submitted to the thread pool. Used when a session is reset to
      # cancel futures that have not started yet; futures that are already running are not cancelled.
      futures = {}
      # session_id -> [queue of pending contexts, semaphore bounding concurrent handling in that session]
      sessions = {}
      # Guards access to sessions. Reentrant on purpose: the done-callback built by
      # _thread_pool_callback takes this lock, and concurrent.futures runs callbacks
      # synchronously in the calling thread when a future is cancelled (cancel_session) or is
      # already finished when the callback is attached (consume) - both happen while the lock
      # is held, which would self-deadlock with a plain Lock.
      lock = threading.RLock()

      def __init__(self):
          """Start the daemon thread that runs :meth:`consume`."""
          # daemon=True replaces the deprecated Thread.setDaemon(True).
          consumer = threading.Thread(target=self.consume, daemon=True)
          consumer.start()

      def _compose_context(self, ctype: ContextType, content, **kwargs):
          """Build a Context for a message; content-based trigger rules live here.

          Args:
              ctype: type of the incoming content.
              content: message content.
              **kwargs: stored as ``context.kwargs``; ``msg`` is read from the context on first pass.

          Returns:
              The composed context, or None when the message must not be answered.
              When the ON_RECEIVE_MESSAGE event is passed by a plugin, the plugin's
              context (possibly None) is returned as is.
          """
          context = Context(ctype, content)
          context.kwargs = kwargs
          # origin_ctype is missing the first time a context comes in.
          # It exists because a voice message produces two nested contexts: first voice-to-text,
          # then a text reply generated from that text. In the second step origin_ctype tells
          # whether prefix matching is required; a private-chat voice message does not need it.
          if "origin_ctype" not in context:
              context["origin_ctype"] = ctype
          # receiver is missing the first time a context comes in; it is set below by chat type.
          first_in = "receiver" not in context
          # Group-name matching: sets session_id and receiver.
          if first_in:
              config = conf()
              cmsg = context["msg"]
              user_data = conf().get_user_data(cmsg.from_user_id)
              context["openai_api_key"] = user_data.get("openai_api_key")
              context["gpt_model"] = user_data.get("gpt_model")
              if context.get("isgroup", False):
                  group_name = cmsg.other_user_nickname
                  group_id = cmsg.other_user_id
                  group_name_white_list = config.get("group_name_white_list", [])
                  group_name_keyword_white_list = config.get("group_name_keyword_white_list", [])
                  if any(
                      [
                          group_name in group_name_white_list,
                          "ALL_GROUP" in group_name_white_list,
                          check_contain(group_name, group_name_keyword_white_list),
                      ]
                  ):
                      group_chat_in_one_session = conf().get("group_chat_in_one_session", [])
                      # One session per speaker by default; one per group when configured.
                      session_id = cmsg.actual_user_id
                      if any(
                          [
                              group_name in group_chat_in_one_session,
                              "ALL_GROUP" in group_chat_in_one_session,
                          ]
                      ):
                          session_id = group_id
                  else:
                      logger.debug(f"No need reply, groupName not in whitelist, group_name={group_name}")
                      return None
                  context["session_id"] = session_id
                  context["receiver"] = group_id
              else:
                  context["session_id"] = cmsg.other_user_id
                  context["receiver"] = cmsg.other_user_id
              e_context = PluginManager().emit_event(EventContext(Event.ON_RECEIVE_MESSAGE, {"channel": self, "context": context}))
              context = e_context["context"]
              if e_context.is_pass() or context is None:
                  return context
              if cmsg.from_user_id == self.user_id and not config.get("trigger_by_self", True):
                  logger.debug("[chat_channel]self message skipped")
                  return None

          # Content matching: decide whether to answer and clean up the content.
          if ctype == ContextType.TEXT:
              return self._apply_trigger_rules(context, content, first_in, ContextType.TEXT)
          if context.type == ContextType.VOICE:
              if "desire_rtype" not in context and conf().get("voice_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
                  context["desire_rtype"] = ReplyType.VOICE
          elif context.type == ContextType.IMAGE:
              return self._apply_trigger_rules(context, content, first_in, ContextType.IMAGE)
          return context

      def _apply_trigger_rules(self, context, content, first_in, fallback_type):
          """Apply the prefix / keyword / @-mention / blacklist rules shared by text and image messages.

          Args:
              context: context being composed; its type, content and desire_rtype are updated.
              content: raw message content.
              first_in: True when the context is seen for the first time.
              fallback_type: context type to set when no image-creation prefix matches.

          Returns:
              ``context`` when the message should be handled, otherwise None.
          """
          if first_in and "」\n- - - - - - -" in content:  # first pass: drop quoted (reference) messages
              logger.debug(content)
              logger.debug("[chat_channel]reference query skipped")
              return None

          nick_name_black_list = conf().get("nick_name_black_list", [])
          if context.get("isgroup", False):  # group chat
              # Check the trigger prefix / keyword.
              match_prefix = check_prefix(content, conf().get("group_chat_prefix", [""]))
              match_contain = check_contain(content, conf().get("group_chat_keyword", [""]))
              flag = False
              if context["msg"].to_user_id != context["msg"].actual_user_id:
                  if match_prefix is not None or match_contain is not None:
                      flag = True
                      if match_prefix:
                          content = content.replace(match_prefix, "", 1).strip()
                  if context["msg"].is_at:
                      nick_name = context["msg"].actual_user_nickname
                      if nick_name and nick_name in nick_name_black_list:
                          # Blacklist filter.
                          logger.warning(f"[chat_channel] Nickname {nick_name} in In BlackList, ignore")
                          return None
                      logger.info("[chat_channel]receive group at")
                      if not conf().get("group_at_off", False):
                          flag = True
                      # Strip "@<name>" followed by U+2005 or a plain space.
                      pattern = f"@{re.escape(self.name)}(\u2005|\u0020)"
                      subtract_res = re.sub(pattern, r"", content)
                      if isinstance(context["msg"].at_list, list):
                          for at in context["msg"].at_list:
                              pattern = f"@{re.escape(at)}(\u2005|\u0020)"
                              subtract_res = re.sub(pattern, r"", subtract_res)
                      if subtract_res == content and context["msg"].self_display_name:
                          # Nothing was removed: retry with the display name used in this group.
                          pattern = f"@{re.escape(context['msg'].self_display_name)}(\u2005|\u0020)"
                          subtract_res = re.sub(pattern, r"", content)
                      content = subtract_res
              if not flag:
                  if context["origin_ctype"] == ContextType.VOICE:
                      logger.info("[chat_channel]receive group voice, but checkprefix didn't match")
                  return None
          else:  # private chat
              nick_name = context["msg"].from_user_nickname
              if nick_name and nick_name in nick_name_black_list:
                  # Blacklist filter.
                  logger.warning(f"[chat_channel] Nickname '{nick_name}' in In BlackList, ignore")
                  return None
              match_prefix = check_prefix(content, conf().get("single_chat_prefix", [""]))
              if match_prefix is not None:
                  # A custom prefix matched: keep the content without the prefix and surrounding spaces.
                  content = content.replace(match_prefix, "", 1).strip()
              elif context["origin_ctype"] != ContextType.VOICE:
                  # Only a private voice message is allowed through without a prefix.
                  return None

          content = content.strip()
          img_match_prefix = check_prefix(content, conf().get("image_create_prefix", [""]))
          if img_match_prefix:
              content = content.replace(img_match_prefix, "", 1)
              context.type = ContextType.IMAGE_CREATE
          else:
              context.type = fallback_type
          context.content = content.strip()
          if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
              context["desire_rtype"] = ReplyType.VOICE
          return context

      def _handle(self, context: Context):
          """Generate, decorate and send the reply for ``context``; no-op for a None or empty context."""
          if context is None or not context.content:
              return
          logger.debug("[chat_channel] ready to handle context: {}".format(context))
          # Step 1: build the reply.
          reply = self._generate_reply(context)
          logger.debug("[chat_channel] ready to decorate reply: {}".format(reply))
          # Step 2: decorate the reply.
          if reply and reply.content:
              reply = self._decorate_reply(context, reply)
              # Step 3: send the reply.
              self._send_reply(context, reply)

      def _generate_reply(self, context: Context, reply: Reply = None) -> Reply:
          """Produce the reply for ``context``.

          Emits ON_HANDLE_CONTEXT first; unless a plugin passes the event, the reply is
          built according to ``context.type``.

          Args:
              context: context to answer.
              reply: initial reply handed to plugins; a fresh ``Reply()`` is created when
                  omitted (a shared default instance would leak state between messages).

          Returns:
              The reply, or None for an unknown context type or when the text recognised
              from a voice message is filtered out by :meth:`_compose_context`.
          """
          if reply is None:
              reply = Reply()
          e_context = PluginManager().emit_event(
              EventContext(
                  Event.ON_HANDLE_CONTEXT,
                  {"channel": self, "context": context, "reply": reply},
              )
          )
          reply = e_context["reply"]
          if not e_context.is_pass():
              logger.debug("[chat_channel] ready to handle context: type={}, content={}".format(context.type, context.content))
              if context.type == ContextType.TEXT or context.type == ContextType.IMAGE_CREATE:  # text and image-creation messages
                  context["channel"] = e_context["channel"]
                  reply = super().build_reply_content(context.content, context)
              elif context.type == ContextType.VOICE:  # voice message
                  cmsg = context["msg"]
                  cmsg.prepare()
                  file_path = context.content
                  wav_path = os.path.splitext(file_path)[0] + ".wav"
                  try:
                      # any_to_wav is undefined when its import failed at module load; the
                      # resulting NameError lands in this handler as well.
                      any_to_wav(file_path, wav_path)
                  except Exception as e:  # conversion failed: use the original file, some APIs accept it directly
                      logger.warning("[chat_channel]any to wav error, use raw path. " + str(e))
                      wav_path = file_path
                  # Speech recognition.
                  reply = super().build_voice_to_text(wav_path)
                  # Remove the temporary files, best effort; each file is tried independently
                  # so one failure does not leave the other behind.
                  temp_paths = (file_path,) if wav_path == file_path else (file_path, wav_path)
                  for temp_path in temp_paths:
                      try:
                          os.remove(temp_path)
                      except Exception as e:
                          logger.debug("[chat_channel]delete temp file error: " + str(e))
                  if reply.type == ReplyType.TEXT:
                      # Feed the recognised text back in as a text message.
                      new_context = self._compose_context(ContextType.TEXT, reply.content, **context.kwargs)
                      if new_context:
                          reply = self._generate_reply(new_context)
                      else:
                          return
              elif context.type == ContextType.IMAGE:  # image message: only cached per session for now
                  memory.USER_IMAGE_CACHE[context["session_id"]] = {
                      "path": context.content,
                      "msg": context.get("msg")
                  }
              elif context.type == ContextType.SHARING:  # shared content: no default handling
                  pass
              elif context.type == ContextType.FUNCTION or context.type == ContextType.FILE:  # files, function calls: no default handling
                  pass
              else:
                  logger.warning("[chat_channel] unknown context type: {}".format(context.type))
                  return
          return reply

      def _decorate_reply(self, context: Context, reply: Reply) -> Reply:
          """Wrap a reply before sending: prefixes/suffixes, @-mention, voice conversion.

          Emits ON_DECORATE_REPLY first; decoration is skipped when a plugin passes the event.

          Returns:
              The decorated reply, or None when ``reply`` is empty/untyped or its type is unknown.
          """
          if reply and reply.type:
              e_context = PluginManager().emit_event(
                  EventContext(
                      Event.ON_DECORATE_REPLY,
                      {"channel": self, "context": context, "reply": reply},
                  )
              )
              reply = e_context["reply"]
              desire_rtype = context.get("desire_rtype")
              if not e_context.is_pass() and reply and reply.type:
                  if reply.type in self.NOT_SUPPORT_REPLYTYPE:
                      # Remember the offending type before it is overwritten, so the message
                      # reports it instead of always reporting ERROR.
                      unsupported_type = reply.type
                      logger.error("[chat_channel]reply type not support: " + str(unsupported_type))
                      reply.type = ReplyType.ERROR
                      reply.content = "不支持发送的消息类型: " + str(unsupported_type)

                  if reply.type == ReplyType.TEXT:
                      reply_text = reply.content
                      if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
                          reply = super().build_text_to_voice(reply.content)
                          return self._decorate_reply(context, reply)
                      if context.get("isgroup", False):
                          if not context.get("no_need_at", False):
                              reply_text = "@" + context["msg"].actual_user_nickname + "\n" + reply_text.strip()
                          reply_text = conf().get("group_chat_reply_prefix", [""])[0] + reply_text + conf().get("group_chat_reply_suffix", [""])[0]
                      else:
                          reply_text = conf().get("single_chat_reply_prefix", [""])[0] + reply_text + conf().get("single_chat_reply_suffix", [""])[0]
                      reply.content = reply_text
                  elif reply.type == ReplyType.ERROR or reply.type == ReplyType.INFO:
                      reply.content = "[" + str(reply.type) + "]\n" + reply.content
                  elif reply.type == ReplyType.IMAGE_URL or reply.type == ReplyType.VOICE or reply.type == ReplyType.IMAGE or reply.type == ReplyType.FILE or reply.type == ReplyType.VIDEO or reply.type == ReplyType.VIDEO_URL:
                      pass
                  else:
                      logger.error("[chat_channel] unknown reply type: {}".format(reply.type))
                      return
              if desire_rtype and desire_rtype != reply.type and reply.type not in [ReplyType.ERROR, ReplyType.INFO]:
                  logger.warning("[chat_channel] desire_rtype: {}, but reply type: {}".format(context.get("desire_rtype"), reply.type))
              return reply

      def _send_reply(self, context: Context, reply: Reply):
          """Emit ON_SEND_REPLY and send the reply unless a plugin passes the event."""
          if reply and reply.type:
              e_context = PluginManager().emit_event(
                  EventContext(
                      Event.ON_SEND_REPLY,
                      {"channel": self, "context": context, "reply": reply},
                  )
              )
              reply = e_context["reply"]
              if not e_context.is_pass() and reply and reply.type:
                  logger.debug("[chat_channel] ready to send reply: {}, context: {}".format(reply, context))
                  self._send(reply, context)

      def _send(self, reply: Reply, context: Context, retry_cnt=0):
          """Call ``self.send`` and retry up to two more times, sleeping 3s then 6s.

          A NotImplementedError is logged and never retried.
          """
          try:
              self.send(reply, context)
          except Exception as e:
              logger.error("[chat_channel] sendMsg error: {}".format(str(e)))
              if isinstance(e, NotImplementedError):
                  return
              logger.exception(e)
              if retry_cnt < 2:
                  time.sleep(3 + 3 * retry_cnt)
                  self._send(reply, context, retry_cnt + 1)

      def _success_callback(self, session_id, **kwargs):
          """Hook called when a worker finishes without an exception."""
          logger.debug("Worker return success, session_id = {}".format(session_id))

      def _fail_callback(self, session_id, exception, **kwargs):
          """Hook called when a worker finishes with an exception."""
          logger.exception("Worker return exception: {}".format(exception))

      def _thread_pool_callback(self, session_id, **kwargs):
          """Return the done-callback for a future submitted for ``session_id``.

          The callback reports success, failure or cancellation and then releases the
          session's semaphore slot that :meth:`consume` acquired for the task.
          """
          # Future.exception() raises concurrent.futures.CancelledError. Since Python 3.8 that
          # is a different class from asyncio.CancelledError (the module-level CancelledError),
          # so both are caught to behave the same on every version.
          from concurrent.futures import CancelledError as FutureCancelledError

          def func(worker: Future):
              try:
                  worker_exception = worker.exception()
                  if worker_exception:
                      self._fail_callback(session_id, exception=worker_exception, **kwargs)
                  else:
                      self._success_callback(session_id, **kwargs)
              except (FutureCancelledError, CancelledError):
                  logger.info("Worker cancelled, session_id = {}".format(session_id))
              except Exception as e:
                  logger.exception("Worker raise exception: {}".format(e))
              finally:
                  # Always give the slot back, otherwise the session could never be cleaned up.
                  with self.lock:
                      self.sessions[session_id][1].release()

          return func

      def produce(self, context: Context):
          """Queue ``context`` on its session, creating the session on first use.

          Text starting with "#" (admin commands) is put at the front of the queue.
          """
          session_id = context["session_id"]
          with self.lock:
              if session_id not in self.sessions:
                  self.sessions[session_id] = [
                      Dequeue(),
                      threading.BoundedSemaphore(conf().get("concurrency_in_session", 4)),
                  ]
              if context.type == ContextType.TEXT and context.content.startswith("#"):
                  self.sessions[session_id][0].putleft(context)  # admin commands are handled first
              else:
                  self.sessions[session_id][0].put(context)

      def consume(self):
          """Consumer loop (runs in its own thread): move queued contexts onto the thread pool.

          For every session a semaphore slot is taken without blocking; with a queued
          context the slot is handed to the submitted task (released by its
          done-callback), otherwise the slot is returned, or the session is removed when
          no other slot is in use.
          """
          while True:
              with self.lock:
                  for session_id in list(self.sessions.keys()):
                      context_queue, semaphore = self.sessions[session_id]
                      if not semaphore.acquire(blocking=False):  # every slot is busy
                          continue
                      if not context_queue.empty():
                          context = context_queue.get()
                          logger.debug("[chat_channel] consume context: {}".format(context))
                          future: Future = handler_pool.submit(self._handle, context)
                          future.add_done_callback(self._thread_pool_callback(session_id, context=context))
                          self.futures.setdefault(session_id, []).append(future)
                      elif semaphore._initial_value == semaphore._value + 1:
                          # Only the slot just taken is in use: every task of the session has finished.
                          # .get(): a session whose queue was emptied by cancel_session before
                          # anything was submitted has no futures entry yet.
                          unfinished = [t for t in self.futures.get(session_id, []) if not t.done()]
                          self.futures[session_id] = unfinished
                          assert len(unfinished) == 0, "thread pool error"
                          del self.sessions[session_id]
                      else:
                          semaphore.release()
              time.sleep(0.1)

      def _cancel_queued(self, session_id):
          """Cancel the futures and drop the queued contexts of one session; caller holds ``self.lock``."""
          # .get(): the session may not have had anything submitted to the pool yet.
          for future in self.futures.get(session_id, []):
              future.cancel()
          cnt = self.sessions[session_id][0].qsize()
          if cnt > 0:
              logger.info("Cancel {} messages in session {}".format(cnt, session_id))
          self.sessions[session_id][0] = Dequeue()

      def cancel_session(self, session_id):
          """Cancel everything pending for ``session_id``.

          Only queued messages and tasks submitted to the pool but not yet started can be cancelled.
          """
          with self.lock:
              if session_id in self.sessions:
                  self._cancel_queued(session_id)

      def cancel_all_session(self):
          """Cancel everything pending for every session (same limits as :meth:`cancel_session`)."""
          with self.lock:
              for session_id in self.sessions:
                  self._cancel_queued(session_id)
  def check_prefix(content, prefix_list):
      """Return the first prefix in ``prefix_list`` that ``content`` starts with.

      Args:
          content: text to inspect.
          prefix_list: candidate prefixes, tried in order; may be empty or None.

      Returns:
          The matching prefix (which can be the empty string), or None when
          ``prefix_list`` is empty/None or nothing matches.
      """
      if not prefix_list:
          return None
      for prefix in prefix_list:
          if content.startswith(prefix):
              return prefix
      return None
  def check_contain(content, keyword_list):
      """Tell whether ``content`` contains any keyword of ``keyword_list``.

      Args:
          content: text to inspect.
          keyword_list: candidate keywords; may be empty or None.

      Returns:
          True when at least one keyword occurs in ``content``; None (not False)
          when ``keyword_list`` is empty/None or nothing matches.
      """
      if not keyword_list:
          return None
      for keyword in keyword_list:
          if content.find(keyword) != -1:
              return True
      return None