|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361 |
- import os
- import re
- import threading
- import time
- from asyncio import CancelledError
- from concurrent.futures import Future, ThreadPoolExecutor
-
- from bridge.context import *
- from bridge.reply import *
- from channel.channel import Channel
- from common.dequeue import Dequeue
- from common.log import logger
- from config import conf
- from plugins import *
-
- try:
- from voice.audio_convert import any_to_wav
- except Exception as e:
- pass
-
-
- # 抽象类, 它包含了与消息通道无关的通用处理逻辑
- class ChatChannel(Channel):
- name = None # 登录的用户名
- user_id = None # 登录的用户id
- futures = {} # 记录每个session_id提交到线程池的future对象, 用于重置会话时把没执行的future取消掉,正在执行的不会被取消
- sessions = {} # 用于控制并发,每个session_id同时只能有一个context在处理
- lock = threading.Lock() # 用于控制对sessions的访问
- handler_pool = ThreadPoolExecutor(max_workers=8) # 处理消息的线程池
-
- def __init__(self):
- _thread = threading.Thread(target=self.consume)
- _thread.setDaemon(True)
- _thread.start()
-
- # 根据消息构造context,消息内容相关的触发项写在这里
- def _compose_context(self, ctype: ContextType, content, **kwargs):
- context = Context(ctype, content)
- context.kwargs = kwargs
- # context首次传入时,origin_ctype是None,
- # 引入的起因是:当输入语音时,会嵌套生成两个context,第一步语音转文本,第二步通过文本生成文字回复。
- # origin_ctype用于第二步文本回复时,判断是否需要匹配前缀,如果是私聊的语音,就不需要匹配前缀
- if "origin_ctype" not in context:
- context["origin_ctype"] = ctype
- # context首次传入时,receiver是None,根据类型设置receiver
- first_in = "receiver" not in context
- # 群名匹配过程,设置session_id和receiver
- if first_in: # context首次传入时,receiver是None,根据类型设置receiver
- config = conf()
- cmsg = context["msg"]
- user_data = conf().get_user_data(cmsg.from_user_id)
- context["openai_api_key"] = user_data.get("openai_api_key")
- context["gpt_model"] = user_data.get("gpt_model")
- if context.get("isgroup", False):
- group_name = cmsg.other_user_nickname
- group_id = cmsg.other_user_id
-
- group_name_white_list = config.get("group_name_white_list", [])
- group_name_keyword_white_list = config.get("group_name_keyword_white_list", [])
- if any(
- [
- group_name in group_name_white_list,
- "ALL_GROUP" in group_name_white_list,
- check_contain(group_name, group_name_keyword_white_list),
- ]
- ):
- group_chat_in_one_session = conf().get("group_chat_in_one_session", [])
- session_id = cmsg.actual_user_id
- if any(
- [
- group_name in group_chat_in_one_session,
- "ALL_GROUP" in group_chat_in_one_session,
- ]
- ):
- session_id = group_id
- else:
- return None
- context["session_id"] = session_id
- context["receiver"] = group_id
- else:
- context["session_id"] = cmsg.other_user_id
- context["receiver"] = cmsg.other_user_id
- e_context = PluginManager().emit_event(EventContext(Event.ON_RECEIVE_MESSAGE, {"channel": self, "context": context}))
- context = e_context["context"]
- if e_context.is_pass() or context is None:
- return context
- if cmsg.from_user_id == self.user_id and not config.get("trigger_by_self", True):
- logger.debug("[WX]self message skipped")
- return None
-
- # 消息内容匹配过程,并处理content
- if ctype == ContextType.TEXT:
- if first_in and "」\n- - - - - - -" in content: # 初次匹配 过滤引用消息
- logger.debug("[WX]reference query skipped")
- return None
-
- if context.get("isgroup", False): # 群聊
- # 校验关键字
- match_prefix = check_prefix(content, conf().get("group_chat_prefix"))
- match_contain = check_contain(content, conf().get("group_chat_keyword"))
- flag = False
- if match_prefix is not None or match_contain is not None:
- flag = True
- if match_prefix:
- content = content.replace(match_prefix, "", 1).strip()
- if context["msg"].is_at:
- logger.info("[WX]receive group at")
- if not conf().get("group_at_off", False):
- flag = True
- pattern = f"@{re.escape(self.name)}(\u2005|\u0020)"
- content = re.sub(pattern, r"", content)
-
- if not flag:
- if context["origin_ctype"] == ContextType.VOICE:
- logger.info("[WX]receive group voice, but checkprefix didn't match")
- return None
- else: # 单聊
- match_prefix = check_prefix(content, conf().get("single_chat_prefix", [""]))
- if match_prefix is not None: # 判断如果匹配到自定义前缀,则返回过滤掉前缀+空格后的内容
- content = content.replace(match_prefix, "", 1).strip()
- elif context["origin_ctype"] == ContextType.VOICE: # 如果源消息是私聊的语音消息,允许不匹配前缀,放宽条件
- pass
- else:
- return None
- content = content.strip()
- img_match_prefix = check_prefix(content, conf().get("image_create_prefix"))
- if img_match_prefix:
- content = content.replace(img_match_prefix, "", 1)
- context.type = ContextType.IMAGE_CREATE
- else:
- context.type = ContextType.TEXT
- context.content = content.strip()
- if "desire_rtype" not in context and conf().get("always_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
- context["desire_rtype"] = ReplyType.VOICE
- elif context.type == ContextType.VOICE:
- if "desire_rtype" not in context and conf().get("voice_reply_voice") and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
- context["desire_rtype"] = ReplyType.VOICE
-
- return context
-
- def _handle(self, context: Context):
- if context is None or not context.content:
- return
- logger.debug("[WX] ready to handle context: {}".format(context))
- # reply的构建步骤
- reply = self._generate_reply(context)
-
- logger.debug("[WX] ready to decorate reply: {}".format(reply))
- # reply的包装步骤
- reply = self._decorate_reply(context, reply)
-
- # reply的发送步骤
- self._send_reply(context, reply)
-
- def _generate_reply(self, context: Context, reply: Reply = Reply()) -> Reply:
- e_context = PluginManager().emit_event(
- EventContext(
- Event.ON_HANDLE_CONTEXT,
- {"channel": self, "context": context, "reply": reply},
- )
- )
- reply = e_context["reply"]
- if not e_context.is_pass():
- logger.debug("[WX] ready to handle context: type={}, content={}".format(context.type, context.content))
- if context.type == ContextType.TEXT or context.type == ContextType.IMAGE_CREATE: # 文字和图片消息
- reply = super().build_reply_content(context.content, context)
- elif context.type == ContextType.VOICE: # 语音消息
- cmsg = context["msg"]
- cmsg.prepare()
- file_path = context.content
- wav_path = os.path.splitext(file_path)[0] + ".wav"
- try:
- any_to_wav(file_path, wav_path)
- except Exception as e: # 转换失败,直接使用mp3,对于某些api,mp3也可以识别
- logger.warning("[WX]any to wav error, use raw path. " + str(e))
- wav_path = file_path
- # 语音识别
- reply = super().build_voice_to_text(wav_path)
- # 删除临时文件
- try:
- os.remove(file_path)
- if wav_path != file_path:
- os.remove(wav_path)
- except Exception as e:
- pass
- # logger.warning("[WX]delete temp file error: " + str(e))
-
- if reply.type == ReplyType.TEXT:
- new_context = self._compose_context(ContextType.TEXT, reply.content, **context.kwargs)
- if new_context:
- reply = self._generate_reply(new_context)
- else:
- return
- elif context.type == ContextType.IMAGE: # 图片消息,当前无默认逻辑
- pass
- else:
- logger.error("[WX] unknown context type: {}".format(context.type))
- return
- return reply
-
- def _decorate_reply(self, context: Context, reply: Reply) -> Reply:
- if reply and reply.type:
- e_context = PluginManager().emit_event(
- EventContext(
- Event.ON_DECORATE_REPLY,
- {"channel": self, "context": context, "reply": reply},
- )
- )
- reply = e_context["reply"]
- desire_rtype = context.get("desire_rtype")
- if not e_context.is_pass() and reply and reply.type:
- if reply.type in self.NOT_SUPPORT_REPLYTYPE:
- logger.error("[WX]reply type not support: " + str(reply.type))
- reply.type = ReplyType.ERROR
- reply.content = "不支持发送的消息类型: " + str(reply.type)
-
- if reply.type == ReplyType.TEXT:
- reply_text = reply.content
- if desire_rtype == ReplyType.VOICE and ReplyType.VOICE not in self.NOT_SUPPORT_REPLYTYPE:
- reply = super().build_text_to_voice(reply.content)
- return self._decorate_reply(context, reply)
- if context.get("isgroup", False):
- reply_text = "@" + context["msg"].actual_user_nickname + " " + reply_text.strip()
- reply_text = conf().get("group_chat_reply_prefix", "") + reply_text
- else:
- reply_text = conf().get("single_chat_reply_prefix", "") + reply_text
- reply.content = reply_text
- elif reply.type == ReplyType.ERROR or reply.type == ReplyType.INFO:
- reply.content = "[" + str(reply.type) + "]\n" + reply.content
- elif reply.type == ReplyType.IMAGE_URL or reply.type == ReplyType.VOICE or reply.type == ReplyType.IMAGE:
- pass
- else:
- logger.error("[WX] unknown reply type: {}".format(reply.type))
- return
- if desire_rtype and desire_rtype != reply.type and reply.type not in [ReplyType.ERROR, ReplyType.INFO]:
- logger.warning("[WX] desire_rtype: {}, but reply type: {}".format(context.get("desire_rtype"), reply.type))
- return reply
-
- def _send_reply(self, context: Context, reply: Reply):
- if reply and reply.type:
- e_context = PluginManager().emit_event(
- EventContext(
- Event.ON_SEND_REPLY,
- {"channel": self, "context": context, "reply": reply},
- )
- )
- reply = e_context["reply"]
- if not e_context.is_pass() and reply and reply.type:
- logger.debug("[WX] ready to send reply: {}, context: {}".format(reply, context))
- self._send(reply, context)
-
- def _send(self, reply: Reply, context: Context, retry_cnt=0):
- try:
- self.send(reply, context)
- except Exception as e:
- logger.error("[WX] sendMsg error: {}".format(str(e)))
- if isinstance(e, NotImplementedError):
- return
- logger.exception(e)
- if retry_cnt < 2:
- time.sleep(3 + 3 * retry_cnt)
- self._send(reply, context, retry_cnt + 1)
-
- def _success_callback(self, session_id, **kwargs): # 线程正常结束时的回调函数
- logger.debug("Worker return success, session_id = {}".format(session_id))
-
- def _fail_callback(self, session_id, exception, **kwargs): # 线程异常结束时的回调函数
- logger.exception("Worker return exception: {}".format(exception))
-
- def _thread_pool_callback(self, session_id, **kwargs):
- def func(worker: Future):
- try:
- worker_exception = worker.exception()
- if worker_exception:
- self._fail_callback(session_id, exception=worker_exception, **kwargs)
- else:
- self._success_callback(session_id, **kwargs)
- except CancelledError as e:
- logger.info("Worker cancelled, session_id = {}".format(session_id))
- except Exception as e:
- logger.exception("Worker raise exception: {}".format(e))
- with self.lock:
- self.sessions[session_id][1].release()
-
- return func
-
- def produce(self, context: Context):
- session_id = context["session_id"]
- with self.lock:
- if session_id not in self.sessions:
- self.sessions[session_id] = [
- Dequeue(),
- threading.BoundedSemaphore(conf().get("concurrency_in_session", 4)),
- ]
- if context.type == ContextType.TEXT and context.content.startswith("#"):
- self.sessions[session_id][0].putleft(context) # 优先处理管理命令
- else:
- self.sessions[session_id][0].put(context)
-
- # 消费者函数,单独线程,用于从消息队列中取出消息并处理
- def consume(self):
- while True:
- with self.lock:
- session_ids = list(self.sessions.keys())
- for session_id in session_ids:
- context_queue, semaphore = self.sessions[session_id]
- if semaphore.acquire(blocking=False): # 等线程处理完毕才能删除
- if not context_queue.empty():
- context = context_queue.get()
- logger.debug("[WX] consume context: {}".format(context))
- future: Future = self.handler_pool.submit(self._handle, context)
- future.add_done_callback(self._thread_pool_callback(session_id, context=context))
- if session_id not in self.futures:
- self.futures[session_id] = []
- self.futures[session_id].append(future)
- elif semaphore._initial_value == semaphore._value + 1: # 除了当前,没有任务再申请到信号量,说明所有任务都处理完毕
- self.futures[session_id] = [t for t in self.futures[session_id] if not t.done()]
- assert len(self.futures[session_id]) == 0, "thread pool error"
- del self.sessions[session_id]
- else:
- semaphore.release()
- time.sleep(0.1)
-
- # 取消session_id对应的所有任务,只能取消排队的消息和已提交线程池但未执行的任务
- def cancel_session(self, session_id):
- with self.lock:
- if session_id in self.sessions:
- for future in self.futures[session_id]:
- future.cancel()
- cnt = self.sessions[session_id][0].qsize()
- if cnt > 0:
- logger.info("Cancel {} messages in session {}".format(cnt, session_id))
- self.sessions[session_id][0] = Dequeue()
-
- def cancel_all_session(self):
- with self.lock:
- for session_id in self.sessions:
- for future in self.futures[session_id]:
- future.cancel()
- cnt = self.sessions[session_id][0].qsize()
- if cnt > 0:
- logger.info("Cancel {} messages in session {}".format(cnt, session_id))
- self.sessions[session_id][0] = Dequeue()
-
-
- def check_prefix(content, prefix_list):
- if not prefix_list:
- return None
- for prefix in prefix_list:
- if content.startswith(prefix):
- return prefix
- return None
-
-
- def check_contain(content, keyword_list):
- if not keyword_list:
- return None
- for ky in keyword_list:
- if content.find(ky) != -1:
- return True
- return None
|