From 8915149d361cf0260f11b21e406091d968c7b0a7 Mon Sep 17 00:00:00 2001
From: lanvent <lanvent@qq.com>
Date: Tue, 14 Mar 2023 17:30:30 +0800
Subject: [PATCH] plugin: add banwords plugin

---
 plugins/banwords/.gitignore            |   1 +
 plugins/banwords/README.md             |   9 +
 plugins/banwords/WordsSearch.py        | 250 +++++++++++++++++++++++++
 plugins/banwords/__init__.py           |   0
 plugins/banwords/banwords.py           |  63 +++++++
 plugins/banwords/banwords.txt.template |   3 +
 plugins/banwords/config.json.template  |   3 +
 plugins/godcmd/config.json.template    |   4 +
 plugins/godcmd/godcmd.py               |   6 +-
 9 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 plugins/banwords/.gitignore
 create mode 100644 plugins/banwords/README.md
 create mode 100644 plugins/banwords/WordsSearch.py
 create mode 100644 plugins/banwords/__init__.py
 create mode 100644 plugins/banwords/banwords.py
 create mode 100644 plugins/banwords/banwords.txt.template
 create mode 100644 plugins/banwords/config.json.template
 create mode 100644 plugins/godcmd/config.json.template

diff --git a/plugins/banwords/.gitignore b/plugins/banwords/.gitignore
new file mode 100644
index 0000000..a6593bf
--- /dev/null
+++ b/plugins/banwords/.gitignore
@@ -0,0 +1 @@
+banwords.txt
\ No newline at end of file
diff --git a/plugins/banwords/README.md b/plugins/banwords/README.md
new file mode 100644
index 0000000..9c7e498
--- /dev/null
+++ b/plugins/banwords/README.md
@@ -0,0 +1,9 @@
+### 说明
+简易的敏感词插件，暂不支持分词，请自行导入词库到插件文件夹中的`banwords.txt`，每行一个词，一个参考词库是[1](https://github.com/cjh0613/tencent-sensitive-words/blob/main/sensitive_words_lines.txt)。
+
+`config.json`中能够填写默认的处理行为，目前行为有：
+- `ignore` : 无视这条消息。
+- `replace` : 将消息中的敏感词替换成"*"，并回复违规。
+
+### 致谢
+搜索功能实现来自https://github.com/toolgood/ToolGood.Words
\ No newline at end of file
diff --git a/plugins/banwords/WordsSearch.py b/plugins/banwords/WordsSearch.py
new file mode 100644
index 0000000..d41d6e7
--- /dev/null
+++ b/plugins/banwords/WordsSearch.py
@@ -0,0 +1,250 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# ToolGood.Words.WordsSearch.py
+# 2020, Lin Zhijun, https://github.com/toolgood/ToolGood.Words
+# Licensed under the Apache License 2.0
+# 更新日志
+# 2020.04.06 第一次提交
+# 2020.05.16 修改，支持大于0xffff的字符
+
+__all__ = ['WordsSearch']
+__author__ = 'Lin Zhijun'
+__date__ = '2020.05.16'
+
+class TrieNode():
+    """Mutable trie node used while SetKeywords() builds the automaton."""
+    def __init__(self):
+        self.Index = 0       # position in the node list; assigned by SetKeywords
+        self.Layer = 0       # depth below the root; 0 means "not assigned yet"
+        self.End = False     # True once at least one keyword ends at this node
+        self.Char = ''       # key that leads from Parent to this node (set by Add)
+        self.Results = []    # indexes of the keywords recognised at this node
+        self.m_values = {}   # children, keyed by the value passed to Add
+        self.Failure = None  # failure link; filled in by SetKeywords
+        self.Parent = None   # node whose Add() created this one; None otherwise
+
+    def Add(self, c):
+        """Return the child reached via key ``c``, creating it if it is missing."""
+        if c not in self.m_values:
+            child = TrieNode()
+            child.Parent = self
+            child.Char = c
+            self.m_values[c] = child
+        return self.m_values[c]
+
+    def SetResults(self, index):
+        """Record that keyword number ``index`` ends at this node."""
+        self.End = True
+        self.Results.append(index)
+
+class TrieNode2():
+    """Flattened automaton node built by SetKeywords() and walked by the search methods."""
+    def __init__(self):
+        self.End = False       # True when at least one keyword ends here
+        self.Results = []      # keyword indexes, each stored once
+        self.m_values = {}     # transitions: key -> TrieNode2
+        self.minflag = 0xffff  # lower bound for TryGetValue's range pre-check
+        self.maxflag = 0       # upper bound for TryGetValue's range pre-check
+
+    def Add(self, c, node3):
+        """Register the transition ``c -> node3`` and widen the key range to cover ``c``."""
+        self.minflag = min(self.minflag, c)
+        self.maxflag = max(self.maxflag, c)
+        self.m_values[c] = node3
+
+    def SetResults(self, index):
+        """Mark this node as a match and remember ``index`` if it is not stored yet."""
+        self.End = True
+        if index not in self.Results:
+            self.Results.append(index)
+
+    def HasKey(self, c):
+        """Tell whether a transition for key ``c`` exists."""
+        return c in self.m_values
+
+    def TryGetValue(self, c):
+        """Return the node reached via key ``c``, or None when there is no such transition."""
+        if c < self.minflag or c > self.maxflag:
+            return None
+        return self.m_values.get(c)
+
+
+class WordsSearch():
+    def __init__(self):
+        """Create a searcher without keywords; it matches nothing until SetKeywords() is called."""
+        self._first = TrieNode2()  # empty root node; a plain dict here has no TryGetValue()
+        self._keywords, self._indexs = [], []
+    
+    def SetKeywords(self,keywords):
+        """Compile ``keywords`` into the automaton used by every search method.
+
+        Calling this again replaces the previous keyword set. The build runs in
+        four passes:
+
+        1. insert every keyword into a trie of ``TrieNode`` objects,
+        2. put the nodes into one list, root first, then layer by layer,
+        3. compute each node's failure link and inherit the results found there,
+        4. copy everything into ``TrieNode2`` objects, folding the transitions
+           and results reachable over failure links into each node, so that the
+           search methods never follow a failure link themselves.
+
+        Args:
+            keywords: list of strings to search for. The list is stored as-is
+                (not copied); node ``Results`` hold positions in this list.
+                Characters are keyed by ``ord()``.
+
+        Note:
+            An empty string in ``keywords`` is accepted but can never be matched,
+            because no transition leads back to the root node.
+        """
+        self._keywords = keywords
+        self._indexs = list(range(len(keywords)))
+
+        # Pass 1: build the trie and group nodes by depth as they are first seen.
+        root = TrieNode()
+        nodes_by_layer = {}
+        for word_no, word in enumerate(self._keywords):
+            node = root
+            for depth, char in enumerate(word, start=1):
+                node = node.Add(ord(char))
+                if node.Layer == 0:  # first visit: record its depth exactly once
+                    node.Layer = depth
+                    nodes_by_layer.setdefault(depth, []).append(node)
+            node.SetResults(word_no)
+
+        # Pass 2: flatten to one list. Layers come out in the order they were
+        # first created, and a layer can only be created after the one above it
+        # exists, so parents always precede their children.
+        ordered = [root]
+        for layer_nodes in nodes_by_layer.values():
+            ordered.extend(layer_nodes)
+
+        # Pass 3: failure links. root.Failure is still None during this loop,
+        # which is what terminates the walk up the chain.
+        for position, node in enumerate(ordered):
+            if position == 0:
+                continue  # the root keeps Index 0 and gets its link after the loop
+            node.Index = position
+            key = node.Char
+            fallback = node.Parent.Failure
+            while fallback is not None and key not in fallback.m_values:
+                fallback = fallback.Failure
+            if fallback is None:
+                node.Failure = root
+                continue
+            node.Failure = fallback.m_values[key]
+            # Every keyword ending at the failure target also ends here.
+            for inherited in node.Failure.Results:
+                node.SetResults(inherited)
+        root.Failure = root
+
+        # Pass 4: build the flattened search nodes, one per trie node, at the
+        # same list position (which equals the trie node's Index).
+        compiled = [TrieNode2() for _ in ordered]
+        for source, target in zip(ordered, compiled):
+            for key, child in source.m_values.items():
+                target.Add(key, compiled[child.Index])
+            for word_no in source.Results:
+                target.SetResults(word_no)
+
+            # Fold in whatever is reachable through the failure chain; the
+            # transitions the node already has take precedence.
+            link = source.Failure
+            while link is not root:
+                for key, child in link.m_values.items():
+                    if not target.HasKey(key):
+                        target.Add(key, compiled[child.Index])
+                for word_no in link.Results:
+                    target.SetResults(word_no)
+                link = link.Failure
+
+        # The compiled root is the entry point for all searches.
+        self._first = compiled[0]
+    
+
+    def FindFirst(self,text):
+        """Return details of the first keyword match in ``text``, or None.
+
+        A match is a dict with the keys "Keyword", "Success" (always True),
+        "End" and "Start" (inclusive positions in ``text``) and "Index" (the
+        keyword's entry in ``self._indexs``).
+        """
+        state = None
+        for end, char in enumerate(text):
+            code = ord(char)
+            # Continue from the current state; restart at the root when stuck.
+            if state is not None:
+                state = state.TryGetValue(code)
+            if state is None:
+                state = self._first.TryGetValue(code)
+            if state is not None and state.End:
+                word_no = state.Results[0]
+                word = self._keywords[word_no]
+                return {"Keyword": word, "Success": True, "End": end, "Start": end + 1 - len(word), "Index": self._indexs[word_no]}
+        return None
+
+    def FindAll(self,text):
+        """Return a list with one dict per keyword occurrence found in ``text``.
+
+        Each dict has the keys "Keyword", "Success", "End", "Start" and "Index".
+        Entries are ordered by end position; keywords ending at the same position
+        follow the order of the matching node's ``Results``.
+        """
+        matches = []
+        state = None
+        for end, char in enumerate(text):
+            code = ord(char)
+            # Continue from the current state; restart at the root when stuck.
+            if state is not None:
+                state = state.TryGetValue(code)
+            if state is None:
+                state = self._first.TryGetValue(code)
+            if state is None or not state.End:
+                continue
+            for word_no in state.Results:
+                word = self._keywords[word_no]
+                matches.append({"Keyword": word, "Success": True, "End": end,
+                                "Start": end + 1 - len(word), "Index": self._indexs[word_no]})
+        return matches
+
+
+    def ContainsAny(self,text):
+        """Tell whether ``text`` contains at least one keyword.
+
+        Scanning stops at the first match, so no match details are collected.
+        """
+        state = None
+        for char in text:
+            code = ord(char)
+            # Continue from the current state; restart at the root when stuck.
+            if state is not None:
+                state = state.TryGetValue(code)
+            if state is None:
+                state = self._first.TryGetValue(code)
+            # A state flagged End means some keyword finishes on this character.
+            if state is not None and state.End:
+                return True
+        return False
+    
+    def Replace(self,text, replaceChar = '*'):
+        """Return ``text`` with every keyword occurrence overwritten by ``replaceChar``.
+
+        One ``replaceChar`` is written per masked character. At each match only
+        the first keyword in the node's ``Results`` determines how many
+        characters are masked.
+        """
+        masked = list(text)
+        state = None
+        for end, char in enumerate(text):
+            code = ord(char)
+            # Continue from the current state; restart at the root when stuck.
+            if state is not None:
+                state = state.TryGetValue(code)
+            if state is None:
+                state = self._first.TryGetValue(code)
+            if state is None or not state.End:
+                continue
+            span = len(self._keywords[state.Results[0]])
+            for pos in range(end + 1 - span, end + 1):
+                masked[pos] = replaceChar
+        return ''.join(masked)
\ No newline at end of file
diff --git a/plugins/banwords/__init__.py b/plugins/banwords/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/plugins/banwords/banwords.py b/plugins/banwords/banwords.py
new file mode 100644
index 0000000..2b4a711
--- /dev/null
+++ b/plugins/banwords/banwords.py
@@ -0,0 +1,63 @@
+# encoding:utf-8
+
+import json
+import os
+from bridge.context import ContextType
+from bridge.reply import Reply, ReplyType
+import plugins
+from plugins import *
+from common.log import logger
+from .WordsSearch import WordsSearch
+
+
+@plugins.register(name="Banwords", desc="判断消息中是否有敏感词、决定是否回复。", version="1.0", author="lanvent", desire_priority= 100)
+class Banwords(Plugin):
+    """Screen incoming messages against the words listed in banwords.txt.
+
+    config.json "action": "ignore" only sets BREAK_PASS; "replace" also replies with matches masked.
+    """
+    def __init__(self):
+        """Load config.json and banwords.txt; the handler is registered only on success."""
+        super().__init__()
+        try:
+            curdir = os.path.dirname(__file__)
+            config_path = os.path.join(curdir, "config.json")
+            if not os.path.exists(config_path):
+                conf = {"action": "ignore"}
+                with open(config_path, "w", encoding="utf-8") as f:
+                    json.dump(conf, f, indent=4)
+            else:
+                with open(config_path, "r", encoding="utf-8") as f:
+                    conf = json.load(f)
+            # A config without "action" uses the default instead of failing init.
+            self.action = conf.get("action", "ignore")
+            banwords_path = os.path.join(curdir, "banwords.txt")
+            # utf-8-sig reads plain UTF-8 too and drops a BOM that would otherwise stick to the first word.
+            with open(banwords_path, 'r', encoding='utf-8-sig') as f:
+                words = [word for word in (line.strip() for line in f) if word]
+            self.searchr = WordsSearch()
+            self.searchr.SetKeywords(words)
+            self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
+            logger.info("[Banwords] inited")
+        except Exception as e:
+            # Report the failure; the handler above is not registered in that case.
+            logger.error("Banwords init failed: %s" % e)
+
+    def on_handle_context(self, e_context: EventContext):
+        """Apply the configured action to a TEXT or IMAGE_CREATE context."""
+        if e_context['context'].type not in [ContextType.TEXT,ContextType.IMAGE_CREATE]:
+            return
+        content = e_context['context'].content
+        logger.debug("[Banwords] on_handle_context. content: %s" % content)
+        if self.action == "ignore":
+            f = self.searchr.FindFirst(content)
+            if f:
+                logger.info("Banwords: %s" % f["Keyword"])
+                e_context.action = EventAction.BREAK_PASS
+                return
+        elif self.action == "replace":
+            if self.searchr.ContainsAny(content):
+                reply = Reply(ReplyType.INFO, "发言中包含敏感词，请重试: \n"+self.searchr.Replace(content))
+                e_context['reply'] = reply
+                e_context.action = EventAction.BREAK_PASS
+                return
\ No newline at end of file
diff --git a/plugins/banwords/banwords.txt.template b/plugins/banwords/banwords.txt.template
new file mode 100644
index 0000000..9b2e8ed
--- /dev/null
+++ b/plugins/banwords/banwords.txt.template
@@ -0,0 +1,3 @@
+nipples
+pennis
+法轮功
\ No newline at end of file
diff --git a/plugins/banwords/config.json.template b/plugins/banwords/config.json.template
new file mode 100644
index 0000000..000fdda
--- /dev/null
+++ b/plugins/banwords/config.json.template
@@ -0,0 +1,3 @@
+{
+    "action": "ignore"
+}
\ No newline at end of file
diff --git a/plugins/godcmd/config.json.template b/plugins/godcmd/config.json.template
new file mode 100644
index 0000000..5240738
--- /dev/null
+++ b/plugins/godcmd/config.json.template
@@ -0,0 +1,4 @@
+{
+    "password": "",
+    "admin_users": []
+}
\ No newline at end of file
diff --git a/plugins/godcmd/godcmd.py b/plugins/godcmd/godcmd.py
index 296b9a9..2a942df 100644
--- a/plugins/godcmd/godcmd.py
+++ b/plugins/godcmd/godcmd.py
@@ -273,9 +273,13 @@ class Godcmd(Plugin):
         
         if isadmin:
             return False,"管理员账号无需认证"
-
+        
+        if len(self.password) == 0:
+            return False,"未设置口令，无法认证"
+        
         if len(args) != 1:
             return False,"请提供口令"
+        
         password = args[0]
         if password == self.password:
             self.admin_users.append(userid)