From 583440b82b10469a6711031de61d8f2592808523 Mon Sep 17 00:00:00 2001 From: lanvent Date: Sun, 16 Apr 2023 19:04:21 +0800 Subject: [PATCH] banwords: move WordsSearch to lib --- plugins/banwords/WordsSearch.py | 250 -------------------------------- plugins/banwords/banwords.py | 2 +- 2 files changed, 1 insertion(+), 251 deletions(-) delete mode 100644 plugins/banwords/WordsSearch.py diff --git a/plugins/banwords/WordsSearch.py b/plugins/banwords/WordsSearch.py deleted file mode 100644 index d41d6e7..0000000 --- a/plugins/banwords/WordsSearch.py +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python -# -*- coding:utf-8 -*- -# ToolGood.Words.WordsSearch.py -# 2020, Lin Zhijun, https://github.com/toolgood/ToolGood.Words -# Licensed under the Apache License 2.0 -# 更新日志 -# 2020.04.06 第一次提交 -# 2020.05.16 修改,支持大于0xffff的字符 - -__all__ = ['WordsSearch'] -__author__ = 'Lin Zhijun' -__date__ = '2020.05.16' - -class TrieNode(): - def __init__(self): - self.Index = 0 - self.Index = 0 - self.Layer = 0 - self.End = False - self.Char = '' - self.Results = [] - self.m_values = {} - self.Failure = None - self.Parent = None - - def Add(self,c): - if c in self.m_values : - return self.m_values[c] - node = TrieNode() - node.Parent = self - node.Char = c - self.m_values[c] = node - return node - - def SetResults(self,index): - if (self.End == False): - self.End = True - self.Results.append(index) - -class TrieNode2(): - def __init__(self): - self.End = False - self.Results = [] - self.m_values = {} - self.minflag = 0xffff - self.maxflag = 0 - - def Add(self,c,node3): - if (self.minflag > c): - self.minflag = c - if (self.maxflag < c): - self.maxflag = c - self.m_values[c] = node3 - - def SetResults(self,index): - if (self.End == False) : - self.End = True - if (index in self.Results )==False : - self.Results.append(index) - - def HasKey(self,c): - return c in self.m_values - - - def TryGetValue(self,c): - if (self.minflag <= c and self.maxflag >= c): - if c in self.m_values: - return self.m_values[c] - return None - - -class WordsSearch(): - def __init__(self): - self._first = {} - self._keywords = [] - self._indexs=[] - - def SetKeywords(self,keywords): - self._keywords = keywords - self._indexs=[] - for i in range(len(keywords)): - self._indexs.append(i) - - root = TrieNode() - allNodeLayer={} - - for i in range(len(self._keywords)): # for (i = 0; i < _keywords.length; i++) - p = self._keywords[i] - nd = root - for j in range(len(p)): # for (j = 0; j < p.length; j++) - nd = nd.Add(ord(p[j])) - if (nd.Layer == 0): - nd.Layer = j + 1 - if nd.Layer in allNodeLayer: - allNodeLayer[nd.Layer].append(nd) - else: - allNodeLayer[nd.Layer]=[] - allNodeLayer[nd.Layer].append(nd) - nd.SetResults(i) - - - allNode = [] - allNode.append(root) - for key in allNodeLayer.keys(): - for nd in allNodeLayer[key]: - allNode.append(nd) - allNodeLayer=None - - for i in range(len(allNode)): # for (i = 0; i < allNode.length; i++) - if i==0 : - continue - nd=allNode[i] - nd.Index = i - r = nd.Parent.Failure - c = nd.Char - while (r != None and (c in r.m_values)==False): - r = r.Failure - if (r == None): - nd.Failure = root - else: - nd.Failure = r.m_values[c] - for key2 in nd.Failure.Results : - nd.SetResults(key2) - root.Failure = root - - allNode2 = [] - for i in range(len(allNode)): # for (i = 0; i < allNode.length; i++) - allNode2.append( TrieNode2()) - - for i in range(len(allNode2)): # for (i = 0; i < allNode2.length; i++) - oldNode = allNode[i] - newNode = allNode2[i] - - for key in oldNode.m_values : - index = oldNode.m_values[key].Index - newNode.Add(key, allNode2[index]) - - for index in range(len(oldNode.Results)): # for (index = 0; index < oldNode.Results.length; index++) - item = oldNode.Results[index] - newNode.SetResults(item) - - oldNode=oldNode.Failure - while oldNode != root: - for key in oldNode.m_values : - if (newNode.HasKey(key) == False): - index = oldNode.m_values[key].Index - newNode.Add(key, allNode2[index]) - for index in range(len(oldNode.Results)): - item = oldNode.Results[index] - newNode.SetResults(item) - oldNode=oldNode.Failure - allNode = None - root = None - - # first = [] - # for index in range(65535):# for (index = 0; index < 0xffff; index++) - # first.append(None) - - # for key in allNode2[0].m_values : - # first[key] = allNode2[0].m_values[key] - - self._first = allNode2[0] - - - def FindFirst(self,text): - ptr = None - for index in range(len(text)): # for (index = 0; index < text.length; index++) - t =ord(text[index]) # text.charCodeAt(index) - tn = None - if (ptr == None): - tn = self._first.TryGetValue(t) - else: - tn = ptr.TryGetValue(t) - if (tn==None): - tn = self._first.TryGetValue(t) - - - if (tn != None): - if (tn.End): - item = tn.Results[0] - keyword = self._keywords[item] - return { "Keyword": keyword, "Success": True, "End": index, "Start": index + 1 - len(keyword), "Index": self._indexs[item] } - ptr = tn - return None - - def FindAll(self,text): - ptr = None - list = [] - - for index in range(len(text)): # for (index = 0; index < text.length; index++) - t =ord(text[index]) # text.charCodeAt(index) - tn = None - if (ptr == None): - tn = self._first.TryGetValue(t) - else: - tn = ptr.TryGetValue(t) - if (tn==None): - tn = self._first.TryGetValue(t) - - - if (tn != None): - if (tn.End): - for j in range(len(tn.Results)): # for (j = 0; j < tn.Results.length; j++) - item = tn.Results[j] - keyword = self._keywords[item] - list.append({ "Keyword": keyword, "Success": True, "End": index, "Start": index + 1 - len(keyword), "Index": self._indexs[item] }) - ptr = tn - return list - - - def ContainsAny(self,text): - ptr = None - for index in range(len(text)): # for (index = 0; index < text.length; index++) - t =ord(text[index]) # text.charCodeAt(index) - tn = None - if (ptr == None): - tn = self._first.TryGetValue(t) - else: - tn = ptr.TryGetValue(t) - if (tn==None): - tn = self._first.TryGetValue(t) - - if (tn != None): - if (tn.End): - return True - ptr = tn - return False - - def Replace(self,text, replaceChar = '*'): - result = list(text) - - ptr = None - for i in range(len(text)): # for (i = 0; i < text.length; i++) - t =ord(text[i]) # text.charCodeAt(index) - tn = None - if (ptr == None): - tn = self._first.TryGetValue(t) - else: - tn = ptr.TryGetValue(t) - if (tn==None): - tn = self._first.TryGetValue(t) - - if (tn != None): - if (tn.End): - maxLength = len( self._keywords[tn.Results[0]]) - start = i + 1 - maxLength - for j in range(start,i+1): # for (j = start; j <= i; j++) - result[j] = replaceChar - ptr = tn - return ''.join(result) \ No newline at end of file diff --git a/plugins/banwords/banwords.py b/plugins/banwords/banwords.py index 2d94af4..c1af17a 100644 --- a/plugins/banwords/banwords.py +++ b/plugins/banwords/banwords.py @@ -7,7 +7,7 @@ from bridge.reply import Reply, ReplyType import plugins from plugins import * from common.log import logger -from .WordsSearch import WordsSearch +from .lib.WordsSearch import WordsSearch @plugins.register(name="Banwords", desire_priority=100, hidden=True, desc="判断消息中是否有敏感词、决定是否回复。", version="1.0", author="lanvent")