Przeglądaj źródła

banwords: move WordsSearch to lib

master
lanvent 1 rok temu
rodzic
commit
583440b82b
2 zmienionych plików z 1 dodań i 251 usunięć
  1. +0
    -250
      plugins/banwords/WordsSearch.py
  2. +1
    -1
      plugins/banwords/banwords.py

+ 0
- 250
plugins/banwords/WordsSearch.py Wyświetl plik

@@ -1,250 +0,0 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# ToolGood.Words.WordsSearch.py
# 2020, Lin Zhijun, https://github.com/toolgood/ToolGood.Words
# Licensed under the Apache License 2.0
# 更新日志
# 2020.04.06 第一次提交
# 2020.05.16 修改,支持大于0xffff的字符

__all__ = ['WordsSearch']
__author__ = 'Lin Zhijun'
__date__ = '2020.05.16'

class TrieNode():
def __init__(self):
self.Index = 0
self.Index = 0
self.Layer = 0
self.End = False
self.Char = ''
self.Results = []
self.m_values = {}
self.Failure = None
self.Parent = None

def Add(self,c):
if c in self.m_values :
return self.m_values[c]
node = TrieNode()
node.Parent = self
node.Char = c
self.m_values[c] = node
return node

def SetResults(self,index):
if (self.End == False):
self.End = True
self.Results.append(index)

class TrieNode2():
def __init__(self):
self.End = False
self.Results = []
self.m_values = {}
self.minflag = 0xffff
self.maxflag = 0

def Add(self,c,node3):
if (self.minflag > c):
self.minflag = c
if (self.maxflag < c):
self.maxflag = c
self.m_values[c] = node3

def SetResults(self,index):
if (self.End == False) :
self.End = True
if (index in self.Results )==False :
self.Results.append(index)

def HasKey(self,c):
return c in self.m_values
def TryGetValue(self,c):
if (self.minflag <= c and self.maxflag >= c):
if c in self.m_values:
return self.m_values[c]
return None


class WordsSearch():
def __init__(self):
self._first = {}
self._keywords = []
self._indexs=[]
def SetKeywords(self,keywords):
self._keywords = keywords
self._indexs=[]
for i in range(len(keywords)):
self._indexs.append(i)

root = TrieNode()
allNodeLayer={}

for i in range(len(self._keywords)): # for (i = 0; i < _keywords.length; i++)
p = self._keywords[i]
nd = root
for j in range(len(p)): # for (j = 0; j < p.length; j++)
nd = nd.Add(ord(p[j]))
if (nd.Layer == 0):
nd.Layer = j + 1
if nd.Layer in allNodeLayer:
allNodeLayer[nd.Layer].append(nd)
else:
allNodeLayer[nd.Layer]=[]
allNodeLayer[nd.Layer].append(nd)
nd.SetResults(i)


allNode = []
allNode.append(root)
for key in allNodeLayer.keys():
for nd in allNodeLayer[key]:
allNode.append(nd)
allNodeLayer=None

for i in range(len(allNode)): # for (i = 0; i < allNode.length; i++)
if i==0 :
continue
nd=allNode[i]
nd.Index = i
r = nd.Parent.Failure
c = nd.Char
while (r != None and (c in r.m_values)==False):
r = r.Failure
if (r == None):
nd.Failure = root
else:
nd.Failure = r.m_values[c]
for key2 in nd.Failure.Results :
nd.SetResults(key2)
root.Failure = root

allNode2 = []
for i in range(len(allNode)): # for (i = 0; i < allNode.length; i++)
allNode2.append( TrieNode2())
for i in range(len(allNode2)): # for (i = 0; i < allNode2.length; i++)
oldNode = allNode[i]
newNode = allNode2[i]

for key in oldNode.m_values :
index = oldNode.m_values[key].Index
newNode.Add(key, allNode2[index])
for index in range(len(oldNode.Results)): # for (index = 0; index < oldNode.Results.length; index++)
item = oldNode.Results[index]
newNode.SetResults(item)
oldNode=oldNode.Failure
while oldNode != root:
for key in oldNode.m_values :
if (newNode.HasKey(key) == False):
index = oldNode.m_values[key].Index
newNode.Add(key, allNode2[index])
for index in range(len(oldNode.Results)):
item = oldNode.Results[index]
newNode.SetResults(item)
oldNode=oldNode.Failure
allNode = None
root = None

# first = []
# for index in range(65535):# for (index = 0; index < 0xffff; index++)
# first.append(None)
# for key in allNode2[0].m_values :
# first[key] = allNode2[0].m_values[key]
self._first = allNode2[0]

def FindFirst(self,text):
ptr = None
for index in range(len(text)): # for (index = 0; index < text.length; index++)
t =ord(text[index]) # text.charCodeAt(index)
tn = None
if (ptr == None):
tn = self._first.TryGetValue(t)
else:
tn = ptr.TryGetValue(t)
if (tn==None):
tn = self._first.TryGetValue(t)
if (tn != None):
if (tn.End):
item = tn.Results[0]
keyword = self._keywords[item]
return { "Keyword": keyword, "Success": True, "End": index, "Start": index + 1 - len(keyword), "Index": self._indexs[item] }
ptr = tn
return None

def FindAll(self,text):
ptr = None
list = []

for index in range(len(text)): # for (index = 0; index < text.length; index++)
t =ord(text[index]) # text.charCodeAt(index)
tn = None
if (ptr == None):
tn = self._first.TryGetValue(t)
else:
tn = ptr.TryGetValue(t)
if (tn==None):
tn = self._first.TryGetValue(t)
if (tn != None):
if (tn.End):
for j in range(len(tn.Results)): # for (j = 0; j < tn.Results.length; j++)
item = tn.Results[j]
keyword = self._keywords[item]
list.append({ "Keyword": keyword, "Success": True, "End": index, "Start": index + 1 - len(keyword), "Index": self._indexs[item] })
ptr = tn
return list


def ContainsAny(self,text):
ptr = None
for index in range(len(text)): # for (index = 0; index < text.length; index++)
t =ord(text[index]) # text.charCodeAt(index)
tn = None
if (ptr == None):
tn = self._first.TryGetValue(t)
else:
tn = ptr.TryGetValue(t)
if (tn==None):
tn = self._first.TryGetValue(t)
if (tn != None):
if (tn.End):
return True
ptr = tn
return False
def Replace(self,text, replaceChar = '*'):
result = list(text)

ptr = None
for i in range(len(text)): # for (i = 0; i < text.length; i++)
t =ord(text[i]) # text.charCodeAt(index)
tn = None
if (ptr == None):
tn = self._first.TryGetValue(t)
else:
tn = ptr.TryGetValue(t)
if (tn==None):
tn = self._first.TryGetValue(t)
if (tn != None):
if (tn.End):
maxLength = len( self._keywords[tn.Results[0]])
start = i + 1 - maxLength
for j in range(start,i+1): # for (j = start; j <= i; j++)
result[j] = replaceChar
ptr = tn
return ''.join(result)

+ 1
- 1
plugins/banwords/banwords.py Wyświetl plik

@@ -7,7 +7,7 @@ from bridge.reply import Reply, ReplyType
import plugins
from plugins import *
from common.log import logger
from .WordsSearch import WordsSearch
from .lib.WordsSearch import WordsSearch


@plugins.register(name="Banwords", desire_priority=100, hidden=True, desc="判断消息中是否有敏感词、决定是否回复。", version="1.0", author="lanvent")


Ładowanie…
Anuluj
Zapisz