goProject/.svn/pristine/21/213b0a00bea424ffed61b3775e9a3885c7c78fac.svn-base

220 lines
5.2 KiB
Plaintext
Raw Normal View History

2025-01-06 16:21:36 +08:00
package dfaUtil
import "strings"
/*
DFA util is used to check whether a sentence contains invalid words.
The underlying data structure is trie.
https://en.wikipedia.org/wiki/Trie
*/
// DFAUtil matches sentences against a set of words stored in a trie.
type DFAUtil struct {
// root is the root node of the trie; the insert and search methods of
// DFAUtil start their traversal from it.
root *trieNode
}
// SearchSentence scans sentence from left to right and collects every match
// reported by searchSentenceByStart.
//
// Go has no tuple type, so instead of declaring an extra struct the matches
// are returned as two parallel slices: element k of startIndexList and element
// k of endIndexList together describe the k-th match. Both are indices into
// []rune(sentence) (not byte offsets) and the end index is inclusive.
//
// After a match the scan resumes at the rune following the match, so reported
// matches never overlap. When nothing matches both slices are nil.
//
// Parameters:
//
//	sentence: the text to search
//
// Returns:
//
//	startIndexList: start position of each match
//	endIndexList:   inclusive end position of each match
func (this *DFAUtil) SearchSentence(sentence string) (startIndexList, endIndexList []int) {
	runes := []rune(sentence)
	pos := 0
	for pos < len(runes) {
		matchEnd := this.searchSentenceByStart(pos, runes)
		if matchEnd >= 0 {
			// Record the match and continue right after it.
			startIndexList = append(startIndexList, pos)
			endIndexList = append(endIndexList, matchEnd)
			pos = matchEnd + 1
			continue
		}
		// Nothing starts here; try the next rune.
		pos++
	}
	return startIndexList, endIndexList
}
// searchSentenceByStart walks the trie from the root, consuming runes of
// sentenceRuneList beginning at index start, and returns the inclusive end
// index of the longest stored word that begins exactly at start.
//
// The walk does not stop at the first word end it meets, because a longer
// word may share the same prefix. For example, with the words "金鳞" and
// "金鳞岂是池中物", searching "金鳞岂是池中物" must report the longer word.
//
// Parameters:
//
//	start:            index in sentenceRuneList at which matching begins
//	sentenceRuneList: the sentence as a rune slice
//
// Returns:
//
//	the inclusive end index of the match, or -1 when no word starts at start
func (this *DFAUtil) searchSentenceByStart(start int, sentenceRuneList []rune) (endIndex int) {
	// Start from "no match" so that every exit path reports -1 unless a word
	// end was actually seen. Relying on the zero value here would turn a
	// childless node that is not flagged as a word end into a bogus match
	// ending at index 0.
	endIndex = -1

	currNode := this.root
	for i := start; i < len(sentenceRuneList); i++ {
		child, exists := currNode.children[sentenceRuneList[i]]
		if !exists {
			// No transition for this rune (this also covers nodes without
			// children); keep whatever word end was recorded so far.
			break
		}
		if child.isEndOfWord {
			// Remember this word end but keep going for a longer word.
			endIndex = i
		}
		currNode = child
	}
	return endIndex
}
// InsertWord adds word to the trie. It walks from the root one rune at a
// time, creating a node for every rune that has no child yet, and flags the
// node reached after the last rune as the end of a word.
func (this *DFAUtil) InsertWord(word []rune) {
	node := this.root
	for _, r := range word {
		next, ok := node.children[r]
		if !ok {
			// First time this rune appears at this position: create the node.
			next = newtrieNode()
			node.children[r] = next
		}
		node = next
	}
	node.isEndOfWord = true
}
// StartsWith reports whether the trie contains a path for every rune of
// prefix, i.e. whether some inserted word begins with prefix. An empty prefix
// always yields true.
func (this *DFAUtil) StartsWith(prefix []rune) bool {
	node := this.root
	for _, r := range prefix {
		next, ok := node.children[r]
		if !ok {
			return false
		}
		node = next
	}
	return true
}
// IsMatch reports whether sentence contains at least one match according to
// SearchSentence.
//
// Returns:
//
//	true if any match was found, false otherwise
func (this *DFAUtil) IsMatch(sentence string) bool {
	starts, _ := this.SearchSentence(sentence)
	return len(starts) != 0
}
// HandleWord masks every match found by SearchSentence: each rune inside a
// matched range is overwritten with replaceCh, so the result has the same
// number of runes as the input. The sentence is returned unchanged when
// nothing matches.
//
// Parameters:
//
//	sentence:  the input sentence
//	replaceCh: the rune written over every matched rune
//
// Returns:
//
//	the sentence with all matched runes replaced
func (this *DFAUtil) HandleWord(sentence string, replaceCh rune) string {
	starts, ends := this.SearchSentence(sentence)
	if len(starts) == 0 {
		return sentence
	}

	masked := []rune(sentence)
	for k, from := range starts {
		// ends[k] is inclusive.
		for pos := from; pos <= ends[k]; pos++ {
			masked[pos] = replaceCh
		}
	}
	return string(masked)
}
// HandleWordUseStr replaces every match in input with the whole string
// replaceCh (one copy per match, regardless of the match length) and returns
// the result. The input is returned unchanged when nothing matches.
//
// Matching is done on an upper-cased copy of input while the replacement is
// applied to the original text, so unmatched runes keep their original case.
// The stored words are presumably expected to be upper case for this to find
// anything; verify against the caller that builds the word list.
//
// Parameters:
//
//	input:     the input sentence
//	replaceCh: the string substituted for each matched range
//
// Returns:
//
//	the sentence with every matched range replaced by replaceCh
func (this *DFAUtil) HandleWordUseStr(input string, replaceCh string) string {
	// The match indices are rune positions in the upper-cased text; they are
	// reused on []rune(input) below, which relies on strings.ToUpper keeping
	// the number of runes unchanged.
	startIndexList, endIndexList := this.SearchSentence(strings.ToUpper(input))
	if len(startIndexList) == 0 {
		return input
	}

	inputRune := []rune(input)
	replaceChList := []rune(replaceCh)

	// Build the output in a single left-to-right pass: copy the untouched text
	// before each match, then the replacement, then skip past the match.
	// Working from the original indices avoids having to track how earlier
	// replacements shifted later positions.
	result := make([]rune, 0, len(inputRune))
	next := 0
	for i, start := range startIndexList {
		result = append(result, inputRune[next:start]...)
		result = append(result, replaceChList...)
		// endIndexList[i] is inclusive.
		next = endIndexList[i] + 1
	}
	result = append(result, inputRune[next:]...)
	return string(result)
}
// NewDFAUtil creates a DFAUtil with a fresh root node and inserts every
// non-empty entry of wordList into it. Empty strings are skipped.
//
// Parameters:
//
//	wordList: the words to store
func NewDFAUtil(wordList []string) *DFAUtil {
	util := &DFAUtil{root: newtrieNode()}
	for idx := range wordList {
		if runes := []rune(wordList[idx]); len(runes) != 0 {
			util.InsertWord(runes)
		}
	}
	return util
}