goProject/trunk/goutil/dfaUtil/dfa.go
皮蛋13361098506 1b77f62820 初始化项目
2025-01-06 16:01:02 +08:00

220 lines
5.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package dfaUtil
import "strings"
/*
DFAUtil is used to check whether a sentence contains invalid words.
The underlying data structure is a trie.
https://en.wikipedia.org/wiki/Trie
*/
// DFAUtil checks sentences against a set of words held in a trie.
type DFAUtil struct {
// root is the trie's root node; every lookup and insertion in this file starts from it.
root *trieNode
}
// SearchSentence scans sentence for words stored in the trie and reports
// where every match lies.
//
// Positions are rune indexes into []rune(sentence), not byte offsets. The two
// results are parallel slices: startIndexList[k] and endIndexList[k] are the
// first and last (inclusive) rune index of the k-th match, so callers must
// read them together. Two slices are returned, rather than a slice of pairs,
// to avoid declaring an extra struct type.
//
// After a match the scan resumes right behind it, so reported matches never
// overlap. Both results are nil when nothing matches.
func (d *DFAUtil) SearchSentence(sentence string) (startIndexList, endIndexList []int) {
	runes := []rune(sentence)
	pos := 0
	for pos < len(runes) {
		last := d.searchSentenceByStart(pos, runes)
		if last < 0 {
			// No word starts at this rune; try the next one.
			pos++
			continue
		}
		// Record the match and continue immediately after it.
		startIndexList = append(startIndexList, pos)
		endIndexList = append(endIndexList, last)
		pos = last + 1
	}
	return startIndexList, endIndexList
}
// searchSentenceByStart walks the trie along sentenceRuneList beginning at
// rune index start and returns the rune index at which the longest word
// found from that position ends.
//
// Parameters:
//
//	start:            rune index at which matching begins
//	sentenceRuneList: the sentence as a rune slice
//
// Returns:
//
//	the inclusive end index of the match, or -1 when the walk stops without
//	having passed an end-of-word node
func (d *DFAUtil) searchSentenceByStart(start int, sentenceRuneList []rune) (endIndex int) {
	matched := false
	node := d.root
	for pos := start; pos < len(sentenceRuneList); pos++ {
		next, ok := node.children[sentenceRuneList[pos]]
		if !ok {
			// The walk ends here; a shorter word may already have been seen.
			break
		}
		// Remember this word end but keep going: a longer word may follow.
		// With "金鳞" and "金鳞岂是池中物" both stored, matching the longer
		// sentence must not stop at "金鳞".
		if next.isEndOfWord {
			endIndex, matched = pos, true
		}
		// A node without children cannot extend the match any further.
		// NOTE(review): this returns endIndex without consulting matched, so a
		// childless node that is not marked end-of-word would yield the
		// previous value of endIndex (0 if none) — confirm such nodes cannot exist.
		if len(next.children) == 0 {
			return endIndex
		}
		node = next
	}
	if !matched {
		return -1
	}
	return endIndex
}
// InsertWord adds word to the trie, creating any missing nodes along its
// path and marking the node reached by the last rune as an end of word.
func (d *DFAUtil) InsertWord(word []rune) {
	node := d.root
	for _, r := range word {
		next, ok := node.children[r]
		if !ok {
			// First word to pass through this rune at this depth.
			next = newtrieNode()
			node.children[r] = next
		}
		node = next
	}
	node.isEndOfWord = true
}
// StartsWith reports whether the trie contains a path spelling prefix, i.e.
// whether at least one inserted word begins with it. An empty prefix always
// yields true.
func (d *DFAUtil) StartsWith(prefix []rune) bool {
	node := d.root
	for _, r := range prefix {
		next, ok := node.children[r]
		if !ok {
			return false
		}
		node = next
	}
	return true
}
// IsMatch reports whether sentence contains at least one word stored in the
// trie.
//
// The scan stops at the first position where a word is found, so unlike
// SearchSentence it does not visit the rest of the sentence or allocate
// index slices just to answer a yes/no question.
func (d *DFAUtil) IsMatch(sentence string) bool {
	runes := []rune(sentence)
	for i := range runes {
		if d.searchSentenceByStart(i, runes) >= 0 {
			return true
		}
	}
	return false
}
// HandleWord masks every word found in sentence by overwriting each of its
// runes with replaceCh, so the result has the same number of runes as the
// input.
//
// sentence:  the text to filter
// replaceCh: the rune written over every matched rune
//
// Returns the masked text, or sentence itself when nothing matched.
func (d *DFAUtil) HandleWord(sentence string, replaceCh rune) string {
	starts, ends := d.SearchSentence(sentence)
	if len(starts) == 0 {
		return sentence
	}

	masked := []rune(sentence)
	for k, from := range starts {
		// ends[k] is inclusive.
		for pos := from; pos <= ends[k]; pos++ {
			masked[pos] = replaceCh
		}
	}
	return string(masked)
}
// HandleWordUseStr replaces every word found in input with the whole string
// replaceCh, so the result may be shorter or longer than the input.
//
// Matching is performed on strings.ToUpper(input) while the returned text is
// built from the original input, so unmatched characters keep their case.
// Lower-case input therefore only matches words that were inserted in upper
// case.
//
// input:     the text to filter
// replaceCh: the string substituted for each matched word (may be empty)
//
// Returns the filtered text, or input itself when nothing matched.
func (d *DFAUtil) HandleWordUseStr(input string, replaceCh string) string {
	startIndexList, endIndexList := d.SearchSentence(strings.ToUpper(input))
	if len(startIndexList) == 0 {
		return input
	}

	// The match indexes refer to runes of the upper-cased text; they are applied
	// to the runes of the original input, relying on ToUpper mapping rune to rune.
	inputRune := []rune(input)
	replaceChList := []rune(replaceCh)

	// Rebuild the text in one pass: copy the gap before each match, emit the
	// replacement, and skip the matched runes. Working from the untouched
	// input avoids tracking how earlier replacements shifted later indexes,
	// which previously went wrong from the third match onward.
	out := make([]rune, 0, len(inputRune)+len(startIndexList)*len(replaceChList))
	next := 0
	for i, start := range startIndexList {
		out = append(out, inputRune[next:start]...)
		out = append(out, replaceChList...)
		// endIndexList[i] is inclusive.
		next = endIndexList[i] + 1
	}
	out = append(out, inputRune[next:]...)
	return string(out)
}
// NewDFAUtil builds a DFAUtil whose trie contains every non-empty string in
// wordList. Empty strings are skipped; a nil or empty list yields a trie
// holding only the root node.
func NewDFAUtil(wordList []string) *DFAUtil {
	d := &DFAUtil{root: newtrieNode()}
	for _, w := range wordList {
		if w == "" {
			continue
		}
		d.InsertWord([]rune(w))
	}
	return d
}