package dfaUtil import "strings" /* DFA util, is used to verify whether a sentence has invalid words. The underlying data structure is trie. https://en.wikipedia.org/wiki/Trie */ // dfa util type DFAUtil struct { // The root node root *trieNode } // 搜索语句 // 由于go不支持tuple,所以为了避免定义多余的struct,特别使用两个list来分别返回匹配的索引的上界和下界 // 在处理此方法的返回值时,需要两者配合使用 // 参数: // // sentence:语句字符串 // // 返回: // // 搜索到的开始位置列表 // 搜索到的结束位置列表 func (this *DFAUtil) SearchSentence(sentence string) (startIndexList, endIndexList []int) { sentenceRuneList := []rune(sentence) for i := 0; i < len(sentenceRuneList); { //按序匹配每个字 end := this.searchSentenceByStart(i, sentenceRuneList) if end < 0 { //匹配失败,继续匹配下一个字 i++ } else { //匹配成功,记录索引位置 startIndexList = append(startIndexList, i) endIndexList = append(endIndexList, end) //从匹配到的字后面开始找 i = end + 1 } } return } // 从指定的开始位置搜索语句 // 参数: // // start:开始匹配的位置 // sentenceRuneList:语句字列表 // // 返回: // // 匹配到的结束位置,未匹配到返回-1 func (this *DFAUtil) searchSentenceByStart(start int, sentenceRuneList []rune) (endIndex int) { //当前节点,从根节点开始找 currNode := this.root //是否匹配到 var isMatched bool //按顺序匹配字 for i := start; i < len(sentenceRuneList); { child, exists := currNode.children[sentenceRuneList[i]] //未匹配到则结束,跳出循环(可能匹配到过词结尾) if !exists { break } //是否是词末尾,如果是则先记录下来,因为还可能匹配到更长的词 //比如["金鳞"、"金鳞岂是池中物"] => 匹配"金鳞岂是池中物",匹配到"金鳞"不应该停下来,应继续匹配更长的词 if child.isEndOfWord { endIndex = i isMatched = true } //是否已经到词末尾 if len(child.children) == 0 { return endIndex } else { //继续与后面的字匹配 currNode = child } //增加索引匹配下一个位置 i++ } //匹配结束,若曾经匹配到词末尾,则直接返回匹配到的位置 if isMatched { return endIndex } else { //没有匹配到词末尾,则返回匹配失败 return -1 } } // Insert new word into object func (this *DFAUtil) InsertWord(word []rune) { currNode := this.root for _, c := range word { if cildNode, exist := currNode.children[c]; !exist { cildNode = newtrieNode() currNode.children[c] = cildNode currNode = cildNode } else { currNode = cildNode } } currNode.isEndOfWord = true } // Check if there is any word in the trie that starts with the given prefix. func (this *DFAUtil) StartsWith(prefix []rune) bool { currNode := this.root for _, c := range prefix { if childNode, exist := currNode.children[c]; !exist { return false } else { currNode = childNode } } return true } // Judge if input sentence contains some special caracter // Return: // Matc or not func (this *DFAUtil) IsMatch(sentence string) bool { startIndexList, _ := this.SearchSentence(sentence) return len(startIndexList) > 0 } // Handle sentence. Use specified caracter to replace those sensitive caracters. // input: Input sentence // replaceCh: candidate // Return: // Sentence after manipulation func (this *DFAUtil) HandleWord(sentence string, replaceCh rune) string { startIndexList, endIndexList := this.SearchSentence(sentence) if len(startIndexList) == 0 { return sentence } // Manipulate sentenceList := []rune(sentence) for i := 0; i < len(startIndexList); i++ { for index := startIndexList[i]; index <= endIndexList[i]; index++ { sentenceList[index] = replaceCh } } return string(sentenceList) } // Handle sentence. Use specified caracter to replace those sensitive caracters. // input: Input sentence // replaceCh: candidate // Return: // Sentence after manipulation func (this *DFAUtil) HandleWordUseStr(input string, replaceCh string) string { input2 := strings.ToUpper(input) startIndexList, endIndexList := this.SearchSentence(input2) if len(startIndexList) == 0 { return input } // Manipulate inputRune := []rune(input) replaceChList := []rune(replaceCh) //上一次替换掉的数量 lastReplaceCount := 0 for i := 0; i < len(startIndexList); i++ { //替换字的索引 index := len(replaceChList) //开始位置--加上替换的词的索引 starIndex := startIndexList[i] + (i * index) - lastReplaceCount //结束位置 endIndex := endIndexList[i] + (i * index) - lastReplaceCount //结束字符串 sentenceAttr := string(inputRune[endIndex+1:]) //替换范围字符串 inputRune = append(inputRune[:starIndex], replaceChList...) inputRune = append(inputRune, []rune(sentenceAttr)...) lastReplaceCount = endIndex + 1 - starIndex } return string(inputRune) } // Create new DfaUtil object // wordList:word list func NewDFAUtil(wordList []string) *DFAUtil { this := &DFAUtil{ root: newtrieNode(), } for _, word := range wordList { wordRuneList := []rune(word) if len(wordRuneList) > 0 { this.InsertWord(wordRuneList) } } return this }