共计 12631 个字符,预计需要花费 32 分钟才能阅读完成。
Java 敏感词过滤
敏感词:“美元”,“中国”,“北京大学”,“北大”,“南京大学”
DFAUtils
`import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
/**
 * Sensitive-word filter backed by a character trie (the "DFA" of the class name).
 *
 * <p>Every registered word is a path from the root; the node reached by the last
 * character of a word is flagged as a word end. All public operations, reads
 * included, synchronize on one private lock, because the underlying
 * {@link HashMap} instances must not be traversed while another thread is
 * structurally modifying them.
 */
public class DFAUtils {

    /** Lowest char value treated as a Chinese character (13312 == U+3400). */
    private static final char CHINESE_RANGE_START = 13312;

    /** Highest char value treated as a Chinese character (40895 == U+9FBF). */
    private static final char CHINESE_RANGE_END = 40895;

    /** Guards every access to {@link #root} and to the nodes reachable from it. */
    private static final Object lock = new Object();

    /** Root of the trie; swapped for a fresh node by {@link #clearSensitiveWord()}. */
    private static Node root = new Node();

    /** One trie node: outgoing edges plus a flag telling whether a word ends here. */
    private static final class Node {
        private final Map<Character, Node> children = new HashMap<>();
        private boolean wordEnd;
    }

    /**
     * Adds a sensitive word to the trie. Adding a word twice has no further effect.
     *
     * @param sensitiveWord the word to register; {@code null} or empty is ignored
     */
    public static void addSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return;
        }
        synchronized (lock) {
            Node node = root;
            for (int i = 0; i < sensitiveWord.length(); i++) {
                Character key = sensitiveWord.charAt(i);
                Node child = node.children.get(key);
                if (null == child) {
                    // Nodes are always created fully formed, so no reader or later
                    // writer can ever encounter a null placeholder.
                    child = new Node();
                    node.children.put(key, child);
                }
                node = child;
            }
            node.wordEnd = true;
        }
    }

    /**
     * Scans {@code content} from left to right and returns the first sensitive word found.
     *
     * <p>For each start position the shortest registered word wins. A single-character
     * word only counts when neither neighbouring character is a Chinese character
     * (a missing neighbour at either end of the text counts as "not Chinese");
     * when that rule rejects it, longer words starting at the same position are
     * still considered.
     *
     * @param content the text to inspect
     * @return the matched word, or {@code null} when nothing matches, the text is
     *         {@code null}/empty, or no word is registered
     */
    public static String checkSensitiveWord(String content) {
        if (null == content || content.length() == 0) {
            return null;
        }
        synchronized (lock) {
            if (root.children.isEmpty()) {
                return null;
            }
            for (int start = 0; start < content.length(); start++) {
                int end = matchEnd(content, start);
                if (end > start) {
                    return content.substring(start, end);
                }
            }
            return null;
        }
    }

    /**
     * Removes a sensitive word from the trie and prunes every node that no longer
     * leads to any word, so no empty branch is left behind.
     *
     * @param sensitiveWord the word to remove; {@code null}, empty, or unregistered
     *                      words (including mere prefixes of registered words) are ignored
     */
    public static void delSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return;
        }
        synchronized (lock) {
            int length = sensitiveWord.length();
            // path[i] is the node reached after consuming the first i characters.
            Node[] path = new Node[length + 1];
            path[0] = root;
            for (int i = 0; i < length; i++) {
                Node next = path[i].children.get(sensitiveWord.charAt(i));
                if (null == next) {
                    return;
                }
                path[i + 1] = next;
            }
            Node last = path[length];
            if (!last.wordEnd) {
                return;
            }
            last.wordEnd = false;
            // Walk back towards the root, dropping nodes that are neither a word end
            // nor a prefix of another word.
            for (int i = length; i > 0; i--) {
                Node node = path[i];
                if (node.wordEnd || !node.children.isEmpty()) {
                    break;
                }
                path[i - 1].children.remove(sensitiveWord.charAt(i - 1));
            }
        }
    }

    /**
     * Lists every registered word. A word is listed before the longer words that
     * extend it; the order among siblings follows {@link HashMap} iteration order.
     * (The misspelt method name is kept for compatibility with existing callers.)
     *
     * @return a new list owned by the caller; empty when no word is registered
     */
    public static LinkedList<String> getSevsitiveWords() {
        LinkedList<String> listWords = new LinkedList<>();
        synchronized (lock) {
            collectWords(root, new StringBuilder(), listWords);
        }
        return listWords;
    }

    /**
     * Tells whether exactly this word is registered. The trie is walked directly,
     * so a word is found even when a shorter registered word is a prefix of it.
     *
     * @param sensitiveWord the word to look up
     * @return {@code true} only if the word itself was added and not deleted since
     */
    public static boolean containSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return false;
        }
        synchronized (lock) {
            Node node = root;
            for (int i = 0; i < sensitiveWord.length() && null != node; i++) {
                node = node.children.get(sensitiveWord.charAt(i));
            }
            return null != node && node.wordEnd;
        }
    }

    /** Removes every registered word. */
    public static void clearSensitiveWord() {
        synchronized (lock) {
            root = new Node();
        }
    }

    /**
     * Finds the shortest acceptable word starting at {@code start}.
     * Must be called while holding {@link #lock}.
     *
     * @return the exclusive end index of the match, or -1 when there is none
     */
    private static int matchEnd(String content, int start) {
        Node node = root.children.get(content.charAt(start));
        if (null == node) {
            return -1;
        }
        if (node.wordEnd && isIsolated(content, start)) {
            return start + 1;
        }
        for (int j = start + 1; j < content.length(); j++) {
            node = node.children.get(content.charAt(j));
            if (null == node) {
                return -1;
            }
            if (node.wordEnd) {
                return j + 1;
            }
        }
        return -1;
    }

    /** Returns whether the character at {@code index} has no Chinese character on either side. */
    private static boolean isIsolated(String content, int index) {
        boolean freeBefore = index == 0 || !isChinese(content.charAt(index - 1));
        boolean freeAfter = index + 1 >= content.length() || !isChinese(content.charAt(index + 1));
        return freeBefore && freeAfter;
    }

    /** Returns whether {@code c} lies in the char range this filter treats as Chinese. */
    private static boolean isChinese(char c) {
        return c >= CHINESE_RANGE_START && c <= CHINESE_RANGE_END;
    }

    /**
     * Depth-first collection of all words below {@code node}.
     * Must be called while holding {@link #lock}.
     */
    private static void collectWords(Node node, StringBuilder prefix, LinkedList<String> listWords) {
        for (Map.Entry<Character, Node> entry : node.children.entrySet()) {
            prefix.append(entry.getKey().charValue());
            Node child = entry.getValue();
            if (child.wordEnd) {
                listWords.add(prefix.toString());
            }
            collectWords(child, prefix, listWords);
            prefix.setLength(prefix.length() - 1);
        }
    }
}
DFAUtilsTest
外汇名词解释 https://www.fx61.com/definitions
import org.junit.Assert;
import org.junit.Test;
import java.util.LinkedList;
/**
 * JUnit 4 tests for {@code DFAUtils}.
 *
 * <p>{@code DFAUtils} keeps its words in static state, so every test starts by
 * clearing it.
 */
public class DFAUtilsTest {

    /** Asserts how many words are currently registered in {@code DFAUtils}. */
    private static void assertWordCount(int expected) {
        Assert.assertEquals(expected, DFAUtils.getSevsitiveWords().size());
    }

    /*==========================AddSensitiveWord-start==========================*/
    /** Adding is idempotent, and prefixes/extensions of a word are independent words. */
    @Test
    public void testAddSensitiveWord01() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.addSensitiveWord("中哈");
        assertWordCount(2);
        // Re-adding existing words must not create duplicates.
        DFAUtils.addSensitiveWord("中");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中哈");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.addSensitiveWord("中中");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中中中");
        assertWordCount(3);
        DFAUtils.addSensitiveWord("人");
        assertWordCount(4);
        DFAUtils.addSensitiveWord("中");
        assertWordCount(5);
    }
    /*==========================AddSensitiveWord-end============================*/

    /*==========================CheckSensitiveWord-start==========================*/
    /**
     * Single-character words only match when no Chinese character is adjacent;
     * longer words match anywhere in the text.
     */
    @Test
    public void testCheckSensitiveWord01() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("大");
        DFAUtils.addSensitiveWord("大学");
        DFAUtils.addSensitiveWord("中中中国中中中");
        // The assertions below look for these words, so they have to be registered;
        // the expected count is the number of words actually added.
        DFAUtils.addSensitiveWord("滚");
        DFAUtils.addSensitiveWord("滚滚");
        DFAUtils.addSensitiveWord("操");
        assertWordCount(6);

        Assert.assertEquals("滚", DFAUtils.checkSensitiveWord("滚"));
        // A Chinese neighbour suppresses a single-character match.
        Assert.assertNull(DFAUtils.checkSensitiveWord("翻滚"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("体操"));
        Assert.assertEquals("滚滚", DFAUtils.checkSensitiveWord("你好滚滚"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("滚你好滚"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("滚轮胎"));
        Assert.assertNull(DFAUtils.checkSensitiveWord("你你国国"));
        // Partial matches earlier in the text must not hide the full word later on.
        Assert.assertEquals("中中中国中中中",
                DFAUtils.checkSensitiveWord("中中国中中 中中中中国中中中"));
    }
    /*==========================CheckSensitiveWord-end============================*/

    /*==========================DelSensitiveWord-start==========================*/
    /** Deleting empty or unknown words is a no-op; deleting the only word empties the tree. */
    @Test
    public void testDelSensitiveWord01() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("国");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(0);
    }

    /** A prefix or an extension of a registered word is not itself deletable. */
    @Test
    public void testDelSensitiveWord02() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(0);
    }

    /** Deleting a word leaves its registered prefix/extension untouched. */
    @Test
    public void testDelSensitiveWord03() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中中");
        DFAUtils.addSensitiveWord("中");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(2);
        // These two deletions are expected to change nothing, so they must target
        // words that were never registered.
        DFAUtils.delSensitiveWord("中国");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("国中");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(0);
        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(0);
    }

    /** Deleting inside a chain of nested words removes exactly the requested word. */
    @Test
    public void testDelSensitiveWord04() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("中中中 111");
        DFAUtils.addSensitiveWord("中中");
        DFAUtils.addSensitiveWord("中");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中中中 111");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中中中 111");
        // "中中中" is only a prefix of a registered word, not a word itself.
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中中");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(2);
        // Deleting the already-deleted word again must change nothing.
        DFAUtils.delSensitiveWord("中");
        assertWordCount(2);
    }
    /*==========================DelSensitiveWord-end============================*/

    /*==========================ContainSensitiveWord-start==========================*/
    /** Only registered words are reported as contained. */
    @Test
    public void testContainSensitiveWord01() {
        DFAUtils.clearSensitiveWord();
        DFAUtils.addSensitiveWord("滚");
        DFAUtils.addSensitiveWord("中中中国中中中");
        // "操" is expected to be contained below, so it has to be registered.
        DFAUtils.addSensitiveWord("操");
        assertWordCount(3);
        Assert.assertFalse(DFAUtils.containSensitiveWord(" "));
        Assert.assertTrue(DFAUtils.containSensitiveWord("操"));
    }
    /*==========================ContainSensitiveWord-end============================*/
}
正文完