共计 12631 个字符,预计需要花费 32 分钟才能阅读完成。
Java 敏感词过滤
敏感词:“美元”、“中国”、“北京大学”、“北大”、“南京大学”
DFAUtils
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

/**
 * Sensitive-word filter backed by a character trie (a simple DFA).
 *
 * <p>Words are stored one UTF-16 code unit per trie level. All state is static and every
 * public method acquires the same lock, so readers never observe a half-built tree and a
 * concurrent {@link #clearSensitiveWord()} cannot make an update land in a discarded tree.
 */
public class DFAUtils {

    /** First code unit treated as a Chinese character (U+3400, decimal 13312). */
    private static final int CJK_FIRST = 0x3400;

    /** Last code unit treated as a Chinese character (U+9FBF, decimal 40895). */
    private static final int CJK_LAST = 0x9FBF;

    /** Guards {@link #root} and every node reachable from it. */
    private static final Object lock = new Object();

    /** Root of the trie; it never represents a word itself. */
    private static Node root = new Node();

    /** One trie node: outgoing edges plus a flag telling whether a word ends here. */
    private static final class Node {
        private final Map<Character, Node> children = new HashMap<Character, Node>();
        private boolean terminal;
    }

    /**
     * Adds a sensitive word to the trie. Adding a word that is already present is a no-op.
     *
     * @param sensitiveWord the word to add; {@code null} and the empty string are ignored
     */
    public static void addSensitiveWord(String sensitiveWord) {
        if (sensitiveWord == null || sensitiveWord.isEmpty()) {
            return;
        }
        synchronized (lock) {
            Node node = root;
            for (int i = 0; i < sensitiveWord.length(); i++) {
                char key = sensitiveWord.charAt(i);
                Node next = node.children.get(key);
                if (next == null) {
                    next = new Node();
                    node.children.put(key, next);
                }
                node = next;
            }
            node.terminal = true;
        }
    }

    /**
     * Scans {@code content} from left to right and returns the first sensitive word found.
     *
     * <p>At each start position a single-character word only counts when neither neighbour
     * is a Chinese character (text boundaries count as non-Chinese). Otherwise the shortest
     * stored word of two or more characters starting at that position is reported.
     *
     * @param content the text to scan
     * @return the matched word, or {@code null} when the text is {@code null}, empty, or
     *         contains no sensitive word
     */
    public static String checkSensitiveWord(String content) {
        if (content == null || content.isEmpty()) {
            return null;
        }
        synchronized (lock) {
            if (root.children.isEmpty()) {
                return null;
            }
            int length = content.length();
            for (int start = 0; start < length; start++) {
                Node node = root.children.get(content.charAt(start));
                if (node == null) {
                    continue;
                }
                if (node.terminal && isStandalone(content, start)) {
                    return content.substring(start, start + 1);
                }
                // Extend the match; stop at the first position where a word ends.
                for (int end = start + 1; end < length; end++) {
                    node = node.children.get(content.charAt(end));
                    if (node == null) {
                        break;
                    }
                    if (node.terminal) {
                        return content.substring(start, end + 1);
                    }
                }
            }
            return null;
        }
    }

    /**
     * Removes a sensitive word from the trie and prunes every node that no longer leads to
     * a word. Words that merely share a prefix with the removed word are left untouched.
     *
     * @param sensitiveWord the word to remove; {@code null}, empty, or unknown words are ignored
     */
    public static void delSensitiveWord(String sensitiveWord) {
        if (sensitiveWord == null || sensitiveWord.isEmpty()) {
            return;
        }
        synchronized (lock) {
            int length = sensitiveWord.length();
            // path[i] is the node reached after consuming the first i characters.
            Node[] path = new Node[length + 1];
            path[0] = root;
            for (int i = 0; i < length; i++) {
                Node next = path[i].children.get(sensitiveWord.charAt(i));
                if (next == null) {
                    return;
                }
                path[i + 1] = next;
            }
            Node last = path[length];
            if (!last.terminal) {
                return;
            }
            last.terminal = false;
            // Walk back towards the root, dropping nodes that became useless. The first node
            // that still ends a word or still has children stops the pruning.
            for (int i = length; i > 0; i--) {
                Node node = path[i];
                if (node.terminal || !node.children.isEmpty()) {
                    break;
                }
                path[i - 1].children.remove(sensitiveWord.charAt(i - 1));
            }
        }
    }

    /**
     * Returns every word currently stored in the trie.
     *
     * <p>The (misspelt) method name is kept for backward compatibility.
     *
     * @return a new list owned by the caller; empty when no word is stored. The order
     *         follows hash-map iteration and is therefore unspecified.
     */
    public static LinkedList<String> getSevsitiveWords() {
        LinkedList<String> listWords = new LinkedList<String>();
        synchronized (lock) {
            collectWords(root, new StringBuilder(), listWords);
        }
        return listWords;
    }

    /**
     * Tells whether exactly this word is stored in the trie.
     *
     * <p>The trie is walked directly instead of going through
     * {@link #checkSensitiveWord(String)}: that method reports the shortest match, so a
     * stored word with a stored prefix (for example "ab" and "abcd") would be missed.
     *
     * @param sensitiveWord the word to look up
     * @return {@code true} if the word is stored; {@code false} otherwise, including for
     *         {@code null} and the empty string
     */
    public static boolean containSensitiveWord(String sensitiveWord) {
        if (sensitiveWord == null || sensitiveWord.isEmpty()) {
            return false;
        }
        synchronized (lock) {
            Node node = root;
            for (int i = 0; node != null && i < sensitiveWord.length(); i++) {
                node = node.children.get(sensitiveWord.charAt(i));
            }
            return node != null && node.terminal;
        }
    }

    /** Removes every stored word by replacing the trie with an empty one. */
    public static void clearSensitiveWord() {
        synchronized (lock) {
            root = new Node();
        }
    }

    /** Depth-first walk appending each word found below {@code node} to {@code listWords}. */
    private static void collectWords(Node node, StringBuilder prefix, LinkedList<String> listWords) {
        for (Map.Entry<Character, Node> entry : node.children.entrySet()) {
            prefix.append(entry.getKey().charValue());
            Node child = entry.getValue();
            if (child.terminal) {
                listWords.add(prefix.toString());
            }
            collectWords(child, prefix, listWords);
            prefix.setLength(prefix.length() - 1);
        }
    }

    /** Returns true when neither neighbour of {@code index} is a Chinese character. */
    private static boolean isStandalone(String content, int index) {
        boolean before = index == 0 || !isChinese(content.charAt(index - 1));
        boolean after = index + 1 >= content.length() || !isChinese(content.charAt(index + 1));
        return before && after;
    }

    /** Returns true when {@code c} lies in the code-unit range treated as Chinese. */
    private static boolean isChinese(char c) {
        return c >= CJK_FIRST && c <= CJK_LAST;
    }
}
DFAUtilsTest
外汇名词解释 https://www.fx61.com/definitions
import org.junit.Assert; | |
import org.junit.Test; | |
import java.util.LinkedList; | |
public class DFAUtilsTest { | |
/*==========================AddSensitiveWord-start==========================*/ | |
@Test | |
public void testAddSensitiveWord01() {DFAUtils.clearSensitiveWord(); | |
LinkedList<String> listWords = null; | |
DFAUtils.addSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.addSensitiveWord("中哈"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.addSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.addSensitiveWord("中哈"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.delSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.addSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.addSensitiveWord("中中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(3, listWords.size()); | |
DFAUtils.addSensitiveWord("人"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(4, listWords.size()); | |
DFAUtils.addSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(5, listWords.size()); | |
} | |
/*==========================AddSensitiveWord-end============================*/ | |
/*==========================CheckSensitiveWord-start==========================*/ | |
@Test | |
public void testCheckSensitiveWord01() {DFAUtils.clearSensitiveWord(); | |
String sencitivaWord = null; | |
LinkedList<String> listWords = null; | |
DFAUtils.addSensitiveWord("大"); | |
DFAUtils.addSensitiveWord("大学"); | |
DFAUtils.addSensitiveWord("中中中国中中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(8, listWords.size()); | |
sencitivaWord = DFAUtils.checkSensitiveWord("滚"); | |
Assert.assertEquals("滚", sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("翻滚"); | |
Assert.assertEquals(null, sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("滚"); | |
Assert.assertEquals("滚", sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("滚"); | |
Assert.assertEquals("滚", sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("体操"); | |
Assert.assertEquals(null, sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("你好滚滚"); | |
Assert.assertEquals("滚滚", sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("滚你好滚"); | |
Assert.assertEquals(null, sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("滚轮胎"); | |
Assert.assertEquals(null, sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("你你国国"); | |
Assert.assertEquals(null, sencitivaWord); | |
sencitivaWord = DFAUtils.checkSensitiveWord("中中国中中 中中中中国中中中"); | |
Assert.assertEquals("中中中国中中中", sencitivaWord); | |
} | |
/*==========================CheckSensitiveWord-start==========================*/ | |
/*==========================DelSensitiveWor-start==========================*/ | |
@Test | |
public void testDelSensitiveWord01() {DFAUtils.clearSensitiveWord(); | |
LinkedList<String> listWords = null; | |
DFAUtils.addSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord(""); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("国"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(0, listWords.size()); | |
} | |
@Test | |
public void testDelSensitiveWord02() {DFAUtils.clearSensitiveWord(); | |
LinkedList<String> listWords = null; | |
DFAUtils.addSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord(""); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("中中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(0, listWords.size()); | |
} | |
@Test | |
public void testDelSensitiveWord03() {DFAUtils.clearSensitiveWord(); | |
LinkedList<String> listWords = null; | |
DFAUtils.addSensitiveWord("中中"); | |
DFAUtils.addSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.delSensitiveWord(""); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.delSensitiveWord("中中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.delSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.delSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.delSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(0, listWords.size()); | |
DFAUtils.addSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("中中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(1, listWords.size()); | |
DFAUtils.delSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(0, listWords.size()); | |
} | |
@Test | |
public void testDelSensitiveWord04() {DFAUtils.clearSensitiveWord(); | |
LinkedList<String> listWords = null; | |
DFAUtils.addSensitiveWord("中中中 111"); | |
DFAUtils.addSensitiveWord("中中"); | |
DFAUtils.addSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(3, listWords.size()); | |
DFAUtils.delSensitiveWord(""); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(3, listWords.size()); | |
DFAUtils.delSensitiveWord("中中中 111"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.addSensitiveWord("中中中 111"); | |
DFAUtils.delSensitiveWord("中中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(3, listWords.size()); | |
DFAUtils.delSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.addSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(3, listWords.size()); | |
DFAUtils.delSensitiveWord("中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
DFAUtils.delSensitiveWord("中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(2, listWords.size()); | |
} | |
/*==========================DelSensitiveWor-end============================*/ | |
/*==========================ContainSensitiveWord-start==========================*/ | |
@Test | |
public void testContainSensitiveWord01() {DFAUtils.clearSensitiveWord(); | |
LinkedList<String> listWords = null; | |
DFAUtils.addSensitiveWord("滚"); | |
DFAUtils.addSensitiveWord("中中中国中中中"); | |
listWords = DFAUtils.getSevsitiveWords(); | |
Assert.assertEquals(7, listWords.size()); | |
Assert.assertEquals(false, DFAUtils.containSensitiveWord(" ")); | |
Assert.assertEquals(true, DFAUtils.containSensitiveWord("操")); | |
} | |
/*==========================ContainSensitiveWord-end============================*/ | |
} |
正文完