java敏感词过滤
敏感词:“美元”,“中国”,“北京大学”,“北大”,“南京大学”
DFAUtils
`import java.util.HashMap;import java.util.LinkedList;import java.util.Map;public class DFAUtils { /** * 增加敏感词到算法树 */ public static void addSensitiveWord(String sensitiveWord) { if (null == sensitiveWord || sensitiveWord.length() == 0) { return; } char[] chars = sensitiveWord.toCharArray(); Map<Character, Map> parentMap = sensitiveWordsMap; Map<Character, Map> current = null; synchronized (lock) { for (int i = 0; i < chars.length; i++) { if (i == 0) { if (sensitiveWordsMap.size() == 0) { /* 增加第一个敏感词的第一个字符执行此code */ if (chars.length == 1) { Map<Character, Map> endMap = new HashMap<>(1); endMap.put(null, null); sensitiveWordsMap.put(chars[0], endMap); } else { sensitiveWordsMap.put(chars[0], null); } } else { current = parentMap.get(chars[0]); if (null == current) { if (chars.length == 1) { Map<Character, Map> endMap = new HashMap<>(1); endMap.put(null, null); sensitiveWordsMap.put(chars[0], endMap); break; } else { sensitiveWordsMap.put(chars[0], null); } } else { if (chars.length == 1) { current.put(null, null); break; } } } } else { if (null == current) { Map<Character, Map> childMap = new HashMap<Character, Map>(); if (i == chars.length - 1) { Map<Character, Map> endMap = new HashMap<>(1); endMap.put(null, null); childMap.put(chars[i], endMap); parentMap.put(chars[i - 1], childMap); break; } else { childMap.put(chars[i], null); parentMap.put(chars[i - 1], childMap); parentMap = childMap; current = null; } } else { Map<Character, Map> childMap = current.get(chars[i]); if (null == childMap) { if (i == chars.length - 1) { Map<Character, Map> endMap = new HashMap<>(1); endMap.put(null, null); current.put(chars[i], endMap); } else { current.put(chars[i], null); parentMap = current; current = null; } } else { if (i == chars.length - 1) { childMap.put(null, null); } else { parentMap = current; current = childMap; } } } } } } } /** * 查看敏感词(找到合乎敏感词则返回--单个字符敏感词前后不是中文字符才算敏感词) */ public static String checkSensitiveWord(String content) { if (null == content || content.length() == 0 || sensitiveWordsMap.size() == 0) { return null; } char[] chars = content.toCharArray(); boolean isContain = Boolean.FALSE; StringBuilder sbResult = new StringBuilder(); for (int i = 0; i < chars.length; i++) { if (sensitiveWordsMap.containsKey(chars[i])) { Map<Character, Map> currentMap = sensitiveWordsMap.get(chars[i]); sbResult.append(chars[i]); if (null == currentMap) { break; } else { if (currentMap.containsKey(null)) { if (sbResult.length() == 1) { /* 前一个字符或后一个字符是否是中文字符 */ boolean before = Boolean.FALSE; if (i - 1 < 0) { before = Boolean.TRUE; } else { if (chars[i - 1] < 13312 || chars[i - 1] > 40895) { before = Boolean.TRUE; } } boolean after = Boolean.FALSE; if (i + 1 >= chars.length) { after = Boolean.TRUE; } else { if (chars[i + 1] < 13312 || chars[i + 1] > 40895) { after = Boolean.TRUE; } } if (before && after) { isContain = Boolean.TRUE; break; } /* From以后index开始匹配是否存在敏感词 */ int j = i + 1; for (; j < chars.length; j++) { if (currentMap.containsKey(chars[j])) { sbResult.append(chars[j]); currentMap = currentMap.get(chars[j]); if (currentMap.containsKey(null)) { isContain = Boolean.TRUE; break; } else { continue; } } else { break; } } } else { isContain = Boolean.TRUE; break; } } else { /* From以后index开始匹配是否存在敏感词 */ int j = i + 1; for (; j < chars.length; j++) { if (currentMap.containsKey(chars[j])) { sbResult.append(chars[j]); currentMap = currentMap.get(chars[j]); if (currentMap.containsKey(null)) { isContain = Boolean.TRUE; break; } else { continue; } } else { break; } } } if (isContain) { break; } else { sbResult.setLength(0); } } } } if (isContain) { return sbResult.toString(); } else { return null; } } /** * 删除算法树的敏感词 */ public static void delSensitiveWord(String sensitiveWord) { if (null == sensitiveWord || sensitiveWord.length() == 0 || sensitiveWordsMap.size() == 0) { return; } int delIndex = 0; char[] chars = sensitiveWord.toCharArray(); Map<Character, Map> current = sensitiveWordsMap; synchronized (lock) { int i = 0; for (; i < chars.length; i++) { if (current.containsKey(chars[i])) { if (current.get(chars[i]).size() > 1) { delIndex = i; } } else { break; } current = current.get(chars[i]); } if (!current.containsKey(null)) { return; } current = sensitiveWordsMap; if (i == chars.length) { for (i = 0; i < delIndex; i++) { current = current.get(chars[i]); } if (i == chars.length) { current.remove(chars[i]); } else { if (i == 0 && chars.length == 1) { if (current.get(chars[i]).size() == 1) { current.remove(chars[i]); } else { current.get(chars[i]).remove(null); } } else { if (i + 1 == chars.length) { current.get(chars[i]).remove(null); } else { current.get(chars[i]).remove(chars[i + 1]); } } } } } } /** * 获取算法树的敏感词 */ public static LinkedList<String> getSevsitiveWords() { LinkedList<String> listWords = new LinkedList<String>(); if (sensitiveWordsMap.size() == 0) { return listWords; } StringBuilder sbWord = new StringBuilder(); getSevsitiveWords(sensitiveWordsMap, listWords, sbWord); return listWords; } /** * 算法树是否蕴含对应的敏感词 */ public static boolean containSensitiveWord(String sensitiveWord) { if (null == sensitiveWord || sensitiveWord.length() == 0 || sensitiveWordsMap.size() == 0) { return false; } return sensitiveWord.equals(checkSensitiveWord(sensitiveWord)); } /** * 清空算法树 */ public static void clearSensitiveWord() { synchronized (lock) { sensitiveWordsMap = new HashMap<Character, Map>(); } } /** * 递归获取算法树的敏感词 */ private static void getSevsitiveWords(Map<Character, Map> childMap, LinkedList<String> listWords, StringBuilder sbWord) { if (childMap.size() == 1 && childMap.containsKey(null)) { listWords.add(sbWord.toString()); sbWord.setLength(sbWord.length() - 1); return; } for (Map.Entry<Character, Map> entry : childMap.entrySet()) { Character keyChar = entry.getKey(); Map<Character, Map> valueMap = entry.getValue(); if (null == keyChar) { continue; } sbWord.append(keyChar); if (valueMap.containsKey(null)) { listWords.add(sbWord.toString()); if (valueMap.size() == 1) { sbWord.setLength(sbWord.length() - 1); } else { getSevsitiveWords(valueMap, listWords, sbWord); sbWord.setLength(sbWord.length() - 1); } } else { getSevsitiveWords(valueMap, listWords, sbWord); sbWord.setLength(sbWord.length() - 1); } } } private final static Object lock = new Object(); private static Map<Character, Map> sensitiveWordsMap = new HashMap<Character, Map>();}`
DFAUtilsTest
外汇名词解释https://www.fx61.com/definitions
import org.junit.Assert;import org.junit.Test;import java.util.LinkedList;public class DFAUtilsTest { /*==========================AddSensitiveWord-start==========================*/ @Test public void testAddSensitiveWord01() { DFAUtils.clearSensitiveWord(); LinkedList<String> listWords = null; DFAUtils.addSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.addSensitiveWord("中哈"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.addSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.addSensitiveWord("中哈"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.delSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.addSensitiveWord("中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.addSensitiveWord("中中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(3, listWords.size()); DFAUtils.addSensitiveWord("人"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(4, listWords.size()); DFAUtils.addSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(5, listWords.size()); } /*==========================AddSensitiveWord-end============================*/ /*==========================CheckSensitiveWord-start==========================*/ @Test public void testCheckSensitiveWord01() { DFAUtils.clearSensitiveWord(); String sencitivaWord = null; LinkedList<String> listWords = null; DFAUtils.addSensitiveWord("大"); DFAUtils.addSensitiveWord("大学"); DFAUtils.addSensitiveWord("中中中国中中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(8, listWords.size()); sencitivaWord = DFAUtils.checkSensitiveWord("滚"); Assert.assertEquals("滚", sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord("翻滚"); Assert.assertEquals(null, sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord("滚 "); Assert.assertEquals("滚", sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord(" 滚"); Assert.assertEquals("滚", sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord("体操"); Assert.assertEquals(null, sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord("你好滚滚"); Assert.assertEquals("滚滚", sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord("滚你好滚"); Assert.assertEquals(null, sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord("滚轮胎"); Assert.assertEquals(null, sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord("你你国国"); Assert.assertEquals(null, sencitivaWord); sencitivaWord = DFAUtils.checkSensitiveWord("中中国中中 中中中中国中中中"); Assert.assertEquals("中中中国中中中", sencitivaWord); } /*==========================CheckSensitiveWord-start==========================*/ /*==========================DelSensitiveWor-start==========================*/ @Test public void testDelSensitiveWord01() { DFAUtils.clearSensitiveWord(); LinkedList<String> listWords = null; DFAUtils.addSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord(""); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("国"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(0, listWords.size()); } @Test public void testDelSensitiveWord02() { DFAUtils.clearSensitiveWord(); LinkedList<String> listWords = null; DFAUtils.addSensitiveWord("中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord(""); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("中中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(0, listWords.size()); } @Test public void testDelSensitiveWord03() { DFAUtils.clearSensitiveWord(); LinkedList<String> listWords = null; DFAUtils.addSensitiveWord("中中"); DFAUtils.addSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.delSensitiveWord(""); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.delSensitiveWord("中中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.delSensitiveWord(" 中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.delSensitiveWord("中中 "); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.delSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(0, listWords.size()); DFAUtils.addSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("中中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(1, listWords.size()); DFAUtils.delSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(0, listWords.size()); } @Test public void testDelSensitiveWord04() { DFAUtils.clearSensitiveWord(); LinkedList<String> listWords = null; DFAUtils.addSensitiveWord("中中中111"); DFAUtils.addSensitiveWord("中中"); DFAUtils.addSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(3, listWords.size()); DFAUtils.delSensitiveWord(""); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(3, listWords.size()); DFAUtils.delSensitiveWord("中中中111"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.addSensitiveWord("中中中111"); DFAUtils.delSensitiveWord("中中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(3, listWords.size()); DFAUtils.delSensitiveWord("中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.addSensitiveWord("中中 "); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(3, listWords.size()); DFAUtils.delSensitiveWord("中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); DFAUtils.delSensitiveWord("中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(2, listWords.size()); } /*==========================DelSensitiveWor-end============================*/ /*==========================ContainSensitiveWord-start==========================*/ @Test public void testContainSensitiveWord01() { DFAUtils.clearSensitiveWord(); LinkedList<String> listWords = null; DFAUtils.addSensitiveWord("滚"); DFAUtils.addSensitiveWord("中中中国中中中"); listWords = DFAUtils.getSevsitiveWords(); Assert.assertEquals(7, listWords.size()); Assert.assertEquals(false, DFAUtils.containSensitiveWord(" ")); Assert.assertEquals(true, DFAUtils.containSensitiveWord("操")); } /*==========================ContainSensitiveWord-end============================*/}