From 824aaa2618baa2ffc89634d3539a5729d7fd5494 Mon Sep 17 00:00:00 2001
From: haibinxiao <haibinxiao@creditease.cn>
Date: Sun, 6 Dec 2020 19:27:02 +0800
Subject: [PATCH 1/4] =?UTF-8?q?=E6=95=8F=E6=84=9F=E8=AF=8D=E8=BF=87?=
 =?UTF-8?q?=E6=BB=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../main/java/cn/hutool/dfa/FoundWord.java    |  50 ++++++
 .../cn/hutool/dfa/SensitiveProcessor.java     |  23 +++
 .../java/cn/hutool/dfa/SensitiveUtil.java     | 145 +++++++++++++-----
 .../src/main/java/cn/hutool/dfa/WordTree.java |  24 ++-
 .../test/java/cn/hutool/dfa/test/DfaTest.java |  41 ++---
 .../cn/hutool/dfa/test/SensitiveUtilTest.java |  49 ++++++
 6 files changed, 257 insertions(+), 75 deletions(-)
 create mode 100644 hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java
 create mode 100644 hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java
 create mode 100644 hutool-dfa/src/test/java/cn/hutool/dfa/test/SensitiveUtilTest.java
diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java b/hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java
new file mode 100644
index 000000000..b24fc2232
--- /dev/null
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java
@@ -0,0 +1,50 @@
+package cn.hutool.dfa;
+
+/**
+ * @author 肖海斌
+ * @Date 2020-12-05
+ * <p>
+ * 匹配到的敏感词，包含敏感词，text中匹配敏感词的内容，以及匹配内容在text中的下标，
+ * 下标可以用来做敏感词的进一步处理，如果替换成**
+ */
+public class FoundWord {
+	/** Sensitive word (dictionary keyword) that took effect for this match. */
+	private final String word;
+	/** Content of the analysed text that the sensitive word matched. */
+	private final String foundWord;
+	/** Start position of the matched content in the analysed string. */
+	private final int startIndex;
+	/** End position of the matched content in the analysed string. */
+	private final int endIndex;
+
+	/**
+	 * Creates a match record; all fields are assigned once here and are immutable afterwards.
+	 *
+	 * @param word      sensitive word that took effect
+	 * @param foundWord content matched by that sensitive word
+	 * @param start     start position of the matched content in the analysed string
+	 * @param end       end position of the matched content in the analysed string
+	 */
+	public FoundWord(String word, String foundWord, int start, int end) {
+		this.word = word;
+		this.foundWord = foundWord;
+		this.startIndex = start;
+		this.endIndex = end;
+	}
+
+	public String getWord() {
+		return word;
+	}
+
+	public String getFoundWord() {
+		return foundWord;
+	}
+
+	public int getStartIndex() {
+		return startIndex;
+	}
+
+	public int getEndIndex() {
+		return endIndex;
+	}
+}
diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java
new file mode 100644
index 000000000..e8a1e8509
--- /dev/null
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java
@@ -0,0 +1,23 @@
+package cn.hutool.dfa;
+
+/**
+ * @author 肖海斌
+ * @Date 2020-12-05
+ * 敏感词过滤处理器，默认按字符数替换成*
+ */
+public interface SensitiveProcessor {
+
+	/**
+	 * Produces the replacement text for one sensitive-word match.
+	 * @param foundWord the match whose content is to be replaced
+	 * @return the replacement text; by default one * per character of the matched content
+	 */
+	default String process(FoundWord foundWord) {
+		final char[] mask = new char[foundWord.getFoundWord().length()];
+		int pos = mask.length;
+		while (pos > 0) {
+			mask[--pos] = '*';
+		}
+		return new String(mask);
+	}
+}
diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
index d64100494..67244a4ad 100644
--- a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
@@ -1,77 +1,84 @@
 package cn.hutool.dfa;
 
+import cn.hutool.core.collection.CollectionUtil;
 import cn.hutool.core.lang.Filter;
 import cn.hutool.core.thread.ThreadUtil;
 import cn.hutool.core.util.StrUtil;
 import cn.hutool.json.JSONUtil;
 
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * 敏感词工具类
- * @author Looly
  *
+ * @author Looly
  */
 public final class SensitiveUtil {
 
 	public static final char DEFAULT_SEPARATOR = StrUtil.C_COMMA;
 	private static final WordTree sensitiveTree = new WordTree();
-	
+
 	/**
 	 * @return 是否已经被初始化
 	 */
-	public static boolean isInited(){
+	public static boolean isInited() {
 		return !sensitiveTree.isEmpty();
 	}
-	
+
 	/**
 	 * 初始化敏感词树
-	 * @param isAsync 是否异步初始化
+	 *
+	 * @param isAsync        是否异步初始化
 	 * @param sensitiveWords 敏感词列表
 	 */
-	public static void init(final Collection<String> sensitiveWords, boolean isAsync){
-		if(isAsync){
+	public static void init(final Collection<String> sensitiveWords, boolean isAsync) {
+		if (isAsync) {
 			ThreadUtil.execAsync(() -> {
 				init(sensitiveWords);
 				return true;
 			});
-		}else{
+		} else {
 			init(sensitiveWords);
 		}
 	}
-	
+
 	/**
 	 * 初始化敏感词树
+	 *
 	 * @param sensitiveWords 敏感词列表
 	 */
-	public static void init(Collection<String> sensitiveWords){
+	public static void init(Collection<String> sensitiveWords) {
 		sensitiveTree.clear();
 		sensitiveTree.addWords(sensitiveWords);
 //		log.debug("Sensitive init finished, sensitives: {}", sensitiveWords);
 	}
-	
+
 	/**
 	 * 初始化敏感词树
+	 *
 	 * @param sensitiveWords 敏感词列表组成的字符串
-	 * @param isAsync 是否异步初始化
-	 * @param separator 分隔符
+	 * @param isAsync        是否异步初始化
+	 * @param separator      分隔符
 	 */
-	public static void init(String sensitiveWords, char separator, boolean isAsync){
-		if(StrUtil.isNotBlank(sensitiveWords)){
+	public static void init(String sensitiveWords, char separator, boolean isAsync) {
+		if (StrUtil.isNotBlank(sensitiveWords)) {
 			init(StrUtil.split(sensitiveWords, separator), isAsync);
 		}
 	}
-	
+
 	/**
 	 * 初始化敏感词树，使用逗号分隔每个单词
+	 *
 	 * @param sensitiveWords 敏感词列表组成的字符串
-	 * @param isAsync 是否异步初始化
+	 * @param isAsync        是否异步初始化
 	 */
-	public static void init(String sensitiveWords, boolean isAsync){
+	public static void init(String sensitiveWords, boolean isAsync) {
 		init(sensitiveWords, DEFAULT_SEPARATOR, isAsync);
 	}
-	
+
 	/**
 	 * 设置字符过滤规则，通过定义字符串过滤规则，过滤不需要的字符<br>
 	 * 当accept为false时，此字符不参与匹配
@@ -80,90 +87,144 @@ public final class SensitiveUtil {
 	 * @since 5.4.4
 	 */
 	public static void setCharFilter(Filter<Character> charFilter) {
-		if(charFilter != null) {
+		if (charFilter != null) {
 			sensitiveTree.setCharFilter(charFilter);
 		}
 	}
-	
+
 	/**
 	 * 是否包含敏感词
+	 *
 	 * @param text 文本
 	 * @return 是否包含
 	 */
-	public static boolean containsSensitive(String text){
+	public static boolean containsSensitive(String text) {
 		return sensitiveTree.isMatch(text);
 	}
-	
+
 	/**
 	 * 是否包含敏感词
+	 *
 	 * @param obj bean，会被转为JSON字符串
 	 * @return 是否包含
 	 */
-	public static boolean containsSensitive(Object obj){
+	public static boolean containsSensitive(Object obj) {
 		return sensitiveTree.isMatch(JSONUtil.toJsonStr(obj));
 	}
-	
+
 	/**
 	 * 查找敏感词，返回找到的第一个敏感词
+	 *
 	 * @param text 文本
 	 * @return 敏感词
 	 */
-	public static String getFindedFirstSensitive(String text){
+	public static FoundWord getFindedFirstSensitive(String text) {
 		return sensitiveTree.match(text);
 	}
-	
+
 	/**
 	 * 查找敏感词，返回找到的第一个敏感词
+	 *
 	 * @param obj bean，会被转为JSON字符串
 	 * @return 敏感词
 	 */
-	public static String getFindedFirstSensitive(Object obj){
+	public static FoundWord getFindedFirstSensitive(Object obj) {
 		return sensitiveTree.match(JSONUtil.toJsonStr(obj));
 	}
-	
+
 	/**
 	 * 查找敏感词，返回找到的所有敏感词
+	 *
 	 * @param text 文本
 	 * @return 敏感词
 	 */
-	public static List<String> getFindedAllSensitive(String text){
+	public static List<FoundWord> getFindedAllSensitive(String text) {
 		return sensitiveTree.matchAll(text);
 	}
-	
+
 	/**
 	 * 查找敏感词，返回找到的所有敏感词<br>
 	 * 密集匹配原则：假如关键词有 ab,b，文本是abab，将匹配 [ab,b,ab]<br>
 	 * 贪婪匹配（最长匹配）原则：假如关键字a,ab，最长匹配将匹配[a, ab]
-	 * 
-	 * @param text 文本
+	 *
+	 * @param text           文本
 	 * @param isDensityMatch 是否使用密集匹配原则
-	 * @param isGreedMatch 是否使用贪婪匹配（最长匹配）原则
+	 * @param isGreedMatch   是否使用贪婪匹配（最长匹配）原则
 	 * @return 敏感词
 	 */
-	public static List<String> getFindedAllSensitive(String text, boolean isDensityMatch, boolean isGreedMatch){
+	public static List<FoundWord> getFindedAllSensitive(String text, boolean isDensityMatch, boolean isGreedMatch) {
 		return sensitiveTree.matchAll(text, -1, isDensityMatch, isGreedMatch);
 	}
-	
+
 	/**
 	 * 查找敏感词，返回找到的所有敏感词
+	 *
 	 * @param bean 对象，会被转为JSON
 	 * @return 敏感词
 	 */
-	public static List<String> getFindedAllSensitive(Object bean){
+	public static List<FoundWord> getFindedAllSensitive(Object bean) {
 		return sensitiveTree.matchAll(JSONUtil.toJsonStr(bean));
 	}
-	
+
 	/**
 	 * 查找敏感词，返回找到的所有敏感词<br>
 	 * 密集匹配原则：假如关键词有 ab,b，文本是abab，将匹配 [ab,b,ab]<br>
 	 * 贪婪匹配（最长匹配）原则：假如关键字a,ab，最长匹配将匹配[a, ab]
-	 * 
-	 * @param bean 对象，会被转为JSON
+	 *
+	 * @param bean           对象，会被转为JSON
 	 * @param isDensityMatch 是否使用密集匹配原则
-	 * @param isGreedMatch 是否使用贪婪匹配（最长匹配）原则
+	 * @param isGreedMatch   是否使用贪婪匹配（最长匹配）原则
 	 * @return 敏感词
 	 */
-	public static List<String> getFindedAllSensitive(Object bean, boolean isDensityMatch, boolean isGreedMatch){
+	public static List<FoundWord> getFindedAllSensitive(Object bean, boolean isDensityMatch, boolean isGreedMatch) {
 		return getFindedAllSensitive(JSONUtil.toJsonStr(bean), isDensityMatch, isGreedMatch);
 	}
+
+	/**
+	 * 敏感词过滤
+	 *
+	 * @param bean               对象，会被转为JSON
+	 * @param isGreedMatch       贪婪匹配（最长匹配）原则：假如关键字a,ab，最长匹配将匹配[a, ab]
+	 * @param sensitiveProcessor 敏感词处理器，默认按匹配内容的字符数替换成*
+	 * @param <T>                bean的class类型
+	 * @return 敏感词过滤处理后的bean对象
+	 */
+	public static <T> T sensitiveFilter(T bean, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
+		sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
+		} : sensitiveProcessor;
+		String jsonText = JSONUtil.toJsonStr(bean);
+		Class<T> c = (Class) bean.getClass();
+		return JSONUtil.toBean(sensitiveFilter(jsonText, isGreedMatch, sensitiveProcessor), c);
+	}
+
+	/** Replaces every sensitive word found in a text with the output of a processor.
+	 * @param text               text to filter
+	 * @param isGreedMatch       greedy (longest) matching: with keywords a and ab, longest matching finds [a, ab]
+	 * @param sensitiveProcessor sensitive word processor; its default replaces a match with one * per matched character
+	 * @return the text after sensitive-word filtering
+	 */
+	public static String sensitiveFilter(String text, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
+		if (null == text || text.trim().equals("")) {
+			return text;
+		}
+		// Density matching is not needed when filtering sensitive words
+		List<FoundWord> foundWordList = getFindedAllSensitive(text, false, isGreedMatch);
+		if (CollectionUtil.isEmpty(foundWordList)) {
+			return text;
+		}
+		Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size());
+		foundWordList.forEach(foundWord -> foundWordMap.put(foundWord.getStartIndex(), foundWord));
+		int length = text.length();
+		StringBuilder textStringBuilder = new StringBuilder();
+		for (int i = 0; i < length; i++) {
+			FoundWord fw = foundWordMap.get(i);
+			if (fw != null) {
+				textStringBuilder.append(sensitiveProcessor.process(fw));
+				i = fw.getEndIndex(); // jump to the match's end index; the loop's i++ then steps past it
+			} else {
+				textStringBuilder.append(text.charAt(i));
+			}
+		}
+		return textStringBuilder.toString();
+	}
 }
diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java b/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java
index 0d715b338..4e05657b9 100644
--- a/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java
@@ -5,12 +5,7 @@ import cn.hutool.core.lang.Filter;
 import cn.hutool.core.text.StrBuilder;
 import cn.hutool.core.util.StrUtil;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
 
 /**
  * DFA（Deterministic Finite Automaton 确定有穷自动机）
@@ -140,11 +135,11 @@ public class WordTree extends HashMap<Character, WordTree> {
 	 * @param text 被检查的文本
 	 * @return 匹配到的关键字
 	 */
-	public String match(String text) {
+	public FoundWord match(String text) {
 		if (null == text) {
 			return null;
 		}
-		List<String> matchAll = matchAll(text, 1);
+		List<FoundWord> matchAll = matchAll(text, 1);
 		if (CollectionUtil.isNotEmpty(matchAll)) {
 			return matchAll.get(0);
 		}
@@ -159,7 +154,7 @@ public class WordTree extends HashMap<Character, WordTree> {
 	 * @param text 被检查的文本
 	 * @return 匹配的词列表
 	 */
-	public List<String> matchAll(String text) {
+	public List<FoundWord> matchAll(String text) {
 		return matchAll(text, -1);
 	}
 
@@ -170,7 +165,7 @@ public class WordTree extends HashMap<Character, WordTree> {
 	 * @param limit 限制匹配个数
 	 * @return 匹配的词列表
 	 */
-	public List<String> matchAll(String text, int limit) {
+	public List<FoundWord> matchAll(String text, int limit) {
 		return matchAll(text, limit, false, false);
 	}
 
@@ -185,20 +180,22 @@ public class WordTree extends HashMap<Character, WordTree> {
 	 * @param isGreedMatch   是否使用贪婪匹配（最长匹配）原则
 	 * @return 匹配的词列表
 	 */
-	public List<String> matchAll(String text, int limit, boolean isDensityMatch, boolean isGreedMatch) {
+	public List<FoundWord> matchAll(String text, int limit, boolean isDensityMatch, boolean isGreedMatch) {
 		if (null == text) {
 			return null;
 		}
 
-		List<String> foundWords = new ArrayList<>();
+		List<FoundWord> foundWords = new ArrayList<>();
 		WordTree current = this;
 		int length = text.length();
 		final Filter<Character> charFilter = this.charFilter;
 		//存放查找到的字符缓存。完整出现一个词时加到findedWords中，否则清空
 		final StrBuilder wordBuffer = StrUtil.strBuilder();
+		final StrBuilder keyBuffer = StrUtil.strBuilder();
 		char currentChar;
 		for (int i = 0; i < length; i++) {
 			wordBuffer.reset();
+			keyBuffer.reset();
 			for (int j = i; j < length; j++) {
 				currentChar = text.charAt(j);
 //				Console.log("i: {}, j: {}, currentChar: {}", i, j, currentChar);
@@ -216,9 +213,10 @@ public class WordTree extends HashMap<Character, WordTree> {
 					break;
 				}
 				wordBuffer.append(currentChar);
+				keyBuffer.append(currentChar);
 				if (current.isEnd(currentChar)) {
 					//到达单词末尾，关键词成立，从此词的下一个位置开始查找
-					foundWords.add(wordBuffer.toString());
+					foundWords.add(new FoundWord(keyBuffer.toString(), wordBuffer.toString(), i, j));
 					if (limit > 0 && foundWords.size() >= limit) {
 						//超过匹配限制个数，直接返回
 						return foundWords;
diff --git a/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java b/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java
index 9d500f870..065b3d0da 100644
--- a/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java
+++ b/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java
@@ -1,16 +1,16 @@
 package cn.hutool.dfa.test;
 
-import java.util.List;
-
+import cn.hutool.core.collection.CollectionUtil;
+import cn.hutool.dfa.FoundWord;
+import cn.hutool.dfa.WordTree;
 import org.junit.Assert;
 import org.junit.Test;
 
-import cn.hutool.core.collection.CollectionUtil;
-import cn.hutool.dfa.WordTree;
+import java.util.List;
 
 /**
  * DFA单元测试
- * 
+ *
  * @author Looly
  *
  */
@@ -28,8 +28,8 @@ public class DfaTest {
 		// 情况一：标准匹配，匹配到最短关键词，并跳过已经匹配的关键词
 		// 匹配到【大】，就不再继续匹配了，因此【大土豆】不匹配
 		// 匹配到【刚出锅】，就跳过这三个字了，因此【出锅】不匹配（由于刚首先被匹配，因此长的被匹配，最短匹配只针对第一个字相同选最短）
-		List<String> matchAll = tree.matchAll(text, -1, false, false);
-		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土^豆", "刚出锅"));
+		List<FoundWord> matchAll = tree.matchAll(text, -1, false, false);
+		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("大", "土^豆", "刚出锅"));
 	}
 
 	/**
@@ -44,8 +44,8 @@ public class DfaTest {
 		// 情况二：匹配到最短关键词，不跳过已经匹配的关键词
 		// 【大】被匹配，最短匹配原则【大土豆】被跳过，【土豆继续被匹配】
 		// 【刚出锅】被匹配，由于不跳过已经匹配的词，【出锅】被匹配
-		List<String> matchAll = tree.matchAll(text, -1, true, false);
-		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土^豆", "刚出锅", "出锅"));
+		List<FoundWord> matchAll = tree.matchAll(text, -1, true, false);
+		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("大", "土^豆", "刚出锅", "出锅"));
 	}
 
 	/**
@@ -60,8 +60,8 @@ public class DfaTest {
 		// 情况三：匹配到最长关键词，跳过已经匹配的关键词
 		// 匹配到【大】，由于到最长匹配，因此【大土豆】接着被匹配
 		// 由于【大土豆】被匹配，【土豆】被跳过，由于【刚出锅】被匹配，【出锅】被跳过
-		List<String> matchAll = tree.matchAll(text, -1, false, true);
-		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土^豆", "刚出锅"));
+		List<FoundWord> matchAll = tree.matchAll(text, -1, false, true);
+		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("大", "大土^豆", "刚出锅"));
 
 	}
 
@@ -77,8 +77,8 @@ public class DfaTest {
 		// 情况四：匹配到最长关键词，不跳过已经匹配的关键词（最全关键词）
 		// 匹配到【大】，由于到最长匹配，因此【大土豆】接着被匹配，由于不跳过已经匹配的关键词，土豆继续被匹配
 		// 【刚出锅】被匹配，由于不跳过已经匹配的词，【出锅】被匹配
-		List<String> matchAll = tree.matchAll(text, -1, true, true);
-		Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土^豆", "土^豆", "刚出锅", "出锅"));
+		List<FoundWord> matchAll = tree.matchAll(text, -1, true, true);
+		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("大", "大土^豆", "土^豆", "刚出锅", "出锅"));
 
 	}
 
@@ -90,23 +90,24 @@ public class DfaTest {
 		WordTree tree = new WordTree();
 		tree.addWord("tio");
 
-		List<String> all = tree.matchAll("AAAAAAAt-ioBBBBBBB");
-		Assert.assertEquals(all, CollectionUtil.newArrayList("t-io"));
+		List<FoundWord> all = tree.matchAll("AAAAAAAt-ioBBBBBBB");
+		Assert.assertEquals(all.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("t-io"));
 	}
 
 	@Test
-	public void aTest(){
+	public void aTest() {
 		WordTree tree = new WordTree();
 		tree.addWord("women");
 		String text = "a WOMEN todo.".toLowerCase();
-		List<String> matchAll = tree.matchAll(text, -1, false, false);
-		Assert.assertEquals("[women]", matchAll.toString());
+		List<FoundWord> matchAll = tree.matchAll(text, -1, false, false);
+		Assert.assertEquals("[women]", matchAll.stream().map(fw -> fw.getFoundWord()).toString());
 	}
-	
+
 	// ----------------------------------------------------------------------------------------------------------
+
 	/**
 	 * 构建查找树
-	 * 
+	 *
 	 * @return 查找树
 	 */
 	private WordTree buildWordTree() {
diff --git a/hutool-dfa/src/test/java/cn/hutool/dfa/test/SensitiveUtilTest.java b/hutool-dfa/src/test/java/cn/hutool/dfa/test/SensitiveUtilTest.java
new file mode 100644
index 000000000..8a19fef3e
--- /dev/null
+++ b/hutool-dfa/src/test/java/cn/hutool/dfa/test/SensitiveUtilTest.java
@@ -0,0 +1,49 @@
+package cn.hutool.dfa.test;
+
+import cn.hutool.dfa.SensitiveUtil;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class SensitiveUtilTest {
+	/** Filters a bean with greedy matching and a null processor, then checks the masked string field. */
+	@Test
+	public void testSensitiveFilter() {
+		List<String> wordList = new ArrayList<>();
+		wordList.add("大");
+		wordList.add("大土豆");
+		wordList.add("土豆");
+		wordList.add("刚出锅");
+		wordList.add("出锅");
+		TestBean bean = new TestBean();
+		bean.setStr("我有一颗$大土^豆，刚出锅的");
+		bean.setNum(100);
+		SensitiveUtil.init(wordList);
+		bean = SensitiveUtil.sensitiveFilter(bean, true, null);
+		Assert.assertEquals("我有一颗$****，***的", bean.getStr());
+	}
+
+	public static class TestBean {
+		private String str;
+		private Integer num;
+
+		public String getStr() {
+			return str;
+		}
+
+		public void setStr(String str) {
+			this.str = str;
+		}
+
+		public Integer getNum() {
+			return num;
+		}
+
+		public void setNum(Integer num) {
+			this.num = num;
+		}
+	}
+
+}

From f7c640934d6bf4c91fc11d0291f82f8eb3937f2c Mon Sep 17 00:00:00 2001
From: haibinxiao <haibinxiao@creditease.cn>
Date: Sun, 6 Dec 2020 21:29:21 +0800
Subject: [PATCH 2/4] =?UTF-8?q?=E6=B3=A8=E9=87=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java          | 1 -
 hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java | 1 -
 2 files changed, 2 deletions(-)

diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java b/hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java
index b24fc2232..e57a0f9ec 100644
--- a/hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/FoundWord.java
@@ -2,7 +2,6 @@ package cn.hutool.dfa;
 
 /**
  * @author 肖海斌
- * @Date 2020-12-05
  * <p>
  * 匹配到的敏感词，包含敏感词，text中匹配敏感词的内容，以及匹配内容在text中的下标，
  * 下标可以用来做敏感词的进一步处理，如果替换成**
diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java
index e8a1e8509..34c0128b8 100644
--- a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveProcessor.java
@@ -2,7 +2,6 @@ package cn.hutool.dfa;
 
 /**
  * @author 肖海斌
- * @Date 2020-12-05
  * 敏感词过滤处理器，默认按字符数替换成*
  */
 public interface SensitiveProcessor {

From f5c53a8f60c7f51866ab47601bd1cdcdb05822f7 Mon Sep 17 00:00:00 2001
From: haibinxiao <haibinxiao@creditease.cn>
Date: Sun, 6 Dec 2020 22:00:12 +0800
Subject: [PATCH 3/4] =?UTF-8?q?=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?=
 =?UTF-8?q?=E9=94=99=E8=AF=AF=E8=A7=A3=E5=86=B3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../src/test/java/cn/hutool/dfa/test/DfaTest.java   | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java b/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java
index 065b3d0da..913f10fce 100644
--- a/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java
+++ b/hutool-dfa/src/test/java/cn/hutool/dfa/test/DfaTest.java
@@ -7,6 +7,7 @@ import org.junit.Assert;
 import org.junit.Test;
 
 import java.util.List;
+import java.util.stream.Collectors;
 
 /**
  * DFA单元测试
@@ -29,7 +30,7 @@ public class DfaTest {
 		// 匹配到【大】，就不再继续匹配了，因此【大土豆】不匹配
 		// 匹配到【刚出锅】，就跳过这三个字了，因此【出锅】不匹配（由于刚首先被匹配，因此长的被匹配，最短匹配只针对第一个字相同选最短）
 		List<FoundWord> matchAll = tree.matchAll(text, -1, false, false);
-		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("大", "土^豆", "刚出锅"));
+		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()).collect(Collectors.toList()), CollectionUtil.newArrayList("大", "土^豆", "刚出锅"));
 	}
 
 	/**
@@ -45,7 +46,7 @@ public class DfaTest {
 		// 【大】被匹配，最短匹配原则【大土豆】被跳过，【土豆继续被匹配】
 		// 【刚出锅】被匹配，由于不跳过已经匹配的词，【出锅】被匹配
 		List<FoundWord> matchAll = tree.matchAll(text, -1, true, false);
-		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("大", "土^豆", "刚出锅", "出锅"));
+		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()).collect(Collectors.toList()), CollectionUtil.newArrayList("大", "土^豆", "刚出锅", "出锅"));
 	}
 
 	/**
@@ -61,7 +62,7 @@ public class DfaTest {
 		// 匹配到【大】，由于到最长匹配，因此【大土豆】接着被匹配
 		// 由于【大土豆】被匹配，【土豆】被跳过，由于【刚出锅】被匹配，【出锅】被跳过
 		List<FoundWord> matchAll = tree.matchAll(text, -1, false, true);
-		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("大", "大土^豆", "刚出锅"));
+		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()).collect(Collectors.toList()), CollectionUtil.newArrayList("大", "大土^豆", "刚出锅"));
 
 	}
 
@@ -78,7 +79,7 @@ public class DfaTest {
 		// 匹配到【大】，由于到最长匹配，因此【大土豆】接着被匹配，由于不跳过已经匹配的关键词，土豆继续被匹配
 		// 【刚出锅】被匹配，由于不跳过已经匹配的词，【出锅】被匹配
 		List<FoundWord> matchAll = tree.matchAll(text, -1, true, true);
-		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("大", "大土^豆", "土^豆", "刚出锅", "出锅"));
+		Assert.assertEquals(matchAll.stream().map(fw -> fw.getFoundWord()).collect(Collectors.toList()), CollectionUtil.newArrayList("大", "大土^豆", "土^豆", "刚出锅", "出锅"));
 
 	}
 
@@ -91,7 +92,7 @@ public class DfaTest {
 		tree.addWord("tio");
 
 		List<FoundWord> all = tree.matchAll("AAAAAAAt-ioBBBBBBB");
-		Assert.assertEquals(all.stream().map(fw -> fw.getFoundWord()), CollectionUtil.newArrayList("t-io"));
+		Assert.assertEquals(all.stream().map(fw -> fw.getFoundWord()).collect(Collectors.toList()), CollectionUtil.newArrayList("t-io"));
 	}
 
 	@Test
@@ -100,7 +101,7 @@ public class DfaTest {
 		tree.addWord("women");
 		String text = "a WOMEN todo.".toLowerCase();
 		List<FoundWord> matchAll = tree.matchAll(text, -1, false, false);
-		Assert.assertEquals("[women]", matchAll.stream().map(fw -> fw.getFoundWord()).toString());
+		Assert.assertEquals("[women]", matchAll.stream().map(fw -> fw.getFoundWord()).collect(Collectors.toList()).toString());
 	}
 
 	// ----------------------------------------------------------------------------------------------------------

From 11edc1fcc666537aed6f818405ff4ce5425e428c Mon Sep 17 00:00:00 2001
From: haibinxiao <haibinxiao@creditease.cn>
Date: Sun, 6 Dec 2020 23:02:11 +0800
Subject: [PATCH 4/4] =?UTF-8?q?sensitiveProcessor=E8=BF=81=E7=A7=BB?=
 =?UTF-8?q?=E5=88=B0=E6=AD=A3=E7=A1=AE=E7=9A=84=E6=96=B9=E6=B3=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
index 67244a4ad..6a396d49d 100644
--- a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
@@ -190,8 +190,6 @@ public final class SensitiveUtil {
 	 * @return 敏感词过滤处理后的bean对象
 	 */
 	public static <T> T sensitiveFilter(T bean, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
-		sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
-		} : sensitiveProcessor;
 		String jsonText = JSONUtil.toJsonStr(bean);
 		Class<T> c = (Class) bean.getClass();
 		return JSONUtil.toBean(sensitiveFilter(jsonText, isGreedMatch, sensitiveProcessor), c);
@@ -212,6 +210,8 @@ public final class SensitiveUtil {
 		if (CollectionUtil.isEmpty(foundWordList)) {
 			return text;
 		}
+		sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
+		} : sensitiveProcessor;
 		Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size());
 		foundWordList.forEach(foundWord -> foundWordMap.put(foundWord.getStartIndex(), foundWord));
 		int length = text.length();