This commit is contained in:
Looly 2022-02-07 19:04:37 +08:00
parent bbb12fa22d
commit d5916b9998
6 changed files with 60 additions and 36 deletions

View File

@ -23,6 +23,7 @@
* 【core 】 修复ChineseDate农历获取正月出现数组越界BUGissue#2112@Github
* 【extra 】 修复EmojiUtil.toHtmlHex()方法pr#519@Gitee
* 【system 】 修复CpuInfo.getUsed()方法issue#2116@Github
+* 【dfa    】     修复密集匹配和贪婪匹配冲突问题（issue#2126@Github）
-------------------------------------------------------------------------------------------------------------
# 5.7.20 (2022-01-20)

View File

@ -17,11 +17,6 @@
<description>Hutool 基于DFA的关键词查找</description>
<dependencies>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-core</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-json</artifactId>

View File

@ -195,11 +195,21 @@ public final class SensitiveUtil {
*/
public static <T> T sensitiveFilter(T bean, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
String jsonText = JSONUtil.toJsonStr(bean);
-		@SuppressWarnings("unchecked")
-		final Class<T> c = (Class<T>) bean.getClass();
+		@SuppressWarnings("unchecked") final Class<T> c = (Class<T>) bean.getClass();
return JSONUtil.toBean(sensitiveFilter(jsonText, isGreedMatch, sensitiveProcessor), c);
}
+	/**
+	 * 处理过滤文本中的敏感词，默认替换成*
+	 *
+	 * @param text 文本
+	 * @return 敏感词过滤处理后的文本
+	 * @since 5.7.21
+	 */
+	public static String sensitiveFilter(String text) {
+		return sensitiveFilter(text, true, null);
+	}
/**
* 处理过滤文本中的敏感词默认替换成*
*
@ -214,13 +224,14 @@ public final class SensitiveUtil {
}
//敏感词过滤场景下不需要密集匹配
-		List<FoundWord> foundWordList = getFoundAllSensitive(text, false, isGreedMatch);
+		List<FoundWord> foundWordList = getFoundAllSensitive(text, true, isGreedMatch);
if (CollUtil.isEmpty(foundWordList)) {
return text;
}
sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
} : sensitiveProcessor;
-		Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size());
+		final Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size(), 1);
foundWordList.forEach(foundWord -> foundWordMap.put(foundWord.getStartIndex(), foundWord));
int length = text.length();
StringBuilder textStringBuilder = new StringBuilder();

View File

@ -3,7 +3,6 @@ package cn.hutool.dfa;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.lang.Filter;
-import cn.hutool.core.text.StrBuilder;
import cn.hutool.core.util.StrUtil;
import java.util.ArrayList;
@ -247,15 +246,15 @@ public class WordTree extends HashMap<Character, WordTree> {
List<FoundWord> foundWords = new ArrayList<>();
WordTree current = this;
-		int length = text.length();
+		final int length = text.length();
final Filter<Character> charFilter = this.charFilter;
//存放查找到的字符缓存完整出现一个词时加到findedWords中否则清空
-		final StrBuilder wordBuffer = StrUtil.strBuilder();
-		final StrBuilder keyBuffer = StrUtil.strBuilder();
+		final StringBuilder wordBuffer = StrUtil.builder();
+		final StringBuilder keyBuffer = StrUtil.builder();
char currentChar;
for (int i = 0; i < length; i++) {
-			wordBuffer.reset();
-			keyBuffer.reset();
+			wordBuffer.setLength(0);
+			keyBuffer.setLength(0);
for (int j = i; j < length; j++) {
currentChar = text.charAt(j);
// Console.log("i: {}, j: {}, currentChar: {}", i, j, currentChar);
@ -284,6 +283,7 @@ public class WordTree extends HashMap<Character, WordTree> {
if (false == isDensityMatch) {
//如果非密度匹配跳过匹配到的词
i = j;
+					break;
}
if (false == isGreedMatch) {
//如果懒惰匹配非贪婪匹配当遇到第一个结尾标记就结束本轮匹配

View File

@ -47,7 +47,7 @@ public class DfaTest {
}
/**
-	 * 贪婪匹配原则测试
+	 * 贪婪非密集匹配原则测试
*/
@Test
public void greedMatchTest() {
@ -56,15 +56,15 @@ public class DfaTest {
// -----------------------------------------------------------------------------------------------------------------------------------
// 情况三匹配到最长关键词跳过已经匹配的关键词
-		// 匹配到“大”，由于到最长匹配，因此“大土豆”接着被匹配
-		// 由于“大土豆”被匹配，“土豆”被跳过，由于“刚出锅”匹配，“出锅”被跳过
+		// 匹配到“大”，由于非密集匹配，因此从下一个字符开始查找，匹配到“土豆”接着被匹配
+		// 由于“刚出锅”被匹配，由于非密集匹配，“出锅”被跳过
List<String> matchAll = tree.matchAll(text, -1, false, true);
-		Assert.assertEquals(matchAll, CollUtil.newArrayList("大", "大土^豆", "刚出锅"));
+		Assert.assertEquals(matchAll, CollUtil.newArrayList("大", "土^豆", "刚出锅"));
}
/**
* 密集匹配原则匹配和贪婪匹配原则测试
* 密集匹配原则匹配和贪婪匹配原则测试
*/
@Test
public void densityAndGreedMatchTest() {
@ -80,6 +80,29 @@ public class DfaTest {
}
@Test
+	public void densityAndGreedMatchTest2(){
+		WordTree tree = new WordTree();
+		tree.addWord("赵");
+		tree.addWord("赵阿");
+		tree.addWord("赵阿三");
+		final List<FoundWord> result = tree.matchAllWords("赵阿三在做什么", -1, true, true);
+		Assert.assertEquals(3, result.size());
+		Assert.assertEquals("赵", result.get(0).getWord());
+		Assert.assertEquals(0, result.get(0).getStartIndex().intValue());
+		Assert.assertEquals(0, result.get(0).getEndIndex().intValue());
+		Assert.assertEquals("赵阿", result.get(1).getWord());
+		Assert.assertEquals(0, result.get(1).getStartIndex().intValue());
+		Assert.assertEquals(1, result.get(1).getEndIndex().intValue());
+		Assert.assertEquals("赵阿三", result.get(2).getWord());
+		Assert.assertEquals(0, result.get(2).getStartIndex().intValue());
+		Assert.assertEquals(2, result.get(2).getEndIndex().intValue());
+	}
/**
* 停顿词测试
*/

View File

@ -1,5 +1,7 @@
package cn.hutool.dfa;
+import cn.hutool.core.collection.ListUtil;
+import lombok.Data;
import org.junit.Assert;
import org.junit.Test;
@ -24,25 +26,17 @@ public class SensitiveUtilTest {
Assert.assertEquals(bean.getStr(), "我有一颗$*******的");
}
+	@Data
 	public static class TestBean {
 		private String str;
 		private Integer num;
-
-		public String getStr() {
-			return str;
-		}
-
-		public void setStr(String str) {
-			this.str = str;
-		}
-
-		public Integer getNum() {
-			return num;
-		}
-
-		public void setNum(Integer num) {
-			this.num = num;
-		}
 	}
+
+	@Test
+	public void issue2126(){
+		SensitiveUtil.init(ListUtil.of("赵", "赵阿", "赵阿三"));
+		String result = SensitiveUtil.sensitiveFilter("赵阿三在做什么。", true, null);
+		Assert.assertEquals("***在做什么。", result);
+	}
}