mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-05-09 23:51:34 +08:00
fix bug
This commit is contained in:
parent
bbb12fa22d
commit
d5916b9998
@ -23,6 +23,7 @@
|
||||
* 【core 】 修复ChineseDate农历获取正月出现数组越界BUG(issue#2112@Github)
|
||||
* 【extra 】 修复EmojiUtil.toHtmlHex()方法(pr#519@Gitee)
|
||||
* 【system 】 修复CpuInfo.getUsed()方法(issue#2116@Github)
|
||||
* 【dfa 】 修复密集匹配和贪婪匹配冲突问题(issue#2126@Github)
|
||||
|
||||
-------------------------------------------------------------------------------------------------------------
|
||||
# 5.7.20 (2022-01-20)
|
||||
|
@ -17,11 +17,6 @@
|
||||
<description>Hutool 基于DFA的关键词查找</description>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-core</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>cn.hutool</groupId>
|
||||
<artifactId>hutool-json</artifactId>
|
||||
|
@ -195,11 +195,21 @@ public final class SensitiveUtil {
|
||||
*/
|
||||
public static <T> T sensitiveFilter(T bean, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
|
||||
String jsonText = JSONUtil.toJsonStr(bean);
|
||||
@SuppressWarnings("unchecked")
|
||||
final Class<T> c = (Class<T>) bean.getClass();
|
||||
@SuppressWarnings("unchecked") final Class<T> c = (Class<T>) bean.getClass();
|
||||
return JSONUtil.toBean(sensitiveFilter(jsonText, isGreedMatch, sensitiveProcessor), c);
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理过滤文本中的敏感词,默认替换成*
|
||||
*
|
||||
* @param text 文本
|
||||
* @return 敏感词过滤处理后的文本
|
||||
* @since 5.7.21
|
||||
*/
|
||||
public static String sensitiveFilter(String text) {
|
||||
return sensitiveFilter(text, true, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理过滤文本中的敏感词,默认替换成*
|
||||
*
|
||||
@ -214,13 +224,14 @@ public final class SensitiveUtil {
|
||||
}
|
||||
|
||||
//敏感词过滤场景下需要密集匹配:非密集匹配在命中第一个(最短)词后即跳过,会漏掉同一起始位置更长的敏感词(issue#2126)
|
||||
List<FoundWord> foundWordList = getFoundAllSensitive(text, false, isGreedMatch);
|
||||
List<FoundWord> foundWordList = getFoundAllSensitive(text, true, isGreedMatch);
|
||||
if (CollUtil.isEmpty(foundWordList)) {
|
||||
return text;
|
||||
}
|
||||
sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
|
||||
} : sensitiveProcessor;
|
||||
Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size());
|
||||
|
||||
final Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size(), 1);
|
||||
foundWordList.forEach(foundWord -> foundWordMap.put(foundWord.getStartIndex(), foundWord));
|
||||
int length = text.length();
|
||||
StringBuilder textStringBuilder = new StringBuilder();
|
||||
|
@ -3,7 +3,6 @@ package cn.hutool.dfa;
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.collection.CollectionUtil;
|
||||
import cn.hutool.core.lang.Filter;
|
||||
import cn.hutool.core.text.StrBuilder;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -247,15 +246,15 @@ public class WordTree extends HashMap<Character, WordTree> {
|
||||
|
||||
List<FoundWord> foundWords = new ArrayList<>();
|
||||
WordTree current = this;
|
||||
int length = text.length();
|
||||
final int length = text.length();
|
||||
final Filter<Character> charFilter = this.charFilter;
|
||||
//存放查找到的字符缓存。完整出现一个词时加到foundWords中,否则清空
|
||||
final StrBuilder wordBuffer = StrUtil.strBuilder();
|
||||
final StrBuilder keyBuffer = StrUtil.strBuilder();
|
||||
final StringBuilder wordBuffer = StrUtil.builder();
|
||||
final StringBuilder keyBuffer = StrUtil.builder();
|
||||
char currentChar;
|
||||
for (int i = 0; i < length; i++) {
|
||||
wordBuffer.reset();
|
||||
keyBuffer.reset();
|
||||
wordBuffer.setLength(0);
|
||||
keyBuffer.setLength(0);
|
||||
for (int j = i; j < length; j++) {
|
||||
currentChar = text.charAt(j);
|
||||
// Console.log("i: {}, j: {}, currentChar: {}", i, j, currentChar);
|
||||
@ -284,6 +283,7 @@ public class WordTree extends HashMap<Character, WordTree> {
|
||||
if (false == isDensityMatch) {
|
||||
//如果非密集匹配,跳过匹配到的词
|
||||
i = j;
|
||||
break;
|
||||
}
|
||||
if (false == isGreedMatch) {
|
||||
//如果懒惰匹配(非贪婪匹配)。当遇到第一个结尾标记就结束本轮匹配
|
||||
|
@ -47,7 +47,7 @@ public class DfaTest {
|
||||
}
|
||||
|
||||
/**
|
||||
* 贪婪匹配原则测试
|
||||
* 贪婪非密集匹配原则测试
|
||||
*/
|
||||
@Test
|
||||
public void greedMatchTest() {
|
||||
@ -56,15 +56,15 @@ public class DfaTest {
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------------------------
|
||||
// 情况三:匹配到最长关键词,跳过已经匹配的关键词
|
||||
// 匹配到【大】,由于到最长匹配,因此【大土豆】接着被匹配
|
||||
// 由于【大土豆】被匹配,【土豆】被跳过,由于【刚出锅】被匹配,【出锅】被跳过
|
||||
// 匹配到【大】后,由于非密集匹配,从下一个字符开始继续查找,【土豆】接着被匹配
|
||||
// 【刚出锅】被匹配后,由于非密集匹配,【出锅】被跳过
|
||||
List<String> matchAll = tree.matchAll(text, -1, false, true);
|
||||
Assert.assertEquals(matchAll, CollUtil.newArrayList("大", "大土^豆", "刚出锅"));
|
||||
Assert.assertEquals(matchAll, CollUtil.newArrayList("大", "土^豆", "刚出锅"));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 密集匹配原则(最短匹配)和贪婪匹配原则测试
|
||||
* 密集匹配原则(最长匹配)和贪婪匹配原则测试
|
||||
*/
|
||||
@Test
|
||||
public void densityAndGreedMatchTest() {
|
||||
@ -80,6 +80,29 @@ public class DfaTest {
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void densityAndGreedMatchTest2(){
|
||||
WordTree tree = new WordTree();
|
||||
tree.addWord("赵");
|
||||
tree.addWord("赵阿");
|
||||
tree.addWord("赵阿三");
|
||||
|
||||
final List<FoundWord> result = tree.matchAllWords("赵阿三在做什么", -1, true, true);
|
||||
Assert.assertEquals(3, result.size());
|
||||
|
||||
Assert.assertEquals("赵", result.get(0).getWord());
|
||||
Assert.assertEquals(0, result.get(0).getStartIndex().intValue());
|
||||
Assert.assertEquals(0, result.get(0).getEndIndex().intValue());
|
||||
|
||||
Assert.assertEquals("赵阿", result.get(1).getWord());
|
||||
Assert.assertEquals(0, result.get(1).getStartIndex().intValue());
|
||||
Assert.assertEquals(1, result.get(1).getEndIndex().intValue());
|
||||
|
||||
Assert.assertEquals("赵阿三", result.get(2).getWord());
|
||||
Assert.assertEquals(0, result.get(2).getStartIndex().intValue());
|
||||
Assert.assertEquals(2, result.get(2).getEndIndex().intValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* 停顿词测试
|
||||
*/
|
||||
|
@ -1,5 +1,7 @@
|
||||
package cn.hutool.dfa;
|
||||
|
||||
import cn.hutool.core.collection.ListUtil;
|
||||
import lombok.Data;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -24,25 +26,17 @@ public class SensitiveUtilTest {
|
||||
Assert.assertEquals(bean.getStr(), "我有一颗$****,***的");
|
||||
}
|
||||
|
||||
@Data
|
||||
public static class TestBean {
|
||||
private String str;
|
||||
private Integer num;
|
||||
|
||||
public String getStr() {
|
||||
return str;
|
||||
}
|
||||
|
||||
public void setStr(String str) {
|
||||
this.str = str;
|
||||
}
|
||||
|
||||
public Integer getNum() {
|
||||
return num;
|
||||
}
|
||||
|
||||
public void setNum(Integer num) {
|
||||
this.num = num;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void issue2126(){
|
||||
SensitiveUtil.init(ListUtil.of("赵", "赵阿", "赵阿三"));
|
||||
|
||||
String result = SensitiveUtil.sensitiveFilter("赵阿三在做什么。", true, null);
|
||||
Assert.assertEquals("***在做什么。", result);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user