mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-05-09 23:51:34 +08:00
fix bug
This commit is contained in:
parent
bbb12fa22d
commit
d5916b9998
@ -23,6 +23,7 @@
|
|||||||
* 【core 】 修复ChineseDate农历获取正月出现数组越界BUG(issue#2112@Github)
|
* 【core 】 修复ChineseDate农历获取正月出现数组越界BUG(issue#2112@Github)
|
||||||
* 【extra 】 修复EmojiUtil.toHtmlHex()方法(pr#519@Gitee)
|
* 【extra 】 修复EmojiUtil.toHtmlHex()方法(pr#519@Gitee)
|
||||||
* 【system 】 修复CpuInfo.getUsed()方法(issue#2116@Github)
|
* 【system 】 修复CpuInfo.getUsed()方法(issue#2116@Github)
|
||||||
|
* 【dfa 】 修复密集匹配和贪婪匹配冲突问题(issue#2126@Github)
|
||||||
|
|
||||||
-------------------------------------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------------------------------------
|
||||||
# 5.7.20 (2022-01-20)
|
# 5.7.20 (2022-01-20)
|
||||||
|
@ -17,11 +17,6 @@
|
|||||||
<description>Hutool 基于DFA的关键词查找</description>
|
<description>Hutool 基于DFA的关键词查找</description>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
|
||||||
<groupId>cn.hutool</groupId>
|
|
||||||
<artifactId>hutool-core</artifactId>
|
|
||||||
<version>${project.parent.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>cn.hutool</groupId>
|
<groupId>cn.hutool</groupId>
|
||||||
<artifactId>hutool-json</artifactId>
|
<artifactId>hutool-json</artifactId>
|
||||||
|
@ -195,11 +195,21 @@ public final class SensitiveUtil {
|
|||||||
*/
|
*/
|
||||||
public static <T> T sensitiveFilter(T bean, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
|
public static <T> T sensitiveFilter(T bean, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
|
||||||
String jsonText = JSONUtil.toJsonStr(bean);
|
String jsonText = JSONUtil.toJsonStr(bean);
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked") final Class<T> c = (Class<T>) bean.getClass();
|
||||||
final Class<T> c = (Class<T>) bean.getClass();
|
|
||||||
return JSONUtil.toBean(sensitiveFilter(jsonText, isGreedMatch, sensitiveProcessor), c);
|
return JSONUtil.toBean(sensitiveFilter(jsonText, isGreedMatch, sensitiveProcessor), c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 处理过滤文本中的敏感词,默认替换成*
|
||||||
|
*
|
||||||
|
* @param text 文本
|
||||||
|
* @return 敏感词过滤处理后的文本
|
||||||
|
* @since 5.7.21
|
||||||
|
*/
|
||||||
|
public static String sensitiveFilter(String text) {
|
||||||
|
return sensitiveFilter(text, true, null);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 处理过滤文本中的敏感词,默认替换成*
|
* 处理过滤文本中的敏感词,默认替换成*
|
||||||
*
|
*
|
||||||
@ -214,13 +224,14 @@ public final class SensitiveUtil {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//敏感词过滤场景下,不需要密集匹配
|
//敏感词过滤场景下,启用密集匹配,配合贪婪匹配可在同一起始位置匹配到更长的敏感词
|
||||||
List<FoundWord> foundWordList = getFoundAllSensitive(text, false, isGreedMatch);
|
List<FoundWord> foundWordList = getFoundAllSensitive(text, true, isGreedMatch);
|
||||||
if (CollUtil.isEmpty(foundWordList)) {
|
if (CollUtil.isEmpty(foundWordList)) {
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
|
sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
|
||||||
} : sensitiveProcessor;
|
} : sensitiveProcessor;
|
||||||
Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size());
|
|
||||||
|
final Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size(), 1);
|
||||||
foundWordList.forEach(foundWord -> foundWordMap.put(foundWord.getStartIndex(), foundWord));
|
foundWordList.forEach(foundWord -> foundWordMap.put(foundWord.getStartIndex(), foundWord));
|
||||||
int length = text.length();
|
int length = text.length();
|
||||||
StringBuilder textStringBuilder = new StringBuilder();
|
StringBuilder textStringBuilder = new StringBuilder();
|
||||||
|
@ -3,7 +3,6 @@ package cn.hutool.dfa;
|
|||||||
import cn.hutool.core.collection.CollUtil;
|
import cn.hutool.core.collection.CollUtil;
|
||||||
import cn.hutool.core.collection.CollectionUtil;
|
import cn.hutool.core.collection.CollectionUtil;
|
||||||
import cn.hutool.core.lang.Filter;
|
import cn.hutool.core.lang.Filter;
|
||||||
import cn.hutool.core.text.StrBuilder;
|
|
||||||
import cn.hutool.core.util.StrUtil;
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -247,15 +246,15 @@ public class WordTree extends HashMap<Character, WordTree> {
|
|||||||
|
|
||||||
List<FoundWord> foundWords = new ArrayList<>();
|
List<FoundWord> foundWords = new ArrayList<>();
|
||||||
WordTree current = this;
|
WordTree current = this;
|
||||||
int length = text.length();
|
final int length = text.length();
|
||||||
final Filter<Character> charFilter = this.charFilter;
|
final Filter<Character> charFilter = this.charFilter;
|
||||||
//存放查找到的字符缓存。完整出现一个词时加到foundWords中,否则清空
|
//存放查找到的字符缓存。完整出现一个词时加到foundWords中,否则清空
|
||||||
final StrBuilder wordBuffer = StrUtil.strBuilder();
|
final StringBuilder wordBuffer = StrUtil.builder();
|
||||||
final StrBuilder keyBuffer = StrUtil.strBuilder();
|
final StringBuilder keyBuffer = StrUtil.builder();
|
||||||
char currentChar;
|
char currentChar;
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
wordBuffer.reset();
|
wordBuffer.setLength(0);
|
||||||
keyBuffer.reset();
|
keyBuffer.setLength(0);
|
||||||
for (int j = i; j < length; j++) {
|
for (int j = i; j < length; j++) {
|
||||||
currentChar = text.charAt(j);
|
currentChar = text.charAt(j);
|
||||||
// Console.log("i: {}, j: {}, currentChar: {}", i, j, currentChar);
|
// Console.log("i: {}, j: {}, currentChar: {}", i, j, currentChar);
|
||||||
@ -284,6 +283,7 @@ public class WordTree extends HashMap<Character, WordTree> {
|
|||||||
if (false == isDensityMatch) {
|
if (false == isDensityMatch) {
|
||||||
//如果非密度匹配,跳过匹配到的词
|
//如果非密度匹配,跳过匹配到的词
|
||||||
i = j;
|
i = j;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (false == isGreedMatch) {
|
if (false == isGreedMatch) {
|
||||||
//如果懒惰匹配(非贪婪匹配)。当遇到第一个结尾标记就结束本轮匹配
|
//如果懒惰匹配(非贪婪匹配)。当遇到第一个结尾标记就结束本轮匹配
|
||||||
|
@ -47,7 +47,7 @@ public class DfaTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 贪婪匹配原则测试
|
* 贪婪非密集匹配原则测试
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void greedMatchTest() {
|
public void greedMatchTest() {
|
||||||
@ -56,15 +56,15 @@ public class DfaTest {
|
|||||||
|
|
||||||
// -----------------------------------------------------------------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------------------------------------------------------------
|
||||||
// 情况三:匹配到最长关键词,跳过已经匹配的关键词
|
// 情况三:匹配到最长关键词,跳过已经匹配的关键词
|
||||||
// 匹配到【大】,由于到最长匹配,因此【大土豆】接着被匹配
|
// 匹配到【大】后,由于非密集匹配,从下一个字符开始继续查找,接着匹配到【土豆】
|
||||||
// 由于【大土豆】被匹配,【土豆】被跳过,由于【刚出锅】被匹配,【出锅】被跳过
|
// 【刚出锅】被匹配后,由于非密集匹配,【出锅】被跳过
|
||||||
List<String> matchAll = tree.matchAll(text, -1, false, true);
|
List<String> matchAll = tree.matchAll(text, -1, false, true);
|
||||||
Assert.assertEquals(matchAll, CollUtil.newArrayList("大", "大土^豆", "刚出锅"));
|
Assert.assertEquals(matchAll, CollUtil.newArrayList("大", "土^豆", "刚出锅"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 密集匹配原则(最短匹配)和贪婪匹配原则测试
|
* 密集匹配原则(最长匹配)和贪婪匹配原则测试
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void densityAndGreedMatchTest() {
|
public void densityAndGreedMatchTest() {
|
||||||
@ -80,6 +80,29 @@ public class DfaTest {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void densityAndGreedMatchTest2(){
|
||||||
|
WordTree tree = new WordTree();
|
||||||
|
tree.addWord("赵");
|
||||||
|
tree.addWord("赵阿");
|
||||||
|
tree.addWord("赵阿三");
|
||||||
|
|
||||||
|
final List<FoundWord> result = tree.matchAllWords("赵阿三在做什么", -1, true, true);
|
||||||
|
Assert.assertEquals(3, result.size());
|
||||||
|
|
||||||
|
Assert.assertEquals("赵", result.get(0).getWord());
|
||||||
|
Assert.assertEquals(0, result.get(0).getStartIndex().intValue());
|
||||||
|
Assert.assertEquals(0, result.get(0).getEndIndex().intValue());
|
||||||
|
|
||||||
|
Assert.assertEquals("赵阿", result.get(1).getWord());
|
||||||
|
Assert.assertEquals(0, result.get(1).getStartIndex().intValue());
|
||||||
|
Assert.assertEquals(1, result.get(1).getEndIndex().intValue());
|
||||||
|
|
||||||
|
Assert.assertEquals("赵阿三", result.get(2).getWord());
|
||||||
|
Assert.assertEquals(0, result.get(2).getStartIndex().intValue());
|
||||||
|
Assert.assertEquals(2, result.get(2).getEndIndex().intValue());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 停顿词测试
|
* 停顿词测试
|
||||||
*/
|
*/
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package cn.hutool.dfa;
|
package cn.hutool.dfa;
|
||||||
|
|
||||||
|
import cn.hutool.core.collection.ListUtil;
|
||||||
|
import lombok.Data;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -24,25 +26,17 @@ public class SensitiveUtilTest {
|
|||||||
Assert.assertEquals(bean.getStr(), "我有一颗$****,***的");
|
Assert.assertEquals(bean.getStr(), "我有一颗$****,***的");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Data
|
||||||
public static class TestBean {
|
public static class TestBean {
|
||||||
private String str;
|
private String str;
|
||||||
private Integer num;
|
private Integer num;
|
||||||
|
|
||||||
public String getStr() {
|
|
||||||
return str;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setStr(String str) {
|
@Test
|
||||||
this.str = str;
|
public void issue2126(){
|
||||||
}
|
SensitiveUtil.init(ListUtil.of("赵", "赵阿", "赵阿三"));
|
||||||
|
|
||||||
public Integer getNum() {
|
String result = SensitiveUtil.sensitiveFilter("赵阿三在做什么。", true, null);
|
||||||
return num;
|
Assert.assertEquals("***在做什么。", result);
|
||||||
}
|
|
||||||
|
|
||||||
public void setNum(Integer num) {
|
|
||||||
this.num = num;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user