mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-05-09 23:51:34 +08:00
add filter support
This commit is contained in:
parent
8fba51f62b
commit
2e2d43d764
@ -20,6 +20,7 @@
|
||||
* 【crypto】 RSA算法中,BlockSize长度策略调整(issue#721@Github)
|
||||
* 【crypto】 删除SM2Engine,使用BC库中的对象替代
|
||||
* 【crypto】 增加PemUtil工具类
|
||||
* 【dfa 】 WordTree增加Filter,支持自定义特殊字符过滤器
|
||||
|
||||
### Bug修复
|
||||
|
||||
|
@ -2,13 +2,14 @@ package cn.hutool.core.lang;
|
||||
|
||||
/**
|
||||
* 过滤器接口
|
||||
* @author Looly
|
||||
*
|
||||
* @author Looly
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface Filter<T> {
|
||||
/**
|
||||
* 是否接受对象
|
||||
*
|
||||
* @param t 检查的对象
|
||||
* @return 是否接受对象
|
||||
*/
|
||||
|
@ -9,6 +9,9 @@ import org.junit.Test;
|
||||
|
||||
public class BCUtilTest {
|
||||
|
||||
/**
|
||||
* 密钥生成来自:https://i.goto327.top/CryptTools/SM2.aspx?tdsourcetag=s_pctim_aiomsg
|
||||
*/
|
||||
@Test
|
||||
public void createECPublicKeyParametersTest() {
|
||||
String x = "706AD9DAA3E5CEAC3DA59F583429E8043BAFC576BE10092C4EA4D8E19846CA62";
|
||||
|
@ -1,4 +1,5 @@
|
||||
package cn.hutool.dfa;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@ -7,6 +8,7 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import cn.hutool.core.collection.CollectionUtil;
|
||||
import cn.hutool.core.lang.Filter;
|
||||
import cn.hutool.core.text.StrBuilder;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
|
||||
@ -22,8 +24,8 @@ import cn.hutool.core.util.StrUtil;
|
||||
* / <br>
|
||||
* 巾 <br>
|
||||
* 其中每个节点都是一个WordTree对象,查找时从上向下查找。<br>
|
||||
* @author Looly
|
||||
*
|
||||
* @author Looly
|
||||
*/
|
||||
public class WordTree extends HashMap<Character, WordTree> {
|
||||
private static final long serialVersionUID = -4646423269465809276L;
|
||||
@ -32,8 +34,13 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
* 敏感词字符末尾标识,用于标识单词末尾字符
|
||||
*/
|
||||
private Set<Character> endCharacterSet = new HashSet<>();
|
||||
/**
|
||||
* 字符过滤规则,通过定义字符过滤规则,过滤不需要的字符,当accept为false时,此字符不参与匹配
|
||||
*/
|
||||
private Filter<Character> charFilter = StopChar::isNotStopChar;
|
||||
|
||||
//--------------------------------------------------------------------------------------- Constructor start
|
||||
|
||||
/**
|
||||
* 默认构造
|
||||
*/
|
||||
@ -41,10 +48,24 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
}
|
||||
//--------------------------------------------------------------------------------------- Constructor end
|
||||
|
||||
/**
|
||||
* 设置字符过滤规则,通过定义字符串过滤规则,过滤不需要的字符<br>
|
||||
* 当accept为false时,此字符不参与匹配
|
||||
*
|
||||
* @param charFilter 过滤函数
|
||||
* @return this
|
||||
* @since 5.2.0
|
||||
*/
|
||||
public WordTree setCharFilter(Filter<Character> charFilter) {
|
||||
this.charFilter = charFilter;
|
||||
return this;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------- add word
|
||||
|
||||
/**
|
||||
* 增加一组单词
|
||||
*
|
||||
* @param words 单词集合
|
||||
*/
|
||||
public void addWords(Collection<String> words) {
|
||||
@ -58,6 +79,7 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
|
||||
/**
|
||||
* 增加一组单词
|
||||
*
|
||||
* @param words 单词数组
|
||||
*/
|
||||
public void addWords(String... words) {
|
||||
@ -69,9 +91,11 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
|
||||
/**
|
||||
* 添加单词,使用默认类型
|
||||
*
|
||||
* @param word 单词
|
||||
*/
|
||||
public void addWord(String word) {
|
||||
final Filter<Character> charFilter = this.charFilter;
|
||||
WordTree parent = null;
|
||||
WordTree current = this;
|
||||
WordTree child;
|
||||
@ -79,7 +103,7 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
int length = word.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
currentChar = word.charAt(i);
|
||||
if(false == StopChar.isStopChar(currentChar)){//只处理合法字符
|
||||
if (charFilter.accept(currentChar)) {//只处理合法字符
|
||||
child = current.get(currentChar);
|
||||
if (child == null) {
|
||||
//无子类,新建一个子节点后存放下一个字符
|
||||
@ -96,8 +120,10 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------- match
|
||||
|
||||
/**
|
||||
* 指定文本是否包含树中的词
|
||||
*
|
||||
* @param text 被检查的文本
|
||||
* @return 是否包含
|
||||
*/
|
||||
@ -110,6 +136,7 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
|
||||
/**
|
||||
* 获得第一个匹配的关键字
|
||||
*
|
||||
* @param text 被检查的文本
|
||||
* @return 匹配到的关键字
|
||||
*/
|
||||
@ -125,8 +152,10 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------- match all
|
||||
|
||||
/**
|
||||
* 找出所有匹配的关键字
|
||||
*
|
||||
* @param text 被检查的文本
|
||||
* @return 匹配的词列表
|
||||
*/
|
||||
@ -136,6 +165,7 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
|
||||
/**
|
||||
* 找出所有匹配的关键字
|
||||
*
|
||||
* @param text 被检查的文本
|
||||
* @param limit 限制匹配个数
|
||||
* @return 匹配的词列表
|
||||
@ -163,6 +193,7 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
List<String> foundWords = new ArrayList<>();
|
||||
WordTree current = this;
|
||||
int length = text.length();
|
||||
final Filter<Character> charFilter = this.charFilter;
|
||||
//存放查找到的字符缓存。完整出现一个词时加到foundWords中,否则清空
|
||||
final StrBuilder wordBuffer = StrUtil.strBuilder();
|
||||
char currentChar;
|
||||
@ -171,7 +202,7 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
for (int j = i; j < length; j++) {
|
||||
currentChar = text.charAt(j);
|
||||
// Console.log("i: {}, j: {}, currentChar: {}", i, j, currentChar);
|
||||
if(StopChar.isStopChar(currentChar)){
|
||||
if (false == charFilter.accept(currentChar)) {
|
||||
if (wordBuffer.length() > 0) {
|
||||
//做为关键词中间的停顿词被当作关键词的一部分被返回
|
||||
wordBuffer.append(currentChar);
|
||||
@ -213,8 +244,10 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------- Private method start
|
||||
|
||||
/**
|
||||
* 是否末尾
|
||||
*
|
||||
* @param c 检查的字符
|
||||
* @return 是否末尾
|
||||
*/
|
||||
@ -224,6 +257,7 @@ public class WordTree extends HashMap<Character, WordTree>{
|
||||
|
||||
/**
|
||||
* 设置是否到达末尾
|
||||
*
|
||||
* @param c 设置结尾的字符
|
||||
*/
|
||||
private void setEnd(Character c) {
|
||||
|
@ -16,8 +16,8 @@ import cn.hutool.dfa.WordTree;
|
||||
*/
|
||||
public class DfaTest {
|
||||
|
||||
// 构建被查询的文本
|
||||
String text = "我有一颗大土豆,刚出锅的";
|
||||
// 构建被查询的文本,包含停顿词
|
||||
String text = "我有一颗$大土^豆,刚出锅的";
|
||||
|
||||
@Test
|
||||
public void matchAllTest() {
|
||||
@ -29,7 +29,7 @@ public class DfaTest {
|
||||
// 匹配到【大】,就不再继续匹配了,因此【大土豆】不匹配
|
||||
// 匹配到【刚出锅】,就跳过这三个字了,因此【出锅】不匹配(由于刚首先被匹配,因此长的被匹配,最短匹配只针对第一个字相同选最短)
|
||||
List<String> matchAll = tree.matchAll(text, -1, false, false);
|
||||
Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土豆", "刚出锅"));
|
||||
Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土^豆", "刚出锅"));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -45,7 +45,7 @@ public class DfaTest {
|
||||
// 【大】被匹配,最短匹配原则【大土豆】被跳过,【土豆继续被匹配】
|
||||
// 【刚出锅】被匹配,由于不跳过已经匹配的词,【出锅】被匹配
|
||||
List<String> matchAll = tree.matchAll(text, -1, true, false);
|
||||
Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土豆", "刚出锅", "出锅"));
|
||||
Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "土^豆", "刚出锅", "出锅"));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -61,7 +61,7 @@ public class DfaTest {
|
||||
// 匹配到【大】,由于到最长匹配,因此【大土豆】接着被匹配
|
||||
// 由于【大土豆】被匹配,【土豆】被跳过,由于【刚出锅】被匹配,【出锅】被跳过
|
||||
List<String> matchAll = tree.matchAll(text, -1, false, true);
|
||||
Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土豆", "刚出锅"));
|
||||
Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土^豆", "刚出锅"));
|
||||
|
||||
}
|
||||
|
||||
@ -78,7 +78,7 @@ public class DfaTest {
|
||||
// 匹配到【大】,由于到最长匹配,因此【大土豆】接着被匹配,由于不跳过已经匹配的关键词,土豆继续被匹配
|
||||
// 【刚出锅】被匹配,由于不跳过已经匹配的词,【出锅】被匹配
|
||||
List<String> matchAll = tree.matchAll(text, -1, true, true);
|
||||
Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土豆", "土豆", "刚出锅", "出锅"));
|
||||
Assert.assertEquals(matchAll, CollectionUtil.newArrayList("大", "大土^豆", "土^豆", "刚出锅", "出锅"));
|
||||
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user