mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-04-19 03:01:48 +08:00
add NFA
This commit is contained in:
parent
ee55467690
commit
50178ee1ab
@ -9,13 +9,13 @@ import java.util.*;
|
|||||||
*
|
*
|
||||||
* @author renyp
|
* @author renyp
|
||||||
*/
|
*/
|
||||||
public class Automaton {
|
public class NFA {
|
||||||
private final Node root;
|
private final Node root;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 默认构造
|
* 默认构造
|
||||||
*/
|
*/
|
||||||
public Automaton() {
|
public NFA() {
|
||||||
this.root = new Node();
|
this.root = new Node();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -24,7 +24,7 @@ public class Automaton {
|
|||||||
*
|
*
|
||||||
* @param words 添加的新词
|
* @param words 添加的新词
|
||||||
*/
|
*/
|
||||||
public Automaton(String... words) {
|
public NFA(final String... words) {
|
||||||
this();
|
this();
|
||||||
this.insert(words);
|
this.insert(words);
|
||||||
}
|
}
|
||||||
@ -34,14 +34,13 @@ public class Automaton {
|
|||||||
*
|
*
|
||||||
* @param word 添加的新词
|
* @param word 添加的新词
|
||||||
*/
|
*/
|
||||||
public void insert(String word) {
|
public void insert(final String word) {
|
||||||
Node p = root;
|
Node p = root;
|
||||||
for (char curr : word.toCharArray()) {
|
for (final char curr : word.toCharArray()) {
|
||||||
int ind = curr;
|
if (p.next.get((int) curr) == null) {
|
||||||
if (p.next.get(ind) == null) {
|
p.next.put((int) curr, new Node());
|
||||||
p.next.put(ind, new Node());
|
|
||||||
}
|
}
|
||||||
p = p.next.get(ind);
|
p = p.next.get((int) curr);
|
||||||
}
|
}
|
||||||
p.flag = true;
|
p.flag = true;
|
||||||
p.str = word;
|
p.str = word;
|
||||||
@ -52,8 +51,8 @@ public class Automaton {
|
|||||||
*
|
*
|
||||||
* @param words 添加的新词
|
* @param words 添加的新词
|
||||||
*/
|
*/
|
||||||
public void insert(String... words) {
|
public void insert(final String... words) {
|
||||||
for (String word : words) {
|
for (final String word : words) {
|
||||||
this.insert(word);
|
this.insert(word);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -62,15 +61,15 @@ public class Automaton {
|
|||||||
* 构建基于NFA模型的 AC自动机
|
* 构建基于NFA模型的 AC自动机
|
||||||
*/
|
*/
|
||||||
public void buildAc() {
|
public void buildAc() {
|
||||||
Queue<Node> queue = new LinkedList<>();
|
final Queue<Node> queue = new LinkedList<>();
|
||||||
Node p = root;
|
final Node p = root;
|
||||||
for (Integer key : p.next.keySet()) {
|
for (final Integer key : p.next.keySet()) {
|
||||||
p.next.get(key).fail = root;
|
p.next.get(key).fail = root;
|
||||||
queue.offer(p.next.get(key));
|
queue.offer(p.next.get(key));
|
||||||
}
|
}
|
||||||
while (!queue.isEmpty()) {
|
while (!queue.isEmpty()) {
|
||||||
Node curr = queue.poll();
|
final Node curr = queue.poll();
|
||||||
for (Integer key : curr.next.keySet()) {
|
for (final Integer key : curr.next.keySet()) {
|
||||||
Node fail = curr.fail;
|
Node fail = curr.fail;
|
||||||
// 查找当前节点匹配失败,他对应等效匹配的节点是哪个
|
// 查找当前节点匹配失败,他对应等效匹配的节点是哪个
|
||||||
while (fail != null && fail.next.get(key) == null) {
|
while (fail != null && fail.next.get(key) == null) {
|
||||||
@ -90,20 +89,22 @@ public class Automaton {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @param text 查询的文本(母串)
|
* @param text 查询的文本(母串)
|
||||||
|
* @return 关键字列表
|
||||||
*/
|
*/
|
||||||
public List<FoundWord> find(String text) {
|
public List<FoundWord> find(final String text) {
|
||||||
return this.find(text, true);
|
return this.find(text, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param text 查找的文本(母串)
|
* @param text 查找的文本(母串)
|
||||||
* @param isDensityMatch 是否密集匹配
|
* @param isDensityMatch 是否密集匹配
|
||||||
|
* @return 关键字列表
|
||||||
*/
|
*/
|
||||||
public List<FoundWord> find(String text, boolean isDensityMatch) {
|
public List<FoundWord> find(final String text, final boolean isDensityMatch) {
|
||||||
List<FoundWord> ans = new ArrayList<>();
|
final List<FoundWord> ans = new ArrayList<>();
|
||||||
Node p = root, k = null;
|
Node p = root, k;
|
||||||
for (int i = 0, len = text.length(); i < len; i++) {
|
for (int i = 0, len = text.length(); i < len; i++) {
|
||||||
int ind = text.charAt(i);
|
final int ind = text.charAt(i);
|
||||||
// 状态转移(沿着fail指针链接的链表,此处区别于DFA模型)
|
// 状态转移(沿着fail指针链接的链表,此处区别于DFA模型)
|
||||||
while (p != null && p.next.get(ind) == null) {
|
while (p != null && p.next.get(ind) == null) {
|
||||||
p = p.fail;
|
p = p.fail;
|
@ -1,225 +0,0 @@
|
|||||||
package cn.hutool.core.text.dfa;
|
|
||||||
|
|
||||||
import cn.hutool.core.date.StopWatch;
|
|
||||||
import junit.framework.TestCase;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
public class AutomatonTest extends TestCase {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 密集匹配 测试查找结果,并与WordTree对比效率
|
|
||||||
*/
|
|
||||||
public void testFind() {
|
|
||||||
Automaton automaton = new Automaton();
|
|
||||||
WordTree wordTree = new WordTree();
|
|
||||||
automaton.insert("say", "her", "he", "she", "shr");
|
|
||||||
automaton.buildAc();
|
|
||||||
wordTree.addWords("say", "her", "he", "she", "shr");
|
|
||||||
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
|
||||||
String input = "sasherhsay";
|
|
||||||
|
|
||||||
stopWatch.start("automaton_char_find");
|
|
||||||
List<FoundWord> ans1 = automaton.find(input);
|
|
||||||
stopWatch.stop();
|
|
||||||
assertEquals("she,he,her,say", ans1.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
|
||||||
assertEquals(Integer.valueOf(2), ans1.get(0).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(4), ans1.get(0).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(3), ans1.get(1).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(4), ans1.get(1).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(3), ans1.get(2).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(5), ans1.get(2).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(7), ans1.get(3).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(9), ans1.get(3).getEndIndex());
|
|
||||||
|
|
||||||
stopWatch.start("wordtree_char_find");
|
|
||||||
List<String> ans2 = wordTree.matchAll(input, -1, true, true);
|
|
||||||
stopWatch.stop();
|
|
||||||
assertEquals("she,he,her,say", String.join(",", ans2));
|
|
||||||
|
|
||||||
System.out.println(stopWatch.prettyPrint());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 非密集匹配 测试查找结果,并与WordTree对比效率
|
|
||||||
*/
|
|
||||||
public void testFindNotDensity() {
|
|
||||||
Automaton automaton = new Automaton();
|
|
||||||
WordTree wordTree = new WordTree();
|
|
||||||
automaton.insert("say", "her", "he", "she", "shr");
|
|
||||||
automaton.buildAc();
|
|
||||||
wordTree.addWords("say", "her", "he", "she", "shr");
|
|
||||||
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
|
||||||
String input = "sasherhsay";
|
|
||||||
|
|
||||||
stopWatch.start("automaton_char_find_not_density");
|
|
||||||
List<FoundWord> ans1 = automaton.find(input, false);
|
|
||||||
stopWatch.stop();
|
|
||||||
assertEquals("she,say", ans1.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
|
||||||
assertEquals(Integer.valueOf(2), ans1.get(0).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(4), ans1.get(0).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(7), ans1.get(1).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(9), ans1.get(1).getEndIndex());
|
|
||||||
|
|
||||||
stopWatch.start("wordtree_char_find_not_density");
|
|
||||||
List<String> ans2 = wordTree.matchAll(input, -1, false, true);
|
|
||||||
stopWatch.stop();
|
|
||||||
assertEquals("she,say", String.join(",", ans2));
|
|
||||||
|
|
||||||
System.out.println(stopWatch.prettyPrint());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 密集匹配 测试建树和查找,并与WordTree对比效率
|
|
||||||
*/
|
|
||||||
public void testBuildAndFind() {
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
|
||||||
String input = "sasherhsay";
|
|
||||||
|
|
||||||
stopWatch.start("automaton_char_buid_find");
|
|
||||||
Automaton automatonLocal = new Automaton();
|
|
||||||
automatonLocal.insert("say", "her", "he", "she", "shr");
|
|
||||||
automatonLocal.buildAc();
|
|
||||||
List<FoundWord> ans1 = automatonLocal.find(input);
|
|
||||||
stopWatch.stop();
|
|
||||||
assertEquals("she,he,her,say", ans1.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
|
||||||
assertEquals(Integer.valueOf(2), ans1.get(0).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(4), ans1.get(0).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(3), ans1.get(1).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(4), ans1.get(1).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(3), ans1.get(2).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(5), ans1.get(2).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(7), ans1.get(3).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(9), ans1.get(3).getEndIndex());
|
|
||||||
|
|
||||||
stopWatch.start("wordtree_char_build_find");
|
|
||||||
WordTree wordTreeLocal = new WordTree();
|
|
||||||
wordTreeLocal.addWords("say", "her", "he", "she", "shr");
|
|
||||||
List<String> ans2 = wordTreeLocal.matchAll(input, -1, true, true);
|
|
||||||
stopWatch.stop();
|
|
||||||
assertEquals("she,he,her,say", String.join(",", ans2));
|
|
||||||
|
|
||||||
System.out.println(stopWatch.prettyPrint());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 密集匹配 构建树和查找 测试中文字符,并与wordTree对比效率
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testBuildFindCnChar() {
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
|
||||||
String input = "赵啊三在做什么";
|
|
||||||
|
|
||||||
stopWatch.start("automaton_cn_build_find");
|
|
||||||
Automaton automatonLocal = new Automaton();
|
|
||||||
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
|
||||||
automatonLocal.buildAc();
|
|
||||||
|
|
||||||
final List<FoundWord> result = automatonLocal.find(input);
|
|
||||||
stopWatch.stop();
|
|
||||||
|
|
||||||
Assert.assertEquals(3, result.size());
|
|
||||||
Assert.assertEquals("赵,赵啊,赵啊三", result.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(0).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(0).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(1).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(1), result.get(1).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(2).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(2), result.get(2).getEndIndex());
|
|
||||||
|
|
||||||
stopWatch.start("wordtree_cn_build_find");
|
|
||||||
WordTree wordTreeLocal = new WordTree();
|
|
||||||
wordTreeLocal.addWords("赵", "赵啊", "赵啊三");
|
|
||||||
|
|
||||||
final List<String> result1 = wordTreeLocal.matchAll(input, -1, true, true);
|
|
||||||
stopWatch.stop();
|
|
||||||
|
|
||||||
Assert.assertEquals(3, result1.size());
|
|
||||||
Assert.assertEquals("赵,赵啊,赵啊三", String.join(",", result1));
|
|
||||||
|
|
||||||
System.out.println(stopWatch.prettyPrint());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 密集匹配 测试构建树和查找 中文字符,并与wordTree对比效率
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testFindCNChar() {
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
|
||||||
String input = "赵啊三在做什么";
|
|
||||||
|
|
||||||
Automaton automatonLocal = new Automaton();
|
|
||||||
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
|
||||||
automatonLocal.buildAc();
|
|
||||||
|
|
||||||
stopWatch.start("automaton_cn_find");
|
|
||||||
final List<FoundWord> result = automatonLocal.find(input);
|
|
||||||
stopWatch.stop();
|
|
||||||
|
|
||||||
Assert.assertEquals(3, result.size());
|
|
||||||
Assert.assertEquals("赵,赵啊,赵啊三", result.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(0).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(0).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(1).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(1), result.get(1).getEndIndex());
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(2).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(2), result.get(2).getEndIndex());
|
|
||||||
|
|
||||||
WordTree wordTreeLocal = new WordTree();
|
|
||||||
wordTreeLocal.addWords("赵", "赵啊", "赵啊三");
|
|
||||||
|
|
||||||
stopWatch.start("wordtree_cn_find");
|
|
||||||
final List<String> result1 = wordTreeLocal.matchAllWords(input, -1, true, true).stream().map(FoundWord::getWord)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
stopWatch.stop();
|
|
||||||
|
|
||||||
Assert.assertEquals(3, result1.size());
|
|
||||||
Assert.assertEquals("赵,赵啊,赵啊三", String.join(",", result1));
|
|
||||||
|
|
||||||
System.out.println(stopWatch.prettyPrint());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 非密集匹配 测试构建树和查找 中文字符,并与wordTree对比效率,
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testFindCNCharNotDensity() {
|
|
||||||
StopWatch stopWatch = new StopWatch();
|
|
||||||
String input = "赵啊三在做什么";
|
|
||||||
|
|
||||||
Automaton automatonLocal = new Automaton();
|
|
||||||
automatonLocal.insert("赵", "赵啊", "赵啊三");
|
|
||||||
automatonLocal.buildAc();
|
|
||||||
|
|
||||||
stopWatch.start("automaton_cn_find_not_density");
|
|
||||||
final List<FoundWord> result = automatonLocal.find(input, false);
|
|
||||||
stopWatch.stop();
|
|
||||||
|
|
||||||
Assert.assertEquals(1, result.size());
|
|
||||||
Assert.assertEquals("赵", result.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(0).getStartIndex());
|
|
||||||
assertEquals(Integer.valueOf(0), result.get(0).getEndIndex());
|
|
||||||
|
|
||||||
WordTree wordTreeLocal = new WordTree();
|
|
||||||
wordTreeLocal.addWords("赵", "赵啊", "赵啊三");
|
|
||||||
|
|
||||||
stopWatch.start("wordtree_cn_find_not_density");
|
|
||||||
final List<String> result1 =
|
|
||||||
wordTreeLocal.matchAllWords(input, -1, false, true).stream().map(FoundWord::getWord)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
stopWatch.stop();
|
|
||||||
|
|
||||||
Assert.assertEquals(1, result1.size());
|
|
||||||
Assert.assertEquals("赵", String.join(",", result1));
|
|
||||||
|
|
||||||
System.out.println(stopWatch.prettyPrint());
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
228
hutool-core/src/test/java/cn/hutool/core/text/dfa/NFATest.java
Normal file
228
hutool-core/src/test/java/cn/hutool/core/text/dfa/NFATest.java
Normal file
@ -0,0 +1,228 @@
|
|||||||
|
package cn.hutool.core.text.dfa;
|
||||||
|
|
||||||
|
import cn.hutool.core.date.StopWatch;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public class NFATest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 密集匹配 测试查找结果,并与WordTree对比效率
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFind() {
|
||||||
|
final NFA NFA = new NFA();
|
||||||
|
NFA.insert("say", "her", "he", "she", "shr");
|
||||||
|
NFA.buildAc();
|
||||||
|
|
||||||
|
final WordTree wordTree = new WordTree();
|
||||||
|
wordTree.addWords("say", "her", "he", "she", "shr");
|
||||||
|
|
||||||
|
final StopWatch stopWatch = new StopWatch();
|
||||||
|
final String input = "sasherhsay";
|
||||||
|
|
||||||
|
stopWatch.start("automaton_char_find");
|
||||||
|
final List<FoundWord> ans1 = NFA.find(input);
|
||||||
|
stopWatch.stop();
|
||||||
|
|
||||||
|
Assert.assertEquals("she,he,her,say", ans1.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
||||||
|
Assert.assertEquals(2, ans1.get(0).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(4, ans1.get(0).getEndIndex().intValue());
|
||||||
|
Assert.assertEquals(3, ans1.get(1).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(4, ans1.get(1).getEndIndex().intValue());
|
||||||
|
Assert.assertEquals(3, ans1.get(2).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(5, ans1.get(2).getEndIndex().intValue());
|
||||||
|
Assert.assertEquals(7, ans1.get(3).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(9, ans1.get(3).getEndIndex().intValue());
|
||||||
|
|
||||||
|
stopWatch.start("wordtree_char_find");
|
||||||
|
final List<String> ans2 = wordTree.matchAll(input, -1, true, true);
|
||||||
|
stopWatch.stop();
|
||||||
|
Assert.assertEquals("she,he,her,say", String.join(",", ans2));
|
||||||
|
|
||||||
|
//Console.log(stopWatch.prettyPrint());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 非密集匹配 测试查找结果,并与WordTree对比效率
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFindNotDensity() {
|
||||||
|
final NFA NFA = new NFA();
|
||||||
|
NFA.insert("say", "her", "he", "she", "shr");
|
||||||
|
NFA.buildAc();
|
||||||
|
|
||||||
|
final WordTree wordTree = new WordTree();
|
||||||
|
wordTree.addWords("say", "her", "he", "she", "shr");
|
||||||
|
|
||||||
|
final StopWatch stopWatch = new StopWatch();
|
||||||
|
final String input = "sasherhsay";
|
||||||
|
|
||||||
|
stopWatch.start("automaton_char_find_not_density");
|
||||||
|
final List<FoundWord> ans1 = NFA.find(input, false);
|
||||||
|
stopWatch.stop();
|
||||||
|
Assert.assertEquals("she,say", ans1.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
||||||
|
Assert.assertEquals(2, ans1.get(0).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(4, ans1.get(0).getEndIndex().intValue());
|
||||||
|
Assert.assertEquals(7, ans1.get(1).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(9, ans1.get(1).getEndIndex().intValue());
|
||||||
|
|
||||||
|
stopWatch.start("wordtree_char_find_not_density");
|
||||||
|
final List<String> ans2 = wordTree.matchAll(input, -1, false, true);
|
||||||
|
stopWatch.stop();
|
||||||
|
Assert.assertEquals("she,say", String.join(",", ans2));
|
||||||
|
|
||||||
|
//Console.log(stopWatch.prettyPrint());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 密集匹配 测试建树和查找,并与WordTree对比效率
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testBuildAndFind() {
|
||||||
|
final StopWatch stopWatch = new StopWatch();
|
||||||
|
final String input = "sasherhsay";
|
||||||
|
|
||||||
|
stopWatch.start("automaton_char_buid_find");
|
||||||
|
final NFA NFALocal = new NFA();
|
||||||
|
NFALocal.insert("say", "her", "he", "she", "shr");
|
||||||
|
NFALocal.buildAc();
|
||||||
|
final List<FoundWord> ans1 = NFALocal.find(input);
|
||||||
|
stopWatch.stop();
|
||||||
|
|
||||||
|
Assert.assertEquals("she,he,her,say", ans1.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
||||||
|
Assert.assertEquals(2, ans1.get(0).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(4, ans1.get(0).getEndIndex().intValue());
|
||||||
|
Assert.assertEquals(3, ans1.get(1).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(4, ans1.get(1).getEndIndex().intValue());
|
||||||
|
Assert.assertEquals(3, ans1.get(2).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(5, ans1.get(2).getEndIndex().intValue());
|
||||||
|
Assert.assertEquals(7, ans1.get(3).getBeginIndex().intValue());
|
||||||
|
Assert.assertEquals(9, ans1.get(3).getEndIndex().intValue());
|
||||||
|
|
||||||
|
stopWatch.start("wordtree_char_build_find");
|
||||||
|
final WordTree wordTreeLocal = new WordTree();
|
||||||
|
wordTreeLocal.addWords("say", "her", "he", "she", "shr");
|
||||||
|
final List<String> ans2 = wordTreeLocal.matchAll(input, -1, true, true);
|
||||||
|
stopWatch.stop();
|
||||||
|
Assert.assertEquals("she,he,her,say", String.join(",", ans2));
|
||||||
|
|
||||||
|
//Console.log(stopWatch.prettyPrint());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 密集匹配 构建树和查找 测试中文字符,并与wordTree对比效率
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void buildFindCnCharTest() {
|
||||||
|
final StopWatch stopWatch = new StopWatch();
|
||||||
|
final String input = "赵啊三在做什么";
|
||||||
|
|
||||||
|
stopWatch.start("automaton_cn_build_find");
|
||||||
|
final NFA NFALocal = new NFA();
|
||||||
|
NFALocal.insert("赵", "赵啊", "赵啊三");
|
||||||
|
NFALocal.buildAc();
|
||||||
|
|
||||||
|
final List<FoundWord> result = NFALocal.find(input);
|
||||||
|
stopWatch.stop();
|
||||||
|
|
||||||
|
Assert.assertEquals(3, result.size());
|
||||||
|
Assert.assertEquals("赵,赵啊,赵啊三", result.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(0).getBeginIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(0).getEndIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(1).getBeginIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(1), result.get(1).getEndIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(2).getBeginIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(2), result.get(2).getEndIndex());
|
||||||
|
|
||||||
|
stopWatch.start("wordtree_cn_build_find");
|
||||||
|
final WordTree wordTreeLocal = new WordTree();
|
||||||
|
wordTreeLocal.addWords("赵", "赵啊", "赵啊三");
|
||||||
|
|
||||||
|
final List<String> result1 = wordTreeLocal.matchAll(input, -1, true, true);
|
||||||
|
stopWatch.stop();
|
||||||
|
|
||||||
|
Assert.assertEquals(3, result1.size());
|
||||||
|
Assert.assertEquals("赵,赵啊,赵啊三", String.join(",", result1));
|
||||||
|
|
||||||
|
//Console.log(stopWatch.prettyPrint());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 密集匹配 测试构建树和查找 中文字符,并与wordTree对比效率
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFindCNChar() {
|
||||||
|
final StopWatch stopWatch = new StopWatch();
|
||||||
|
final String input = "赵啊三在做什么";
|
||||||
|
|
||||||
|
final NFA NFALocal = new NFA();
|
||||||
|
NFALocal.insert("赵", "赵啊", "赵啊三");
|
||||||
|
NFALocal.buildAc();
|
||||||
|
|
||||||
|
stopWatch.start("automaton_cn_find");
|
||||||
|
final List<FoundWord> result = NFALocal.find(input);
|
||||||
|
stopWatch.stop();
|
||||||
|
|
||||||
|
Assert.assertEquals(3, result.size());
|
||||||
|
Assert.assertEquals("赵,赵啊,赵啊三", result.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(0).getBeginIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(0).getEndIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(1).getBeginIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(1), result.get(1).getEndIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(2).getBeginIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(2), result.get(2).getEndIndex());
|
||||||
|
|
||||||
|
final WordTree wordTreeLocal = new WordTree();
|
||||||
|
wordTreeLocal.addWords("赵", "赵啊", "赵啊三");
|
||||||
|
|
||||||
|
stopWatch.start("wordtree_cn_find");
|
||||||
|
final List<String> result1 = wordTreeLocal.matchAllWords(input, -1, true, true).stream().map(FoundWord::getWord)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
stopWatch.stop();
|
||||||
|
|
||||||
|
Assert.assertEquals(3, result1.size());
|
||||||
|
Assert.assertEquals("赵,赵啊,赵啊三", String.join(",", result1));
|
||||||
|
|
||||||
|
//Console.log(stopWatch.prettyPrint());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 非密集匹配 测试构建树和查找 中文字符,并与wordTree对比效率,
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFindCNCharNotDensity() {
|
||||||
|
final StopWatch stopWatch = new StopWatch();
|
||||||
|
final String input = "赵啊三在做什么";
|
||||||
|
|
||||||
|
final NFA NFALocal = new NFA();
|
||||||
|
NFALocal.insert("赵", "赵啊", "赵啊三");
|
||||||
|
NFALocal.buildAc();
|
||||||
|
|
||||||
|
stopWatch.start("automaton_cn_find_not_density");
|
||||||
|
final List<FoundWord> result = NFALocal.find(input, false);
|
||||||
|
stopWatch.stop();
|
||||||
|
|
||||||
|
Assert.assertEquals(1, result.size());
|
||||||
|
Assert.assertEquals("赵", result.stream().map(FoundWord::getWord).collect(Collectors.joining(",")));
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(0).getBeginIndex());
|
||||||
|
Assert.assertEquals(Integer.valueOf(0), result.get(0).getEndIndex());
|
||||||
|
|
||||||
|
final WordTree wordTreeLocal = new WordTree();
|
||||||
|
wordTreeLocal.addWords("赵", "赵啊", "赵啊三");
|
||||||
|
|
||||||
|
stopWatch.start("wordtree_cn_find_not_density");
|
||||||
|
final List<String> result1 =
|
||||||
|
wordTreeLocal.matchAllWords(input, -1, false, true).stream().map(FoundWord::getWord)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
stopWatch.stop();
|
||||||
|
|
||||||
|
Assert.assertEquals(1, result1.size());
|
||||||
|
Assert.assertEquals("赵", String.join(",", result1));
|
||||||
|
|
||||||
|
//Console.log(stopWatch.prettyPrint());
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user