diff --git a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/TokenizerEngine.java b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/TokenizerEngine.java
index d6e097d0b..c0a780585 100644
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/TokenizerEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/TokenizerEngine.java
@@ -15,10 +15,10 @@ package org.dromara.hutool.extra.tokenizer.engine;
import org.dromara.hutool.extra.tokenizer.Result;
/**
- * 分词引擎接口定义,用户通过实现此接口完成特定分词引擎的适配
+ * 分词引擎接口定义,用户通过实现此接口完成特定分词引擎的适配
+ * 由于引擎使用单例模式,因此要求实现类保证线程安全
*
* @author looly
- *
*/
public interface TokenizerEngine {
diff --git a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/hanlp/HanLPEngine.java b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/hanlp/HanLPEngine.java
index 7b48e2f3c..e3bdc7ad0 100644
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/hanlp/HanLPEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/hanlp/HanLPEngine.java
@@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* HanLP分词引擎实现
- * 项目地址:https://github.com/hankcs/HanLP
+ * 项目地址:https://github.com/hankcs/HanLP
+ * {@link Segment#seg(String)}方法线程安全
*
* @author looly
- *
*/
public class HanLPEngine implements TokenizerEngine {
@@ -32,7 +32,6 @@ public class HanLPEngine implements TokenizerEngine {
/**
* 构造
- *
*/
public HanLPEngine() {
this(HanLP.newSegment());
diff --git a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/ikanalyzer/IKAnalyzerEngine.java b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/ikanalyzer/IKAnalyzerEngine.java
index 337ec2321..59f67892a 100644
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/ikanalyzer/IKAnalyzerEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/ikanalyzer/IKAnalyzerEngine.java
@@ -12,6 +12,8 @@
package org.dromara.hutool.extra.tokenizer.engine.ikanalyzer;
+import org.wltea.analyzer.cfg.Configuration;
+import org.wltea.analyzer.cfg.DefaultConfig;
import org.wltea.analyzer.core.IKSegmenter;
import org.dromara.hutool.core.text.StrUtil;
@@ -20,35 +22,44 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* IKAnalyzer分词引擎实现
- * 项目地址:https://github.com/yozhao/IKAnalyzer
+ * 项目地址:https://github.com/yozhao/IKAnalyzer
+ * {@link IKSegmenter} 非线程安全,因此每次单独创建对象
*
* @author looly
- *
*/
public class IKAnalyzerEngine implements TokenizerEngine {
- private final IKSegmenter seg;
+ private final Configuration cfg;
/**
* 构造
- *
*/
public IKAnalyzerEngine() {
- this(new IKSegmenter(null, true));
+ this(createDefaultConfig());
}
/**
* 构造
- *
- * @param seg {@link IKSegmenter}
+ * @param cfg 配置
*/
- public IKAnalyzerEngine(final IKSegmenter seg) {
- this.seg = seg;
+ public IKAnalyzerEngine(final Configuration cfg) {
+ cfg.setUseSmart(true);
+ this.cfg = cfg;
}
@Override
public Result parse(final CharSequence text) {
- this.seg.reset(StrUtil.getReader(text));
- return new IKAnalyzerResult(this.seg);
+ final IKSegmenter seg = new IKSegmenter(StrUtil.getReader(text), cfg);
+ return new IKAnalyzerResult(seg);
+ }
+
+ /**
+ * 创建默认配置
+ * @return {@link Configuration}
+ */
+ private static Configuration createDefaultConfig(){
+ final Configuration configuration = DefaultConfig.getInstance();
+ configuration.setUseSmart(true);
+ return configuration;
}
}
diff --git a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jcseg/JcsegEngine.java b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jcseg/JcsegEngine.java
index 41a277ff8..cb67d6a05 100644
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jcseg/JcsegEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jcseg/JcsegEngine.java
@@ -26,45 +26,45 @@ import java.io.StringReader;
/**
* Jcseg分词引擎实现
- * 项目地址:https://gitee.com/lionsoul/jcseg
+ * 项目地址:https://gitee.com/lionsoul/jcseg
+ * {@link ISegment}非线程安全,每次单独创建
*
* @author looly
- *
*/
public class JcsegEngine implements TokenizerEngine {
- private final ISegment segment;
+ private final SegmenterConfig config;
+ private final ADictionary dic;
/**
* 构造
*/
public JcsegEngine() {
// 创建SegmenterConfig分词配置实例,自动查找加载jcseg.properties配置项来初始化
- final SegmenterConfig config = new SegmenterConfig(true);
- // 创建默认单例词库实现,并且按照config配置加载词库
- final ADictionary dic = DictionaryFactory.createSingletonDictionary(config);
-
- // 依据给定的ADictionary和SegmenterConfig来创建ISegment
- this.segment = ISegment.COMPLEX.factory.create(config, dic);
+ this(new SegmenterConfig(true));
}
/**
* 构造
*
- * @param segment {@link ISegment}
+ * @param config {@link SegmenterConfig}
*/
- public JcsegEngine(final ISegment segment) {
- this.segment = segment;
+ public JcsegEngine(final SegmenterConfig config) {
+ this.config = config;
+ // 创建默认单例词库实现,并且按照config配置加载词库
+ this.dic = DictionaryFactory.createSingletonDictionary(config);
}
@Override
public Result parse(final CharSequence text) {
+ // 依据给定的ADictionary和SegmenterConfig来创建ISegment
+ final ISegment segment = ISegment.COMPLEX.factory.create(config, dic);
try {
- this.segment.reset(new StringReader(StrUtil.str(text)));
+ segment.reset(new StringReader(StrUtil.str(text)));
} catch (final IOException e) {
throw new TokenizerException(e);
}
- return new JcsegResult(this.segment);
+ return new JcsegResult(segment);
}
}
diff --git a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jieba/JiebaEngine.java b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jieba/JiebaEngine.java
index 0e5d9f859..b5a74cac6 100644
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jieba/JiebaEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jieba/JiebaEngine.java
@@ -22,6 +22,7 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* Jieba分词引擎实现
* 项目地址:https://github.com/huaban/jieba-analysis
+ * {@link JiebaSegmenter#process(String, SegMode)} 线程安全
*
* @author looly
*
diff --git a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mmseg/MmsegEngine.java b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mmseg/MmsegEngine.java
index 047edf29f..4999d63f0 100644
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mmseg/MmsegEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mmseg/MmsegEngine.java
@@ -12,6 +12,7 @@
package org.dromara.hutool.extra.tokenizer.engine.mmseg;
+import com.chenlb.mmseg4j.Seg;
import org.dromara.hutool.core.text.StrUtil;
import org.dromara.hutool.extra.tokenizer.Result;
import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
@@ -23,37 +24,35 @@ import java.io.StringReader;
/**
* mmseg4j分词引擎实现
- * 项目地址:https://github.com/chenlb/mmseg4j-core
+ * 项目地址:https://github.com/chenlb/mmseg4j-core
+ * {@link MMSeg}非线程安全,故单独创建之
*
* @author looly
- *
*/
public class MmsegEngine implements TokenizerEngine {
- private final MMSeg mmSeg;
+ private final Seg seg;
/**
* 构造
*/
public MmsegEngine() {
- final Dictionary dict = Dictionary.getInstance();
- final ComplexSeg seg = new ComplexSeg(dict);
- this.mmSeg = new MMSeg(new StringReader(""), seg);
+ this(new ComplexSeg(Dictionary.getInstance()));
}
/**
* 构造
*
- * @param mmSeg 模式{@link MMSeg}
+ * @param seg 模式{@link Seg}
*/
- public MmsegEngine(final MMSeg mmSeg) {
- this.mmSeg = mmSeg;
+ public MmsegEngine(final Seg seg) {
+ this.seg = seg;
}
@Override
public Result parse(final CharSequence text) {
- this.mmSeg.reset(StrUtil.getReader(text));
- return new MmsegResult(this.mmSeg);
+ final MMSeg mmSeg = new MMSeg(StrUtil.getReader(text), seg);
+ return new MmsegResult(mmSeg);
}
}
diff --git a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mynlp/MynlpEngine.java b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mynlp/MynlpEngine.java
index c3a861177..7fca5d9b4 100644
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mynlp/MynlpEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mynlp/MynlpEngine.java
@@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
/**
* MYNLP 中文NLP工具包分词实现
- * 项目地址:https://github.com/mayabot/mynlp/
+ * 项目地址:https://github.com/mayabot/mynlp/
+ * {@link Lexer} 线程安全
*
* @author looly
- *
*/
public class MynlpEngine implements TokenizerEngine {
diff --git a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/word/WordEngine.java b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/word/WordEngine.java
index 497c5dde7..71722c99a 100644
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/word/WordEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/word/WordEngine.java
@@ -22,7 +22,8 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
/**
* Word分词引擎实现
- * 项目地址:https://github.com/ysc/word
+ * 项目地址:https://github.com/ysc/word
+ * {@link Segmentation} 线程安全
*
* @author looly
*