fix thread safety

2025-05-09 23:51:34 +08:00 · 2023-12-14 19:14:18 +08:00 · 2023-12-14 19:14:18 +08:00 · 050021912b
commit 050021912b
parent 9a5fd52e9f
8 changed files with 55 additions and 44 deletions
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/TokenizerEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/TokenizerEngine.java
@ -15,10 +15,10 @@ package org.dromara.hutool.extra.tokenizer.engine;
 import org.dromara.hutool.extra.tokenizer.Result;
 /**
- * 分词引擎接口定义，用户通过实现此接口完成特定分词引擎的适配
+ * 分词引擎接口定义，用户通过实现此接口完成特定分词引擎的适配<br>
 * 由于引擎使用单例模式，因此要求实现类保证线程安全
 *
 * @author looly
 *
 */
 public interface TokenizerEngine {
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/hanlp/HanLPEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/hanlp/HanLPEngine.java
@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.Result;
 /**
 * HanLP分词引擎实现<br>
- * 项目地址：https://github.com/hankcs/HanLP
+ * 项目地址：https://github.com/hankcs/HanLP<br>
 * {@link Segment#seg(String)}方法线程安全
 *
 * @author looly
 *
 */
 public class HanLPEngine implements TokenizerEngine {
@ -32,7 +32,6 @@ public class HanLPEngine implements TokenizerEngine {
 	/**
 	 * 构造
 	 *
 	 */
 	public HanLPEngine() {
 		this(HanLP.newSegment());
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/ikanalyzer/IKAnalyzerEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/ikanalyzer/IKAnalyzerEngine.java
@ -12,6 +12,8 @@
 package org.dromara.hutool.extra.tokenizer.engine.ikanalyzer;
+import org.wltea.analyzer.cfg.Configuration;
+import org.wltea.analyzer.cfg.DefaultConfig;
 import org.wltea.analyzer.core.IKSegmenter;
 import org.dromara.hutool.core.text.StrUtil;
@ -20,35 +22,44 @@ import org.dromara.hutool.extra.tokenizer.Result;
 /**
 * IKAnalyzer分词引擎实现<br>
- * 项目地址：https://github.com/yozhao/IKAnalyzer
+ * 项目地址：https://github.com/yozhao/IKAnalyzer<br>
 * {@link IKSegmenter} 非线程安全，因此每次单独创建对象
 *
 * @author looly
 *
 */
 public class IKAnalyzerEngine implements TokenizerEngine {
-	private final IKSegmenter seg;
+	private final Configuration cfg;
 	/**
 	 * 构造
 	 *
 	 */
 	public IKAnalyzerEngine() {
-		this(new IKSegmenter(null, true));
+		this(createDefaultConfig());
 	}
 	/**
 	 * 构造
-	 *
+	 * @param cfg 配置
-	 * @param seg {@link IKSegmenter}
 	 */
-	public IKAnalyzerEngine(final IKSegmenter seg) {
+	public IKAnalyzerEngine(final Configuration cfg) {
-		this.seg = seg;
+		cfg.setUseSmart(true);
+		this.cfg = cfg;
 	}
 	@Override
 	public Result parse(final CharSequence text) {
-		this.seg.reset(StrUtil.getReader(text));
+		final IKSegmenter seg = new IKSegmenter(StrUtil.getReader(text), cfg);
-		return new IKAnalyzerResult(this.seg);
+		return new IKAnalyzerResult(seg);
 	}
+	/**
+	 * 创建默认配置
+	 * @return {@link Configuration}
+	 */
+	private static Configuration createDefaultConfig(){
+		final Configuration configuration = DefaultConfig.getInstance();
+		configuration.setUseSmart(true);
+		return configuration;
+	}
 }
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jcseg/JcsegEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jcseg/JcsegEngine.java
@ -26,45 +26,45 @@ import java.io.StringReader;
 /**
 * Jcseg分词引擎实现<br>
- * 项目地址：https://gitee.com/lionsoul/jcseg
+ * 项目地址：https://gitee.com/lionsoul/jcseg<br>
 * {@link ISegment}非线程安全，每次单独创建
 *
 * @author looly
 *
 */
 public class JcsegEngine implements TokenizerEngine {
-	private final ISegment segment;
+	private final SegmenterConfig config;
+	private final ADictionary dic;
 	/**
 	 * 构造
 	 */
 	public JcsegEngine() {
 		// 创建SegmenterConfig分词配置实例，自动查找加载jcseg.properties配置项来初始化
-		final SegmenterConfig config = new SegmenterConfig(true);
+		this(new SegmenterConfig(true));
-		// 创建默认单例词库实现，并且按照config配置加载词库
-		final ADictionary dic = DictionaryFactory.createSingletonDictionary(config);
-		// 依据给定的ADictionary和SegmenterConfig来创建ISegment
-		this.segment = ISegment.COMPLEX.factory.create(config, dic);
 	}
 	/**
 	 * 构造
 	 *
-	 * @param segment {@link ISegment}
+	 * @param config {@link SegmenterConfig}
 	 */
-	public JcsegEngine(final ISegment segment) {
+	public JcsegEngine(final SegmenterConfig config) {
-		this.segment = segment;
+		this.config = config;
+		// 创建默认单例词库实现，并且按照config配置加载词库
+		this.dic = DictionaryFactory.createSingletonDictionary(config);
 	}
 	@Override
 	public Result parse(final CharSequence text) {
+		// 依据给定的ADictionary和SegmenterConfig来创建ISegment
+		final ISegment segment = ISegment.COMPLEX.factory.create(config, dic);
 		try {
-			this.segment.reset(new StringReader(StrUtil.str(text)));
+			segment.reset(new StringReader(StrUtil.str(text)));
 		} catch (final IOException e) {
 			throw new TokenizerException(e);
 		}
-		return new JcsegResult(this.segment);
+		return new JcsegResult(segment);
 	}
 }
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jieba/JiebaEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/jieba/JiebaEngine.java
@ -22,6 +22,7 @@ import org.dromara.hutool.extra.tokenizer.Result;
 /**
 * Jieba分词引擎实现<br>
 * 项目地址：https://github.com/huaban/jieba-analysis
 * {@link JiebaSegmenter#process(String, SegMode)} 线程安全
 *
 * @author looly
 *
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mmseg/MmsegEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mmseg/MmsegEngine.java
@ -12,6 +12,7 @@
 package org.dromara.hutool.extra.tokenizer.engine.mmseg;
+import com.chenlb.mmseg4j.Seg;
 import org.dromara.hutool.core.text.StrUtil;
 import org.dromara.hutool.extra.tokenizer.Result;
 import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
@ -23,37 +24,35 @@ import java.io.StringReader;
 /**
 * mmseg4j分词引擎实现<br>
- * 项目地址：https://github.com/chenlb/mmseg4j-core
+ * 项目地址：https://github.com/chenlb/mmseg4j-core<br>
 * {@link MMSeg}非线程安全，故单独创建之
 *
 * @author looly
 *
 */
 public class MmsegEngine implements TokenizerEngine {
-	private final MMSeg mmSeg;
+	private final Seg seg;
 	/**
 	 * 构造
 	 */
 	public MmsegEngine() {
-		final Dictionary dict = Dictionary.getInstance();
+		this(new ComplexSeg(Dictionary.getInstance()));
-		final ComplexSeg seg = new ComplexSeg(dict);
-		this.mmSeg = new MMSeg(new StringReader(""), seg);
 	}
 	/**
 	 * 构造
 	 *
-	 * @param mmSeg 模式{@link MMSeg}
+	 * @param seg 模式{@link Seg}
 	 */
-	public MmsegEngine(final MMSeg mmSeg) {
+	public MmsegEngine(final Seg seg) {
-		this.mmSeg = mmSeg;
+		this.seg = seg;
 	}
 	@Override
 	public Result parse(final CharSequence text) {
-		this.mmSeg.reset(StrUtil.getReader(text));
+		final MMSeg mmSeg = new MMSeg(StrUtil.getReader(text), seg);
-		return new MmsegResult(this.mmSeg);
+		return new MmsegResult(mmSeg);
 	}
 }
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mynlp/MynlpEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/mynlp/MynlpEngine.java
@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
 /**
 * MYNLP 中文NLP工具包分词实现<br>
- * 项目地址：https://github.com/mayabot/mynlp/
+ * 项目地址：https://github.com/mayabot/mynlp/<br>
 * {@link Lexer} 线程安全
 *
 * @author looly
 *
 */
 public class MynlpEngine implements TokenizerEngine {
--- a/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/word/WordEngine.java
+++ b/hutool-extra/src/main/java/org/dromara/hutool/extra/tokenizer/engine/word/WordEngine.java
@ -22,7 +22,8 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
 /**
 * Word分词引擎实现<br>
- * 项目地址：https://github.com/ysc/word
+ * 项目地址：https://github.com/ysc/word<br>
 * {@link Segmentation} 线程安全
 *
 * @author looly
 *