fix thread safety

This commit is contained in:
Looly 2023-12-14 19:14:18 +08:00
parent 9a5fd52e9f
commit 050021912b
8 changed files with 55 additions and 44 deletions

View File

@ -15,10 +15,10 @@ package org.dromara.hutool.extra.tokenizer.engine;
import org.dromara.hutool.extra.tokenizer.Result;
/**
* 分词引擎接口定义用户通过实现此接口完成特定分词引擎的适配
* 分词引擎接口定义用户通过实现此接口完成特定分词引擎的适配<br>
* 由于引擎使用单例模式因此要求实现类保证线程安全
*
* @author looly
*
*/
public interface TokenizerEngine {

View File

@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* HanLP分词引擎实现<br>
* 项目地址https://github.com/hankcs/HanLP
* 项目地址https://github.com/hankcs/HanLP<br>
* {@link Segment#seg(String)}方法线程安全
*
* @author looly
*
*/
public class HanLPEngine implements TokenizerEngine {
@ -32,7 +32,6 @@ public class HanLPEngine implements TokenizerEngine {
/**
* 构造
*
*/
public HanLPEngine() {
this(HanLP.newSegment());

View File

@ -12,6 +12,8 @@
package org.dromara.hutool.extra.tokenizer.engine.ikanalyzer;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.cfg.DefaultConfig;
import org.wltea.analyzer.core.IKSegmenter;
import org.dromara.hutool.core.text.StrUtil;
@ -20,35 +22,44 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* IKAnalyzer分词引擎实现<br>
* 项目地址https://github.com/yozhao/IKAnalyzer
* 项目地址https://github.com/yozhao/IKAnalyzer<br>
* {@link IKSegmenter} 非线程安全因此每次单独创建对象
*
* @author looly
*
*/
public class IKAnalyzerEngine implements TokenizerEngine {
private final IKSegmenter seg;
private final Configuration cfg;
/**
* 构造
*
*/
public IKAnalyzerEngine() {
this(new IKSegmenter(null, true));
this(createDefaultConfig());
}
/**
* 构造
*
* @param seg {@link IKSegmenter}
* @param cfg 配置
*/
public IKAnalyzerEngine(final IKSegmenter seg) {
this.seg = seg;
public IKAnalyzerEngine(final Configuration cfg) {
cfg.setUseSmart(true);
this.cfg = cfg;
}
@Override
public Result parse(final CharSequence text) {
this.seg.reset(StrUtil.getReader(text));
return new IKAnalyzerResult(this.seg);
final IKSegmenter seg = new IKSegmenter(StrUtil.getReader(text), cfg);
return new IKAnalyzerResult(seg);
}
/**
* 创建默认配置
* @return {@link Configuration}
*/
private static Configuration createDefaultConfig(){
final Configuration configuration = DefaultConfig.getInstance();
configuration.setUseSmart(true);
return configuration;
}
}

View File

@ -26,45 +26,45 @@ import java.io.StringReader;
/**
* Jcseg分词引擎实现<br>
* 项目地址https://gitee.com/lionsoul/jcseg
* 项目地址https://gitee.com/lionsoul/jcseg<br>
* {@link ISegment}非线程安全每次单独创建
*
* @author looly
*
*/
public class JcsegEngine implements TokenizerEngine {
private final ISegment segment;
private final SegmenterConfig config;
private final ADictionary dic;
/**
* 构造
*/
public JcsegEngine() {
// 创建SegmenterConfig分词配置实例自动查找加载jcseg.properties配置项来初始化
final SegmenterConfig config = new SegmenterConfig(true);
// 创建默认单例词库实现并且按照config配置加载词库
final ADictionary dic = DictionaryFactory.createSingletonDictionary(config);
// 依据给定的ADictionary和SegmenterConfig来创建ISegment
this.segment = ISegment.COMPLEX.factory.create(config, dic);
this(new SegmenterConfig(true));
}
/**
* 构造
*
* @param segment {@link ISegment}
* @param config {@link SegmenterConfig}
*/
public JcsegEngine(final ISegment segment) {
this.segment = segment;
public JcsegEngine(final SegmenterConfig config) {
this.config = config;
// 创建默认单例词库实现并且按照config配置加载词库
this.dic = DictionaryFactory.createSingletonDictionary(config);
}
@Override
public Result parse(final CharSequence text) {
// 依据给定的ADictionary和SegmenterConfig来创建ISegment
final ISegment segment = ISegment.COMPLEX.factory.create(config, dic);
try {
this.segment.reset(new StringReader(StrUtil.str(text)));
segment.reset(new StringReader(StrUtil.str(text)));
} catch (final IOException e) {
throw new TokenizerException(e);
}
return new JcsegResult(this.segment);
return new JcsegResult(segment);
}
}

View File

@ -22,6 +22,7 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* Jieba分词引擎实现<br>
* 项目地址https://github.com/huaban/jieba-analysis
* {@link JiebaSegmenter#process(String, SegMode)} 线程安全
*
* @author looly
*

View File

@ -12,6 +12,7 @@
package org.dromara.hutool.extra.tokenizer.engine.mmseg;
import com.chenlb.mmseg4j.Seg;
import org.dromara.hutool.core.text.StrUtil;
import org.dromara.hutool.extra.tokenizer.Result;
import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
@ -23,37 +24,35 @@ import java.io.StringReader;
/**
* mmseg4j分词引擎实现<br>
* 项目地址https://github.com/chenlb/mmseg4j-core
* 项目地址https://github.com/chenlb/mmseg4j-core<br>
* {@link MMSeg}非线程安全故单独创建之
*
* @author looly
*
*/
public class MmsegEngine implements TokenizerEngine {
private final MMSeg mmSeg;
private final Seg seg;
/**
* 构造
*/
public MmsegEngine() {
final Dictionary dict = Dictionary.getInstance();
final ComplexSeg seg = new ComplexSeg(dict);
this.mmSeg = new MMSeg(new StringReader(""), seg);
this(new ComplexSeg(Dictionary.getInstance()));
}
/**
* 构造
*
* @param mmSeg 模式{@link MMSeg}
* @param seg 模式{@link Seg}
*/
public MmsegEngine(final MMSeg mmSeg) {
this.mmSeg = mmSeg;
public MmsegEngine(final Seg seg) {
this.seg = seg;
}
@Override
public Result parse(final CharSequence text) {
this.mmSeg.reset(StrUtil.getReader(text));
return new MmsegResult(this.mmSeg);
final MMSeg mmSeg = new MMSeg(StrUtil.getReader(text), seg);
return new MmsegResult(mmSeg);
}
}

View File

@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
/**
* MYNLP 中文NLP工具包分词实现<br>
* 项目地址https://github.com/mayabot/mynlp/
* 项目地址https://github.com/mayabot/mynlp/<br>
* {@link Lexer} 线程安全
*
* @author looly
*
*/
public class MynlpEngine implements TokenizerEngine {

View File

@ -22,7 +22,8 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
/**
* Word分词引擎实现<br>
* 项目地址https://github.com/ysc/word
* 项目地址https://github.com/ysc/word<br>
* {@link Segmentation} 线程安全
*
* @author looly
*