fix thread safety

This commit is contained in:
Looly 2023-12-14 19:14:18 +08:00
parent 9a5fd52e9f
commit 050021912b
8 changed files with 55 additions and 44 deletions

View File

@ -15,10 +15,10 @@ package org.dromara.hutool.extra.tokenizer.engine;
import org.dromara.hutool.extra.tokenizer.Result;
/**
* 分词引擎接口定义用户通过实现此接口完成特定分词引擎的适配
* 分词引擎接口定义用户通过实现此接口完成特定分词引擎的适配<br>
* 由于引擎使用单例模式因此要求实现类保证线程安全
*
* @author looly
*
*/
public interface TokenizerEngine {

View File

@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* HanLP分词引擎实现<br>
* 项目地址https://github.com/hankcs/HanLP
* 项目地址https://github.com/hankcs/HanLP<br>
* {@link Segment#seg(String)}方法线程安全
*
* @author looly
*
*/
public class HanLPEngine implements TokenizerEngine {
@ -32,7 +32,6 @@ public class HanLPEngine implements TokenizerEngine {
/**
* 构造
*
*/
public HanLPEngine() {
this(HanLP.newSegment());

View File

@ -12,6 +12,8 @@
package org.dromara.hutool.extra.tokenizer.engine.ikanalyzer;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.cfg.DefaultConfig;
import org.wltea.analyzer.core.IKSegmenter;
import org.dromara.hutool.core.text.StrUtil;
@ -20,35 +22,44 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* IKAnalyzer分词引擎实现<br>
* 项目地址https://github.com/yozhao/IKAnalyzer
* 项目地址https://github.com/yozhao/IKAnalyzer<br>
* {@link IKSegmenter} 非线程安全因此每次单独创建对象
*
* @author looly
*
*/
public class IKAnalyzerEngine implements TokenizerEngine {
private final IKSegmenter seg;
private final Configuration cfg;
/**
* 构造
*
*/
public IKAnalyzerEngine() {
this(new IKSegmenter(null, true));
this(createDefaultConfig());
}
/**
* 构造
*
* @param seg {@link IKSegmenter}
* @param cfg 配置
*/
public IKAnalyzerEngine(final IKSegmenter seg) {
this.seg = seg;
public IKAnalyzerEngine(final Configuration cfg) {
cfg.setUseSmart(true);
this.cfg = cfg;
}
@Override
public Result parse(final CharSequence text) {
this.seg.reset(StrUtil.getReader(text));
return new IKAnalyzerResult(this.seg);
final IKSegmenter seg = new IKSegmenter(StrUtil.getReader(text), cfg);
return new IKAnalyzerResult(seg);
}
/**
* 创建默认配置
* @return {@link Configuration}
*/
private static Configuration createDefaultConfig(){
final Configuration configuration = DefaultConfig.getInstance();
configuration.setUseSmart(true);
return configuration;
}
}

View File

@ -26,45 +26,45 @@ import java.io.StringReader;
/**
* Jcseg分词引擎实现<br>
* 项目地址https://gitee.com/lionsoul/jcseg
* 项目地址https://gitee.com/lionsoul/jcseg<br>
* {@link ISegment}非线程安全每次单独创建
*
* @author looly
*
*/
public class JcsegEngine implements TokenizerEngine {
private final ISegment segment;
private final SegmenterConfig config;
private final ADictionary dic;
/**
* 构造
*/
public JcsegEngine() {
// 创建SegmenterConfig分词配置实例自动查找加载jcseg.properties配置项来初始化
final SegmenterConfig config = new SegmenterConfig(true);
// 创建默认单例词库实现并且按照config配置加载词库
final ADictionary dic = DictionaryFactory.createSingletonDictionary(config);
// 依据给定的ADictionary和SegmenterConfig来创建ISegment
this.segment = ISegment.COMPLEX.factory.create(config, dic);
this(new SegmenterConfig(true));
}
/**
* 构造
*
* @param segment {@link ISegment}
* @param config {@link SegmenterConfig}
*/
public JcsegEngine(final ISegment segment) {
this.segment = segment;
public JcsegEngine(final SegmenterConfig config) {
this.config = config;
// 创建默认单例词库实现并且按照config配置加载词库
this.dic = DictionaryFactory.createSingletonDictionary(config);
}
@Override
public Result parse(final CharSequence text) {
// 依据给定的ADictionary和SegmenterConfig来创建ISegment
final ISegment segment = ISegment.COMPLEX.factory.create(config, dic);
try {
this.segment.reset(new StringReader(StrUtil.str(text)));
segment.reset(new StringReader(StrUtil.str(text)));
} catch (final IOException e) {
throw new TokenizerException(e);
}
return new JcsegResult(this.segment);
return new JcsegResult(segment);
}
}

View File

@ -22,6 +22,7 @@ import org.dromara.hutool.extra.tokenizer.Result;
/**
* Jieba分词引擎实现<br>
* 项目地址https://github.com/huaban/jieba-analysis
* {@link JiebaSegmenter#process(String, SegMode)} 线程安全
*
* @author looly
*

View File

@ -12,6 +12,7 @@
package org.dromara.hutool.extra.tokenizer.engine.mmseg;
import com.chenlb.mmseg4j.Seg;
import org.dromara.hutool.core.text.StrUtil;
import org.dromara.hutool.extra.tokenizer.Result;
import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
@ -23,37 +24,35 @@ import java.io.StringReader;
/**
* mmseg4j分词引擎实现<br>
* 项目地址https://github.com/chenlb/mmseg4j-core
* 项目地址https://github.com/chenlb/mmseg4j-core<br>
* {@link MMSeg}非线程安全故单独创建之
*
* @author looly
*
*/
public class MmsegEngine implements TokenizerEngine {
private final MMSeg mmSeg;
private final Seg seg;
/**
* 构造
*/
public MmsegEngine() {
final Dictionary dict = Dictionary.getInstance();
final ComplexSeg seg = new ComplexSeg(dict);
this.mmSeg = new MMSeg(new StringReader(""), seg);
this(new ComplexSeg(Dictionary.getInstance()));
}
/**
* 构造
*
* @param mmSeg 模式{@link MMSeg}
* @param seg 模式{@link Seg}
*/
public MmsegEngine(final MMSeg mmSeg) {
this.mmSeg = mmSeg;
public MmsegEngine(final Seg seg) {
this.seg = seg;
}
@Override
public Result parse(final CharSequence text) {
this.mmSeg.reset(StrUtil.getReader(text));
return new MmsegResult(this.mmSeg);
final MMSeg mmSeg = new MMSeg(StrUtil.getReader(text), seg);
return new MmsegResult(mmSeg);
}
}

View File

@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
/**
* MYNLP 中文NLP工具包分词实现<br>
* 项目地址https://github.com/mayabot/mynlp/
* 项目地址https://github.com/mayabot/mynlp/<br>
* {@link Lexer} 线程安全
*
* @author looly
*
*/
public class MynlpEngine implements TokenizerEngine {

View File

@ -22,7 +22,8 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
/**
* Word分词引擎实现<br>
* 项目地址https://github.com/ysc/word
* 项目地址https://github.com/ysc/word<br>
* {@link Segmentation} 线程安全
*
* @author looly
*