mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-04-19 03:01:48 +08:00
fix thread safe
This commit is contained in:
parent
9a5fd52e9f
commit
050021912b
@ -15,10 +15,10 @@ package org.dromara.hutool.extra.tokenizer.engine;
|
||||
import org.dromara.hutool.extra.tokenizer.Result;
|
||||
|
||||
/**
|
||||
* 分词引擎接口定义,用户通过实现此接口完成特定分词引擎的适配
|
||||
* 分词引擎接口定义,用户通过实现此接口完成特定分词引擎的适配<br>
|
||||
* 由于引擎使用单例模式,因此要求实现类保证线程安全
|
||||
*
|
||||
* @author looly
|
||||
*
|
||||
*/
|
||||
public interface TokenizerEngine {
|
||||
|
||||
|
@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.Result;
|
||||
|
||||
/**
|
||||
* HanLP分词引擎实现<br>
|
||||
* 项目地址:https://github.com/hankcs/HanLP
|
||||
* 项目地址:https://github.com/hankcs/HanLP<br>
|
||||
* {@link Segment#seg(String)}方法线程安全
|
||||
*
|
||||
* @author looly
|
||||
*
|
||||
*/
|
||||
public class HanLPEngine implements TokenizerEngine {
|
||||
|
||||
@ -32,7 +32,6 @@ public class HanLPEngine implements TokenizerEngine {
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*
|
||||
*/
|
||||
public HanLPEngine() {
|
||||
this(HanLP.newSegment());
|
||||
|
@ -12,6 +12,8 @@
|
||||
|
||||
package org.dromara.hutool.extra.tokenizer.engine.ikanalyzer;
|
||||
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
import org.wltea.analyzer.cfg.DefaultConfig;
|
||||
import org.wltea.analyzer.core.IKSegmenter;
|
||||
|
||||
import org.dromara.hutool.core.text.StrUtil;
|
||||
@ -20,35 +22,44 @@ import org.dromara.hutool.extra.tokenizer.Result;
|
||||
|
||||
/**
|
||||
* IKAnalyzer分词引擎实现<br>
|
||||
* 项目地址:https://github.com/yozhao/IKAnalyzer
|
||||
* 项目地址:https://github.com/yozhao/IKAnalyzer<br>
|
||||
* {@link IKSegmenter} 非线程安全,因此每次单独创建对象
|
||||
*
|
||||
* @author looly
|
||||
*
|
||||
*/
|
||||
public class IKAnalyzerEngine implements TokenizerEngine {
|
||||
|
||||
private final IKSegmenter seg;
|
||||
private final Configuration cfg;
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*
|
||||
*/
|
||||
public IKAnalyzerEngine() {
|
||||
this(new IKSegmenter(null, true));
|
||||
this(createDefaultConfig());
|
||||
}
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*
|
||||
* @param seg {@link IKSegmenter}
|
||||
* @param cfg 配置
|
||||
*/
|
||||
public IKAnalyzerEngine(final IKSegmenter seg) {
|
||||
this.seg = seg;
|
||||
public IKAnalyzerEngine(final Configuration cfg) {
|
||||
cfg.setUseSmart(true);
|
||||
this.cfg = cfg;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result parse(final CharSequence text) {
|
||||
this.seg.reset(StrUtil.getReader(text));
|
||||
return new IKAnalyzerResult(this.seg);
|
||||
final IKSegmenter seg = new IKSegmenter(StrUtil.getReader(text), cfg);
|
||||
return new IKAnalyzerResult(seg);
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建默认配置
|
||||
* @return {@link Configuration}
|
||||
*/
|
||||
private static Configuration createDefaultConfig(){
|
||||
final Configuration configuration = DefaultConfig.getInstance();
|
||||
configuration.setUseSmart(true);
|
||||
return configuration;
|
||||
}
|
||||
}
|
||||
|
@ -26,45 +26,45 @@ import java.io.StringReader;
|
||||
|
||||
/**
|
||||
* Jcseg分词引擎实现<br>
|
||||
* 项目地址:https://gitee.com/lionsoul/jcseg
|
||||
* 项目地址:https://gitee.com/lionsoul/jcseg<br>
|
||||
* {@link ISegment}非线程安全,每次单独创建
|
||||
*
|
||||
* @author looly
|
||||
*
|
||||
*/
|
||||
public class JcsegEngine implements TokenizerEngine {
|
||||
|
||||
private final ISegment segment;
|
||||
private final SegmenterConfig config;
|
||||
private final ADictionary dic;
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*/
|
||||
public JcsegEngine() {
|
||||
// 创建SegmenterConfig分词配置实例,自动查找加载jcseg.properties配置项来初始化
|
||||
final SegmenterConfig config = new SegmenterConfig(true);
|
||||
// 创建默认单例词库实现,并且按照config配置加载词库
|
||||
final ADictionary dic = DictionaryFactory.createSingletonDictionary(config);
|
||||
|
||||
// 依据给定的ADictionary和SegmenterConfig来创建ISegment
|
||||
this.segment = ISegment.COMPLEX.factory.create(config, dic);
|
||||
this(new SegmenterConfig(true));
|
||||
}
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*
|
||||
* @param segment {@link ISegment}
|
||||
* @param config {@link SegmenterConfig}
|
||||
*/
|
||||
public JcsegEngine(final ISegment segment) {
|
||||
this.segment = segment;
|
||||
public JcsegEngine(final SegmenterConfig config) {
|
||||
this.config = config;
|
||||
// 创建默认单例词库实现,并且按照config配置加载词库
|
||||
this.dic = DictionaryFactory.createSingletonDictionary(config);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result parse(final CharSequence text) {
|
||||
// 依据给定的ADictionary和SegmenterConfig来创建ISegment
|
||||
final ISegment segment = ISegment.COMPLEX.factory.create(config, dic);
|
||||
try {
|
||||
this.segment.reset(new StringReader(StrUtil.str(text)));
|
||||
segment.reset(new StringReader(StrUtil.str(text)));
|
||||
} catch (final IOException e) {
|
||||
throw new TokenizerException(e);
|
||||
}
|
||||
return new JcsegResult(this.segment);
|
||||
return new JcsegResult(segment);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ import org.dromara.hutool.extra.tokenizer.Result;
|
||||
/**
|
||||
* Jieba分词引擎实现<br>
|
||||
* 项目地址:https://github.com/huaban/jieba-analysis<br>
|
||||
* {@link JiebaSegmenter#process(String, SegMode)} 线程安全
|
||||
*
|
||||
* @author looly
|
||||
*
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
package org.dromara.hutool.extra.tokenizer.engine.mmseg;
|
||||
|
||||
import com.chenlb.mmseg4j.Seg;
|
||||
import org.dromara.hutool.core.text.StrUtil;
|
||||
import org.dromara.hutool.extra.tokenizer.Result;
|
||||
import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
|
||||
@ -23,37 +24,35 @@ import java.io.StringReader;
|
||||
|
||||
/**
|
||||
* mmseg4j分词引擎实现<br>
|
||||
* 项目地址:https://github.com/chenlb/mmseg4j-core
|
||||
* 项目地址:https://github.com/chenlb/mmseg4j-core<br>
|
||||
* {@link MMSeg}非线程安全,故单独创建之
|
||||
*
|
||||
* @author looly
|
||||
*
|
||||
*/
|
||||
public class MmsegEngine implements TokenizerEngine {
|
||||
|
||||
private final MMSeg mmSeg;
|
||||
private final Seg seg;
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*/
|
||||
public MmsegEngine() {
|
||||
final Dictionary dict = Dictionary.getInstance();
|
||||
final ComplexSeg seg = new ComplexSeg(dict);
|
||||
this.mmSeg = new MMSeg(new StringReader(""), seg);
|
||||
this(new ComplexSeg(Dictionary.getInstance()));
|
||||
}
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*
|
||||
* @param mmSeg 模式{@link MMSeg}
|
||||
* @param seg 模式{@link Seg}
|
||||
*/
|
||||
public MmsegEngine(final MMSeg mmSeg) {
|
||||
this.mmSeg = mmSeg;
|
||||
public MmsegEngine(final Seg seg) {
|
||||
this.seg = seg;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Result parse(final CharSequence text) {
|
||||
this.mmSeg.reset(StrUtil.getReader(text));
|
||||
return new MmsegResult(this.mmSeg);
|
||||
final MMSeg mmSeg = new MMSeg(StrUtil.getReader(text), seg);
|
||||
return new MmsegResult(mmSeg);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
|
||||
|
||||
/**
|
||||
* MYNLP 中文NLP工具包分词实现<br>
|
||||
* 项目地址:https://github.com/mayabot/mynlp/
|
||||
* 项目地址:https://github.com/mayabot/mynlp/<br>
|
||||
* {@link Lexer} 线程安全
|
||||
*
|
||||
* @author looly
|
||||
*
|
||||
*/
|
||||
public class MynlpEngine implements TokenizerEngine {
|
||||
|
||||
|
@ -22,7 +22,8 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
|
||||
|
||||
/**
|
||||
* Word分词引擎实现<br>
|
||||
* 项目地址:https://github.com/ysc/word
|
||||
* 项目地址:https://github.com/ysc/word<br>
|
||||
* {@link Segmentation} 线程安全
|
||||
*
|
||||
* @author looly
|
||||
*
|
||||
|
Loading…
x
Reference in New Issue
Block a user