mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-05-09 23:51:34 +08:00
fix thread safe
This commit is contained in:
parent
9a5fd52e9f
commit
050021912b
@ -15,10 +15,10 @@ package org.dromara.hutool.extra.tokenizer.engine;
|
|||||||
import org.dromara.hutool.extra.tokenizer.Result;
|
import org.dromara.hutool.extra.tokenizer.Result;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 分词引擎接口定义,用户通过实现此接口完成特定分词引擎的适配
|
* 分词引擎接口定义,用户通过实现此接口完成特定分词引擎的适配<br>
|
||||||
|
* 由于引擎使用单例模式,因此要求实现类保证线程安全
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public interface TokenizerEngine {
|
public interface TokenizerEngine {
|
||||||
|
|
||||||
|
@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.Result;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* HanLP分词引擎实现<br>
|
* HanLP分词引擎实现<br>
|
||||||
* 项目地址:https://github.com/hankcs/HanLP
|
* 项目地址:https://github.com/hankcs/HanLP<br>
|
||||||
|
* {@link Segment#seg(String)}方法线程安全
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class HanLPEngine implements TokenizerEngine {
|
public class HanLPEngine implements TokenizerEngine {
|
||||||
|
|
||||||
@ -32,7 +32,6 @@ public class HanLPEngine implements TokenizerEngine {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 构造
|
* 构造
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public HanLPEngine() {
|
public HanLPEngine() {
|
||||||
this(HanLP.newSegment());
|
this(HanLP.newSegment());
|
||||||
|
@ -12,6 +12,8 @@
|
|||||||
|
|
||||||
package org.dromara.hutool.extra.tokenizer.engine.ikanalyzer;
|
package org.dromara.hutool.extra.tokenizer.engine.ikanalyzer;
|
||||||
|
|
||||||
|
import org.wltea.analyzer.cfg.Configuration;
|
||||||
|
import org.wltea.analyzer.cfg.DefaultConfig;
|
||||||
import org.wltea.analyzer.core.IKSegmenter;
|
import org.wltea.analyzer.core.IKSegmenter;
|
||||||
|
|
||||||
import org.dromara.hutool.core.text.StrUtil;
|
import org.dromara.hutool.core.text.StrUtil;
|
||||||
@ -20,35 +22,44 @@ import org.dromara.hutool.extra.tokenizer.Result;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* IKAnalyzer分词引擎实现<br>
|
* IKAnalyzer分词引擎实现<br>
|
||||||
* 项目地址:https://github.com/yozhao/IKAnalyzer
|
* 项目地址:https://github.com/yozhao/IKAnalyzer<br>
|
||||||
|
* {@link IKSegmenter} 非线程安全,因此每次单独创建对象
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class IKAnalyzerEngine implements TokenizerEngine {
|
public class IKAnalyzerEngine implements TokenizerEngine {
|
||||||
|
|
||||||
private final IKSegmenter seg;
|
private final Configuration cfg;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 构造
|
* 构造
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public IKAnalyzerEngine() {
|
public IKAnalyzerEngine() {
|
||||||
this(new IKSegmenter(null, true));
|
this(createDefaultConfig());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 构造
|
* 构造
|
||||||
*
|
* @param cfg 配置
|
||||||
* @param seg {@link IKSegmenter}
|
|
||||||
*/
|
*/
|
||||||
public IKAnalyzerEngine(final IKSegmenter seg) {
|
public IKAnalyzerEngine(final Configuration cfg) {
|
||||||
this.seg = seg;
|
cfg.setUseSmart(true);
|
||||||
|
this.cfg = cfg;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Result parse(final CharSequence text) {
|
public Result parse(final CharSequence text) {
|
||||||
this.seg.reset(StrUtil.getReader(text));
|
final IKSegmenter seg = new IKSegmenter(StrUtil.getReader(text), cfg);
|
||||||
return new IKAnalyzerResult(this.seg);
|
return new IKAnalyzerResult(seg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 创建默认配置
|
||||||
|
* @return {@link Configuration}
|
||||||
|
*/
|
||||||
|
private static Configuration createDefaultConfig(){
|
||||||
|
final Configuration configuration = DefaultConfig.getInstance();
|
||||||
|
configuration.setUseSmart(true);
|
||||||
|
return configuration;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,45 +26,45 @@ import java.io.StringReader;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Jcseg分词引擎实现<br>
|
* Jcseg分词引擎实现<br>
|
||||||
* 项目地址:https://gitee.com/lionsoul/jcseg
|
* 项目地址:https://gitee.com/lionsoul/jcseg<br>
|
||||||
|
* {@link ISegment}非线程安全,每次单独创建
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class JcsegEngine implements TokenizerEngine {
|
public class JcsegEngine implements TokenizerEngine {
|
||||||
|
|
||||||
private final ISegment segment;
|
private final SegmenterConfig config;
|
||||||
|
private final ADictionary dic;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 构造
|
* 构造
|
||||||
*/
|
*/
|
||||||
public JcsegEngine() {
|
public JcsegEngine() {
|
||||||
// 创建SegmenterConfig分词配置实例,自动查找加载jcseg.properties配置项来初始化
|
// 创建SegmenterConfig分词配置实例,自动查找加载jcseg.properties配置项来初始化
|
||||||
final SegmenterConfig config = new SegmenterConfig(true);
|
this(new SegmenterConfig(true));
|
||||||
// 创建默认单例词库实现,并且按照config配置加载词库
|
|
||||||
final ADictionary dic = DictionaryFactory.createSingletonDictionary(config);
|
|
||||||
|
|
||||||
// 依据给定的ADictionary和SegmenterConfig来创建ISegment
|
|
||||||
this.segment = ISegment.COMPLEX.factory.create(config, dic);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 构造
|
* 构造
|
||||||
*
|
*
|
||||||
* @param segment {@link ISegment}
|
* @param config {@link SegmenterConfig}
|
||||||
*/
|
*/
|
||||||
public JcsegEngine(final ISegment segment) {
|
public JcsegEngine(final SegmenterConfig config) {
|
||||||
this.segment = segment;
|
this.config = config;
|
||||||
|
// 创建默认单例词库实现,并且按照config配置加载词库
|
||||||
|
this.dic = DictionaryFactory.createSingletonDictionary(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Result parse(final CharSequence text) {
|
public Result parse(final CharSequence text) {
|
||||||
|
// 依据给定的ADictionary和SegmenterConfig来创建ISegment
|
||||||
|
final ISegment segment = ISegment.COMPLEX.factory.create(config, dic);
|
||||||
try {
|
try {
|
||||||
this.segment.reset(new StringReader(StrUtil.str(text)));
|
segment.reset(new StringReader(StrUtil.str(text)));
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
throw new TokenizerException(e);
|
throw new TokenizerException(e);
|
||||||
}
|
}
|
||||||
return new JcsegResult(this.segment);
|
return new JcsegResult(segment);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ import org.dromara.hutool.extra.tokenizer.Result;
|
|||||||
/**
|
/**
|
||||||
* Jieba分词引擎实现<br>
|
* Jieba分词引擎实现<br>
|
||||||
* 项目地址:https://github.com/huaban/jieba-analysis
|
* 项目地址:https://github.com/huaban/jieba-analysis<br>
|
||||||
|
* {@link JiebaSegmenter#process(String, SegMode)} 线程安全
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*
|
*
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
package org.dromara.hutool.extra.tokenizer.engine.mmseg;
|
package org.dromara.hutool.extra.tokenizer.engine.mmseg;
|
||||||
|
|
||||||
|
import com.chenlb.mmseg4j.Seg;
|
||||||
import org.dromara.hutool.core.text.StrUtil;
|
import org.dromara.hutool.core.text.StrUtil;
|
||||||
import org.dromara.hutool.extra.tokenizer.Result;
|
import org.dromara.hutool.extra.tokenizer.Result;
|
||||||
import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
|
import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
|
||||||
@ -23,37 +24,35 @@ import java.io.StringReader;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* mmseg4j分词引擎实现<br>
|
* mmseg4j分词引擎实现<br>
|
||||||
* 项目地址:https://github.com/chenlb/mmseg4j-core
|
* 项目地址:https://github.com/chenlb/mmseg4j-core<br>
|
||||||
|
* {@link MMSeg}非线程安全,故单独创建之
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class MmsegEngine implements TokenizerEngine {
|
public class MmsegEngine implements TokenizerEngine {
|
||||||
|
|
||||||
private final MMSeg mmSeg;
|
private final Seg seg;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 构造
|
* 构造
|
||||||
*/
|
*/
|
||||||
public MmsegEngine() {
|
public MmsegEngine() {
|
||||||
final Dictionary dict = Dictionary.getInstance();
|
this(new ComplexSeg(Dictionary.getInstance()));
|
||||||
final ComplexSeg seg = new ComplexSeg(dict);
|
|
||||||
this.mmSeg = new MMSeg(new StringReader(""), seg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 构造
|
* 构造
|
||||||
*
|
*
|
||||||
* @param mmSeg 模式{@link MMSeg}
|
* @param seg 模式{@link Seg}
|
||||||
*/
|
*/
|
||||||
public MmsegEngine(final MMSeg mmSeg) {
|
public MmsegEngine(final Seg seg) {
|
||||||
this.mmSeg = mmSeg;
|
this.seg = seg;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Result parse(final CharSequence text) {
|
public Result parse(final CharSequence text) {
|
||||||
this.mmSeg.reset(StrUtil.getReader(text));
|
final MMSeg mmSeg = new MMSeg(StrUtil.getReader(text), seg);
|
||||||
return new MmsegResult(this.mmSeg);
|
return new MmsegResult(mmSeg);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -21,10 +21,10 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* MYNLP 中文NLP工具包分词实现<br>
|
* MYNLP 中文NLP工具包分词实现<br>
|
||||||
* 项目地址:https://github.com/mayabot/mynlp/
|
* 项目地址:https://github.com/mayabot/mynlp/<br>
|
||||||
|
* {@link Lexer} 线程安全
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class MynlpEngine implements TokenizerEngine {
|
public class MynlpEngine implements TokenizerEngine {
|
||||||
|
|
||||||
|
@ -22,7 +22,8 @@ import org.dromara.hutool.extra.tokenizer.engine.TokenizerEngine;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Word分词引擎实现<br>
|
* Word分词引擎实现<br>
|
||||||
* 项目地址:https://github.com/ysc/word
|
* 项目地址:https://github.com/ysc/word<br>
|
||||||
|
* {@link Segmentation} 线程安全
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*
|
*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user