From bebacf2d00341ac49903f13372b0190849b8f08c Mon Sep 17 00:00:00 2001 From: Looly Date: Sat, 25 Sep 2021 18:20:12 +0800 Subject: [PATCH] fix split --- .../java/cn/hutool/core/lang/Matcher.java | 5 +- .../cn/hutool/core/text/CharSequenceUtil.java | 34 +-- .../java/cn/hutool/core/text/StrSplitter.java | 224 +++++------------- .../hutool/core/text/finder/CharFinder.java | 57 +++++ .../core/text/finder/CharMatcherFinder.java | 44 ++++ .../cn/hutool/core/text/finder/Finder.java | 34 +++ .../hutool/core/text/finder/LengthFinder.java | 38 +++ .../core/text/finder/PatternFinder.java | 61 +++++ .../cn/hutool/core/text/finder/StrFinder.java | 43 ++++ .../hutool/core/text/finder/TextFinder.java | 26 ++ .../cn/hutool/core/text/split/SplitIter.java | 154 ++++++++++++ .../cn/hutool/core/text/split/Splitter.java | 12 - .../java/cn/hutool/core/util/CharUtil.java | 10 +- .../cn/hutool/core/text/StrMatcherTest.java | 8 +- .../hutool/core/text/split/SplitIterTest.java | 95 ++++++++ .../{lang => text/split}/StrSpliterTest.java | 17 +- .../java/cn/hutool/core/util/StrUtilTest.java | 9 - 17 files changed, 641 insertions(+), 230 deletions(-) create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/finder/CharFinder.java create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/finder/CharMatcherFinder.java create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/finder/Finder.java create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/finder/LengthFinder.java create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/finder/PatternFinder.java create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/finder/StrFinder.java create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/finder/TextFinder.java create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/split/SplitIter.java delete mode 100644 hutool-core/src/main/java/cn/hutool/core/text/split/Splitter.java create mode 100644 hutool-core/src/test/java/cn/hutool/core/text/split/SplitIterTest.java rename hutool-core/src/test/java/cn/hutool/core/{lang => text/split}/StrSpliterTest.java (76%) diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/Matcher.java b/hutool-core/src/main/java/cn/hutool/core/lang/Matcher.java index 8627e30e5..5867e7d68 100644 --- a/hutool-core/src/main/java/cn/hutool/core/lang/Matcher.java +++ b/hutool-core/src/main/java/cn/hutool/core/lang/Matcher.java @@ -2,14 +2,15 @@ package cn.hutool.core.lang; /** * 匹配接口 - * @author Looly * * @param 匹配的对象类型 + * @author Looly */ @FunctionalInterface -public interface Matcher{ +public interface Matcher { /** * 给定对象是否匹配 + * * @param t 对象 * @return 是否匹配 */ diff --git a/hutool-core/src/main/java/cn/hutool/core/text/CharSequenceUtil.java b/hutool-core/src/main/java/cn/hutool/core/text/CharSequenceUtil.java index 065d2e763..c33987cf0 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/CharSequenceUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/CharSequenceUtil.java @@ -1699,19 +1699,6 @@ public class CharSequenceUtil { return split(str, separator, 0); } - /** - * 切分字符串,并根据指定的映射函数,进行切分后的元素类型转换 - * - * @param str 被切分的字符串 - * @param separator 分隔符字符 - * @param mapping 切分后的字符串元素的转换方法 - * @return 切分后的集合,元素类型是经过 mapping 转换后的 - * @since 5.7.14 - */ - public static List split(CharSequence str, char separator, Function mapping) { - return split(str, separator, 0, mapping); - } - /** * 切分字符串,如果分隔符不存在则返回原字符串 * @@ -1766,20 +1753,6 @@ public class CharSequenceUtil { return split(str, separator, limit, false, false); } - /** - * 切分字符串,不去除切分后每个元素两边的空白符,不去除空白项,会根据指定的映射函数,进行切分后的元素类型转换 - * - * @param str 被切分的字符串 - * @param separator 分隔符字符 - * @param limit 限制分片数,-1不限制 - * @param mapping 切分后的字符串元素的转换方法 - * @return 切分后的集合,元素类型是经过 mapping 转换后的 - * @since 5.7.14 - */ - public static List split(CharSequence str, char separator, int limit, Function mapping) { - return split(str, separator, limit, false, false, mapping); - } - /** * 切分字符串,去除切分后每个元素两边的空白符,去除空白项 * @@ -1856,7 +1829,7 @@ public class CharSequenceUtil { * @since 3.0.8 */ public static List split(CharSequence str, char separator, int limit, boolean isTrim, boolean ignoreEmpty) { - return split(str, separator, limit, isTrim, ignoreEmpty, Function.identity()); + return StrSplitter.split(str, separator, limit, isTrim, ignoreEmpty); } /** @@ -1865,17 +1838,16 @@ public class CharSequenceUtil { * @param str 被切分的字符串 * @param separator 分隔符字符 * @param limit 限制分片数,-1不限制 - * @param isTrim 是否去除切分字符串后每个元素两边的空格 * @param ignoreEmpty 是否忽略空串 * @param mapping 切分后的字符串元素的转换方法 * @return 切分后的集合,元素类型是经过 mapping 转换后的 * @since 5.7.14 */ - public static List split(CharSequence str, char separator, int limit, boolean isTrim, boolean ignoreEmpty, Function mapping) { + public static List split(CharSequence str, char separator, int limit, boolean ignoreEmpty, Function mapping) { if (null == str) { return new ArrayList<>(0); } - return StrSplitter.split(str.toString(), separator, limit, isTrim, ignoreEmpty, mapping); + return StrSplitter.split(str.toString(), separator, limit, ignoreEmpty, mapping); } /** diff --git a/hutool-core/src/main/java/cn/hutool/core/text/StrSplitter.java b/hutool-core/src/main/java/cn/hutool/core/text/StrSplitter.java index 39c8ddb96..0e341fca3 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/StrSplitter.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/StrSplitter.java @@ -1,18 +1,22 @@ package cn.hutool.core.text; import cn.hutool.core.lang.PatternPool; +import cn.hutool.core.text.finder.CharFinder; +import cn.hutool.core.text.finder.CharMatcherFinder; +import cn.hutool.core.text.finder.LengthFinder; +import cn.hutool.core.text.finder.PatternFinder; +import cn.hutool.core.text.finder.StrFinder; +import cn.hutool.core.text.split.SplitIter; import cn.hutool.core.util.CharUtil; -import cn.hutool.core.util.NumberUtil; import cn.hutool.core.util.StrUtil; import java.util.ArrayList; import java.util.List; import java.util.function.Function; -import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * 字符串切分器 + * 字符串切分器,封装统一的字符串分割静态方法 * * @author Looly * @since 5.7.0 @@ -28,7 +32,7 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.0.8 */ - public static List splitPath(String str) { + public static List splitPath(CharSequence str) { return splitPath(str, 0); } @@ -39,7 +43,7 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.0.8 */ - public static String[] splitPathToArray(String str) { + public static String[] splitPathToArray(CharSequence str) { return toArray(splitPath(str)); } @@ -51,7 +55,7 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.0.8 */ - public static List splitPath(String str, int limit) { + public static List splitPath(CharSequence str, int limit) { return split(str, StrUtil.C_SLASH, limit, true, true); } @@ -63,7 +67,7 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.0.8 */ - public static String[] splitPathToArray(String str, int limit) { + public static String[] splitPathToArray(CharSequence str, int limit) { return toArray(splitPath(str, limit)); } @@ -76,7 +80,7 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.2.1 */ - public static List splitTrim(String str, char separator, boolean ignoreEmpty) { + public static List splitTrim(CharSequence str, char separator, boolean ignoreEmpty) { return split(str, separator, 0, true, ignoreEmpty); } @@ -90,7 +94,7 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.0.8 */ - public static List split(String str, char separator, boolean isTrim, boolean ignoreEmpty) { + public static List split(CharSequence str, char separator, boolean isTrim, boolean ignoreEmpty) { return split(str, separator, 0, isTrim, ignoreEmpty); } @@ -104,7 +108,7 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.0.8 */ - public static List splitTrim(String str, char separator, int limit, boolean ignoreEmpty) { + public static List splitTrim(CharSequence str, char separator, int limit, boolean ignoreEmpty) { return split(str, separator, limit, true, ignoreEmpty, false); } @@ -119,7 +123,7 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.0.8 */ - public static List split(String str, char separator, int limit, boolean isTrim, boolean ignoreEmpty) { + public static List split(CharSequence str, char separator, int limit, boolean isTrim, boolean ignoreEmpty) { return split(str, separator, limit, isTrim, ignoreEmpty, false); } @@ -129,14 +133,13 @@ public class StrSplitter { * @param str 被切分的字符串 * @param separator 分隔符字符 * @param limit 限制分片数,-1不限制 - * @param isTrim 是否去除切分字符串后每个元素两边的空格 * @param ignoreEmpty 是否忽略空串 * @param mapping 切分后的字符串元素的转换方法 * @return 切分后的集合,元素类型是经过 mapping 转换后的 * @since 5.7.14 */ - public static List split(String str, char separator, int limit, boolean isTrim, boolean ignoreEmpty, Function mapping) { - return split(str, separator, limit, isTrim, ignoreEmpty, false, mapping); + public static List split(CharSequence str, char separator, int limit, boolean ignoreEmpty, Function mapping) { + return split(str, separator, limit, ignoreEmpty, false, mapping); } /** @@ -150,14 +153,14 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.2.1 */ - public static List splitIgnoreCase(String str, char separator, int limit, boolean isTrim, boolean ignoreEmpty) { + public static List splitIgnoreCase(CharSequence str, char separator, int limit, boolean isTrim, boolean ignoreEmpty) { return split(str, separator, limit, isTrim, ignoreEmpty, true); } /** * 切分字符串 * - * @param str 被切分的字符串 + * @param text 被切分的字符串 * @param separator 分隔符字符 * @param limit 限制分片数,-1不限制 * @param isTrim 是否去除切分字符串后每个元素两边的空格 @@ -166,46 +169,26 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.2.1 */ - public static List split(String str, char separator, int limit, boolean isTrim, boolean ignoreEmpty, boolean ignoreCase) { - return split(str, separator, limit, isTrim, ignoreEmpty, ignoreCase, Function.identity()); + public static List split(CharSequence text, char separator, int limit, boolean isTrim, boolean ignoreEmpty, boolean ignoreCase) { + return split(text, separator, limit, ignoreEmpty, trimFunc(isTrim)); } /** * 切分字符串 * - * @param str 被切分的字符串 + * @param text 被切分的字符串 * @param separator 分隔符字符 * @param limit 限制分片数,-1不限制 - * @param isTrim 是否去除切分字符串后每个元素两边的空格 * @param ignoreEmpty 是否忽略空串 * @param ignoreCase 是否忽略大小写 * @param mapping 切分后的字符串元素的转换方法 * @return 切分后的集合,元素类型是经过 mapping 转换后的 * @since 5.7.14 */ - public static List split(String str, char separator, int limit, boolean isTrim, boolean ignoreEmpty, boolean ignoreCase, Function mapping) { - if (StrUtil.isEmpty(str)) { - return new ArrayList<>(0); - } - if (limit == 1) { - return addToList(new ArrayList<>(1), str, isTrim, ignoreEmpty, mapping); - } - - final ArrayList list = new ArrayList<>(limit > 0 ? limit : 16); - int len = str.length(); - int start = 0;//切分后每个部分的起始 - for (int i = 0; i < len; i++) { - if (NumberUtil.equals(separator, str.charAt(i), ignoreCase)) { - addToList(list, str.substring(start, i), isTrim, ignoreEmpty, mapping); - start = i + 1;//i+1同时将start与i保持一致 - - //检查是否超出范围(最大允许limit-1个,剩下一个留给末尾字符串) - if (limit > 0 && list.size() > limit - 2) { - break; - } - } - } - return addToList(list, str.substring(start, len), isTrim, ignoreEmpty, mapping);//收尾 + public static List split(CharSequence text, char separator, int limit, boolean ignoreEmpty, + boolean ignoreCase, Function mapping) { + final SplitIter splitIter = new SplitIter(text, new CharFinder(separator, ignoreCase), limit, ignoreEmpty); + return splitIter.toList(mapping); } /** @@ -313,7 +296,7 @@ public class StrSplitter { /** * 切分字符串 * - * @param str 被切分的字符串 + * @param text 被切分的字符串 * @param separator 分隔符字符串 * @param limit 限制分片数 * @param isTrim 是否去除切分字符串后每个元素两边的空格 @@ -322,40 +305,9 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.2.1 */ - public static List split(String str, String separator, int limit, boolean isTrim, boolean ignoreEmpty, boolean ignoreCase) { - if (StrUtil.isEmpty(str)) { - return new ArrayList<>(0); - } - if (limit == 1) { - return addToList(new ArrayList<>(1), str, isTrim, ignoreEmpty, Function.identity()); - } - - if (StrUtil.isEmpty(separator)) {//分隔符为空时按照空白符切分 - return split(str, limit); - } else if (separator.length() == 1) {//分隔符只有一个字符长度时按照单分隔符切分 - return split(str, separator.charAt(0), limit, isTrim, ignoreEmpty, ignoreCase); - } - - final ArrayList list = new ArrayList<>(); - int len = str.length(); - int separatorLen = separator.length(); - int start = 0; - int i = 0; - while (i < len) { - i = StrUtil.indexOf(str, separator, start, ignoreCase); - if (i > -1) { - addToList(list, str.substring(start, i), isTrim, ignoreEmpty, Function.identity()); - start = i + separatorLen; - - //检查是否超出范围(最大允许limit-1个,剩下一个留给末尾字符串) - if (limit > 0 && list.size() > limit - 2) { - break; - } - } else { - break; - } - } - return addToList(list, str.substring(start, len), isTrim, ignoreEmpty, Function.identity()); + public static List split(String text, String separator, int limit, boolean isTrim, boolean ignoreEmpty, boolean ignoreCase) { + final SplitIter splitIter = new SplitIter(text, new StrFinder(separator, ignoreCase), limit, ignoreEmpty); + return splitIter.toList(isTrim); } /** @@ -379,34 +331,17 @@ public class StrSplitter { * 使用空白符切分字符串
* 切分后的字符串两边不包含空白符,空串或空白符串并不做为元素之一 * - * @param str 被切分的字符串 + * @param text 被切分的字符串 * @param limit 限制分片数 * @return 切分后的集合 * @since 3.0.8 */ - public static List split(String str, int limit) { - if (StrUtil.isEmpty(str)) { + public static List split(String text, int limit) { + if (StrUtil.isEmpty(text)) { return new ArrayList<>(0); } - if (limit == 1) { - return addToList(new ArrayList<>(1), str, true, true, Function.identity()); - } - - final ArrayList list = new ArrayList<>(); - int len = str.length(); - int start = 0;//切分后每个部分的起始 - for (int i = 0; i < len; i++) { - if (CharUtil.isBlankChar(str.charAt(i))) { - addToList(list, str.substring(start, i), true, true, Function.identity()); - start = i + 1;//i+1同时将start与i保持一致 - - //检查是否超出范围(最大允许limit-1个,剩下一个留给末尾字符串) - if (limit > 0 && list.size() > limit - 2) { - break; - } - } - } - return addToList(list, str.substring(start, len), true, true, Function.identity());//收尾 + final SplitIter splitIter = new SplitIter(text, new CharMatcherFinder(CharUtil::isBlankChar), limit, true); + return splitIter.toList(false); } /** @@ -420,7 +355,6 @@ public class StrSplitter { public static String[] splitToArray(String str, int limit) { return toArray(split(str, limit)); } - //---------------------------------------------------------------------------------------------- Split by regex /** @@ -442,7 +376,7 @@ public class StrSplitter { /** * 通过正则切分字符串 * - * @param str 字符串 + * @param text 字符串 * @param separatorPattern 分隔符正则{@link Pattern} * @param limit 限制分片数 * @param isTrim 是否去除切分字符串后每个元素两边的空格 @@ -450,32 +384,12 @@ public class StrSplitter { * @return 切分后的集合 * @since 3.0.8 */ - public static List split(String str, Pattern separatorPattern, int limit, boolean isTrim, boolean ignoreEmpty) { - if (StrUtil.isEmpty(str)) { + public static List split(String text, Pattern separatorPattern, int limit, boolean isTrim, boolean ignoreEmpty) { + if (StrUtil.isEmpty(text)) { return new ArrayList<>(0); } - if (limit == 1) { - return addToList(new ArrayList<>(1), str, isTrim, ignoreEmpty, Function.identity()); - } - - if (null == separatorPattern) {//分隔符为空时按照空白符切分 - return split(str, limit); - } - - final Matcher matcher = separatorPattern.matcher(str); - final ArrayList list = new ArrayList<>(); - int len = str.length(); - int start = 0; - while (matcher.find()) { - addToList(list, str.substring(start, matcher.start()), isTrim, ignoreEmpty, Function.identity()); - start = matcher.end(); - - //检查是否超出范围(最大允许limit-1个,剩下一个留给末尾字符串) - if (limit > 0 && list.size() > limit - 2) { - break; - } - } - return addToList(list, str.substring(start, len), isTrim, ignoreEmpty, Function.identity()); + final SplitIter splitIter = new SplitIter(text, new PatternFinder(separatorPattern), limit, ignoreEmpty); + return splitIter.toList(isTrim); } /** @@ -492,57 +406,25 @@ public class StrSplitter { public static String[] splitToArray(String str, Pattern separatorPattern, int limit, boolean isTrim, boolean ignoreEmpty) { return toArray(split(str, separatorPattern, limit, isTrim, ignoreEmpty)); } - //---------------------------------------------------------------------------------------------- Split by length /** * 根据给定长度,将给定字符串截取为多个部分 * - * @param str 字符串 + * @param text 字符串 * @param len 每一个小节的长度 * @return 截取后的字符串数组 */ - public static String[] splitByLength(String str, int len) { - int partCount = str.length() / len; - int lastPartCount = str.length() % len; - int fixPart = 0; - if (lastPartCount != 0) { - fixPart = 1; - } - - final String[] strs = new String[partCount + fixPart]; - for (int i = 0; i < partCount + fixPart; i++) { - if (i == partCount + fixPart - 1 && lastPartCount != 0) { - strs[i] = str.substring(i * len, i * len + lastPartCount); - } else { - strs[i] = str.substring(i * len, i * len + len); - } - } - return strs; + public static String[] splitByLength(String text, int len) { + SplitIter splitIter = new SplitIter(text, + new LengthFinder(len), + Integer.MAX_VALUE, + false + ); + return splitIter.toArray(false); } - //---------------------------------------------------------------------------------------------------------- Private method start - /** - * 将字符串加入List中 - * - * @param list 列表 - * @param part 被加入的部分 - * @param isTrim 是否去除两端空白符 - * @param ignoreEmpty 是否略过空字符串(空字符串不做为一个元素) - * @param mapping part的类型转换方法 - * @return 列表集合 - */ - private static List addToList(List list, String part, boolean isTrim, boolean ignoreEmpty, Function mapping) { - if (isTrim) { - part = StrUtil.trim(part); - } - if (false == ignoreEmpty || false == part.isEmpty()) { - list.add(mapping.apply(part)); - } - return list; - } - /** * List转Array * @@ -552,5 +434,15 @@ public class StrSplitter { private static String[] toArray(List list) { return list.toArray(new String[0]); } + + /** + * Trim函数 + * + * @param isTrim 是否trim + * @return {@link Function} + */ + private static Function trimFunc(boolean isTrim){ + return (str) -> isTrim ? StrUtil.trim(str) : str; + } //---------------------------------------------------------------------------------------------------------- Private method end } diff --git a/hutool-core/src/main/java/cn/hutool/core/text/finder/CharFinder.java b/hutool-core/src/main/java/cn/hutool/core/text/finder/CharFinder.java new file mode 100644 index 000000000..0fed9fefc --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/text/finder/CharFinder.java @@ -0,0 +1,57 @@ +package cn.hutool.core.text.finder; + +import cn.hutool.core.lang.Assert; +import cn.hutool.core.util.NumberUtil; + +/** + * 字符查找器 + * + * @author looly + * @since 5.7.14 + */ +public class CharFinder extends TextFinder { + private static final long serialVersionUID = 1L; + + private final char c; + private final boolean caseInsensitive; + + /** + * 构造,不忽略字符大小写 + * + * @param c 被查找的字符 + */ + public CharFinder(char c) { + this(c, false); + } + + /** + * 构造 + * + * @param c 被查找的字符 + * @param caseInsensitive 是否忽略大小写 + */ + public CharFinder(char c, boolean caseInsensitive) { + this.c = c; + this.caseInsensitive = caseInsensitive; + } + + @Override + public int start(int from) { + Assert.notNull(this.text, "Text to find must be not null!"); + final int length = text.length(); + for (int i = from; i < length; i++) { + if (NumberUtil.equals(c, text.charAt(i), caseInsensitive)) { + return i; + } + } + return -1; + } + + @Override + public int end(int start) { + if (start < 0) { + return -1; + } + return start + 1; + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/text/finder/CharMatcherFinder.java b/hutool-core/src/main/java/cn/hutool/core/text/finder/CharMatcherFinder.java new file mode 100644 index 000000000..301f3a257 --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/text/finder/CharMatcherFinder.java @@ -0,0 +1,44 @@ +package cn.hutool.core.text.finder; + +import cn.hutool.core.lang.Assert; +import cn.hutool.core.lang.Matcher; + +/** + * 字符匹配查找器 + * + * @since 5.7.14 + * @author looly + */ +public class CharMatcherFinder extends TextFinder { + private static final long serialVersionUID = 1L; + + private final Matcher matcher; + + /** + * 构造 + * @param matcher 被查找的字符匹配器 + */ + public CharMatcherFinder(Matcher matcher) { + this.matcher = matcher; + } + + @Override + public int start(int from) { + Assert.notNull(this.text, "Text to find must be not null!"); + final int length = text.length(); + for (int i = from; i < length; i++) { + if(matcher.match(text.charAt(i))){ + return i; + } + } + return -1; + } + + @Override + public int end(int start) { + if(start < 0){ + return -1; + } + return start + 1; + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/text/finder/Finder.java b/hutool-core/src/main/java/cn/hutool/core/text/finder/Finder.java new file mode 100644 index 000000000..82ab597ed --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/text/finder/Finder.java @@ -0,0 +1,34 @@ +package cn.hutool.core.text.finder; + +/** + * 字符串查找接口,通过调用{@link #start(int)}查找开始位置,再调用{@link #end(int)}找结束位置 + * + * @author looly + * @since 5.7.14 + */ +public interface Finder { + + /** + * 返回开始位置,即起始字符位置(包含),未找到返回-1 + * + * @param from 查找的开始位置(包含 + * @return 起始字符位置,未找到返回-1 + */ + int start(int from); + + /** + * 返回结束位置,即最后一个字符后的位置(不包含) + * + * @param start 找到的起始位置 + * @return 结束位置,未找到返回-1 + */ + int end(int start); + + /** + * 复位查找器,用于重用对象 + * @return this + */ + default Finder reset(){ + return this; + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/text/finder/LengthFinder.java b/hutool-core/src/main/java/cn/hutool/core/text/finder/LengthFinder.java new file mode 100644 index 000000000..ca19d4424 --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/text/finder/LengthFinder.java @@ -0,0 +1,38 @@ +package cn.hutool.core.text.finder; + +import cn.hutool.core.lang.Assert; + +/** + * 固定长度查找器 + * + * @since 5.7.14 + * @author looly + */ +public class LengthFinder extends TextFinder { + private static final long serialVersionUID = 1L; + + private final int length; + + /** + * 构造 + * @param length 长度 + */ + public LengthFinder(int length) { + this.length = length; + } + + @Override + public int start(int from) { + Assert.notNull(this.text, "Text to find must be not null!"); + final int result = from + length; + if(result < text.length()){ + return result; + } + return -1; + } + + @Override + public int end(int start) { + return start; + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/text/finder/PatternFinder.java b/hutool-core/src/main/java/cn/hutool/core/text/finder/PatternFinder.java new file mode 100644 index 000000000..a393b9e23 --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/text/finder/PatternFinder.java @@ -0,0 +1,61 @@ +package cn.hutool.core.text.finder; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * 正则查找器 + * + * @author looly + * @since 5.7.14 + */ +public class PatternFinder extends TextFinder { + private static final long serialVersionUID = 1L; + + private final Pattern pattern; + private Matcher matcher; + + /** + * 构造 + * + * @param regex 被查找的正则表达式 + * @param caseInsensitive 是否忽略大小写 + */ + public PatternFinder(String regex, boolean caseInsensitive) { + this(Pattern.compile(regex, caseInsensitive ? Pattern.CASE_INSENSITIVE : 0)); + } + + /** + * 构造 + * + * @param pattern 被查找的正则{@link Pattern} + */ + public PatternFinder(Pattern pattern) { + this.pattern = pattern; + } + + @Override + public TextFinder setText(CharSequence text) { + this.matcher = pattern.matcher(text); + return super.setText(text); + } + + @Override + public int start(int from) { + if (matcher.find(from)) { + return matcher.start(); + } + return -1; + } + + @Override + public int end(int start) { + return matcher.end(); + } + + @Override + public PatternFinder reset() { + this.matcher.reset(); + return this; + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/text/finder/StrFinder.java b/hutool-core/src/main/java/cn/hutool/core/text/finder/StrFinder.java new file mode 100644 index 000000000..43ecb8b82 --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/text/finder/StrFinder.java @@ -0,0 +1,43 @@ +package cn.hutool.core.text.finder; + +import cn.hutool.core.lang.Assert; +import cn.hutool.core.util.StrUtil; + +/** + * 字符查找器 + * + * @author looly + * @since 5.7.14 + */ +public class StrFinder extends TextFinder { + private static final long serialVersionUID = 1L; + + private final CharSequence str; + private final boolean caseInsensitive; + + /** + * 构造 + * + * @param str 被查找的字符串 + * @param caseInsensitive 是否忽略大小写 + */ + public StrFinder(CharSequence str, boolean caseInsensitive) { + Assert.notEmpty(str); + this.str = str; + this.caseInsensitive = caseInsensitive; + } + + @Override + public int start(int from) { + Assert.notNull(this.text, "Text to find must be not null!"); + return StrUtil.indexOf(text, str, from, caseInsensitive); + } + + @Override + public int end(int start) { + if (start < 0) { + return -1; + } + return start + str.length(); + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/text/finder/TextFinder.java b/hutool-core/src/main/java/cn/hutool/core/text/finder/TextFinder.java new file mode 100644 index 000000000..7045aa8fd --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/text/finder/TextFinder.java @@ -0,0 +1,26 @@ +package cn.hutool.core.text.finder; + +import java.io.Serializable; + +/** + * 文本查找抽象类 + * + * @author looly + * @since 5.7.14 + */ +public abstract class TextFinder implements Finder, Serializable { + private static final long serialVersionUID = 1L; + + protected CharSequence text; + + /** + * 设置被查找的文本 + * + * @param text 文本 + * @return this + */ + public TextFinder setText(CharSequence text) { + this.text = text; + return this; + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/text/split/SplitIter.java b/hutool-core/src/main/java/cn/hutool/core/text/split/SplitIter.java new file mode 100644 index 000000000..95652083b --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/text/split/SplitIter.java @@ -0,0 +1,154 @@ +package cn.hutool.core.text.split; + +import cn.hutool.core.collection.ComputeIter; +import cn.hutool.core.lang.Assert; +import cn.hutool.core.text.finder.TextFinder; +import cn.hutool.core.util.StrUtil; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +/** + * 字符串切分迭代器
+ * 此迭代器是字符串切分的懒模式实现,实例化后不完成切分,只有调用{@link #hasNext()}或遍历时才完成切分
+ * 此迭代器非线程安全 + * + * @author looly + * @since 5.7.14 + */ +public class SplitIter extends ComputeIter implements Serializable { + private static final long serialVersionUID = 1L; + + private final String text; + private final TextFinder finder; + private final int limit; + private final boolean ignoreEmpty; + + /** + * 上一次的结束位置 + */ + private int offset; + /** + * 计数器,用于判断是否超过limit + */ + private int count; + + /** + * 构造 + * + * @param text 文本 + * @param separatorFinder 分隔符匹配器 + * @param limit 限制数量 + * @param ignoreEmpty 是否忽略"" + */ + public SplitIter(CharSequence text, TextFinder separatorFinder, int limit, boolean ignoreEmpty) { + Assert.notNull(text, "Text must be not null!"); + this.text = text.toString(); + this.finder = separatorFinder.setText(text); + this.limit = limit > 0 ? limit : Integer.MAX_VALUE; + this.ignoreEmpty = ignoreEmpty; + } + + @Override + protected String computeNext() { + Assert.notNull(this.text, "Text to find must be not null!"); + // 达到数量上限或末尾,结束 + if (count >= limit || offset > text.length()) { + return null; + } + + // 达到数量上限 + if (count == (limit - 1)) { + // 当到达限制次数时,最后一个元素为剩余部分 + if (ignoreEmpty && offset == text.length()) { + // 最后一个是空串 + return null; + } + + // 结尾整个作为一个元素 + count++; + return text.substring(offset); + } + + final int start = finder.start(offset); + // 无分隔符,结束 + if (start < 0) { + // 如果不再有分隔符,但是遗留了字符,则单独作为一个段 + if (offset <= text.length()) { + final String result = text.substring(offset); + if (false == ignoreEmpty || false == result.isEmpty()) { + // 返回非空串 + offset = Integer.MAX_VALUE; + return result; + } + } + return null; + } + + // 找到新的分隔符位置 + final int end = finder.end(start); + final String result = text.substring(offset, start); + offset = end; + + if (ignoreEmpty && result.isEmpty()) { + // 发现空串且需要忽略时,跳过之 + return computeNext(); + } + + count++; + return result; + } + + /** + * 重置 + */ + public void reset() { + this.finder.reset(); + this.offset = 0; + this.count = 0; + } + + /** + * 获取切分后的对象数组 + * + * @param trim 是否去除元素两边空格 + * @return 切分后的列表 + */ + public String[] toArray(boolean trim) { + return toList(trim).toArray(new String[0]); + } + + /** + * 获取切分后的对象列表 + * + * @param trim 是否去除元素两边空格 + * @return 切分后的列表 + */ + public List toList(boolean trim) { + return toList((str) -> trim ? StrUtil.trim(str) : str); + } + + /** + * 获取切分后的对象列表 + * + * @param 元素类型 + * @return 切分后的列表 + */ + public List toList(Function mapping) { + final List result = new ArrayList<>(); + while (this.hasNext()) { + final T apply = mapping.apply(this.next()); + if(ignoreEmpty && StrUtil.isEmptyIfStr(apply)){ + // 对于mapping之后依旧是String的情况,ignoreEmpty依旧有效 + continue; + } + result.add(apply); + } + if (result.isEmpty()) { + return new ArrayList<>(0); + } + return result; + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/text/split/Splitter.java b/hutool-core/src/main/java/cn/hutool/core/text/split/Splitter.java deleted file mode 100644 index 3510e89aa..000000000 --- a/hutool-core/src/main/java/cn/hutool/core/text/split/Splitter.java +++ /dev/null @@ -1,12 +0,0 @@ -package cn.hutool.core.text.split; - -import java.io.Serializable; - -public class Splitter implements Serializable { - private static final long serialVersionUID = 1L; - - private int limit; - private boolean ignoreEmpty; - private boolean caseInsensitive; - -} diff --git a/hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java index 802ceda47..258f977a3 100644 --- a/hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java @@ -295,14 +295,14 @@ public class CharUtil implements CharPool { /** * 比较两个字符是否相同 * - * @param c1 字符1 - * @param c2 字符2 - * @param ignoreCase 是否忽略大小写 + * @param c1 字符1 + * @param c2 字符2 + * @param caseInsensitive 是否忽略大小写 * @return 是否相同 * @since 4.0.3 */ - public static boolean equals(char c1, char c2, boolean ignoreCase) { - if (ignoreCase) { + public static boolean equals(char c1, char c2, boolean caseInsensitive) { + if (caseInsensitive) { return Character.toLowerCase(c1) == Character.toLowerCase(c2); } return c1 == c2; diff --git a/hutool-core/src/test/java/cn/hutool/core/text/StrMatcherTest.java b/hutool-core/src/test/java/cn/hutool/core/text/StrMatcherTest.java index 4ebf4ff9a..53720fda8 100644 --- a/hutool-core/src/test/java/cn/hutool/core/text/StrMatcherTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/text/StrMatcherTest.java @@ -12,7 +12,13 @@ public class StrMatcherTest { public void matcherTest(){ final StrMatcher strMatcher = new StrMatcher("${name}-${age}-${gender}-${country}-${province}-${city}-${status}"); final Map match = strMatcher.match("小明-19-男-中国-河南-郑州-已婚"); - Console.log(match); + Assert.assertEquals("小明", match.get("name")); + Assert.assertEquals("19", match.get("age")); + Assert.assertEquals("男", match.get("gender")); + Assert.assertEquals("中国", match.get("country")); + Assert.assertEquals("河南", match.get("province")); + Assert.assertEquals("郑州", match.get("city")); + Assert.assertEquals("已婚", match.get("status")); } @Test diff --git a/hutool-core/src/test/java/cn/hutool/core/text/split/SplitIterTest.java b/hutool-core/src/test/java/cn/hutool/core/text/split/SplitIterTest.java new file mode 100644 index 000000000..c1d09fecd --- /dev/null +++ b/hutool-core/src/test/java/cn/hutool/core/text/split/SplitIterTest.java @@ -0,0 +1,95 @@ +package cn.hutool.core.text.split; + +import cn.hutool.core.text.finder.CharFinder; +import cn.hutool.core.text.finder.LengthFinder; +import cn.hutool.core.text.finder.PatternFinder; +import cn.hutool.core.text.finder.StrFinder; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; +import java.util.regex.Pattern; + +public class SplitIterTest { + + @Test + public void splitByCharTest(){ + String str1 = "a, ,,efedsfs, ddf,"; + + //不忽略"" + SplitIter splitIter = new SplitIter(str1, + new CharFinder(',', false), + Integer.MAX_VALUE, + false + ); + Assert.assertEquals(6, splitIter.toList(false).size()); + } + + @Test + public void splitByCharIgnoreEmptyTest(){ + String str1 = "a, ,,efedsfs, ddf,"; + + SplitIter splitIter = new SplitIter(str1, + new CharFinder(',', false), + Integer.MAX_VALUE, + true + ); + + final List strings = splitIter.toList(false); + Assert.assertEquals(4, strings.size()); + } + + @Test + public void splitByStrTest(){ + String str1 = "a, ,,efedsfs, ddf,"; + + SplitIter splitIter = new SplitIter(str1, + new StrFinder("e", false), + Integer.MAX_VALUE, + true + ); + + final List strings = splitIter.toList(false); + Assert.assertEquals(3, strings.size()); + } + + @Test + public void splitByPatternTest(){ + String str1 = "a, ,,efedsfs, ddf,"; + + SplitIter splitIter = new SplitIter(str1, + new PatternFinder(Pattern.compile("\\s")), + Integer.MAX_VALUE, + true + ); + + final List strings = splitIter.toList(false); + Assert.assertEquals(3, strings.size()); + } + + @Test + public void splitByLengthTest(){ + String text = "1234123412341234"; + SplitIter splitIter = new SplitIter(text, + new LengthFinder(4), + Integer.MAX_VALUE, + false + ); + + final List strings = splitIter.toList(false); + Assert.assertEquals(4, strings.size()); + } + + @Test + public void splitLimitTest(){ + String text = "55:02:18"; + SplitIter splitIter = new SplitIter(text, + new CharFinder(':'), + 3, + false + ); + + final List strings = splitIter.toList(false); + Assert.assertEquals(3, strings.size()); + } +} diff --git a/hutool-core/src/test/java/cn/hutool/core/lang/StrSpliterTest.java b/hutool-core/src/test/java/cn/hutool/core/text/split/StrSpliterTest.java similarity index 76% rename from hutool-core/src/test/java/cn/hutool/core/lang/StrSpliterTest.java rename to hutool-core/src/test/java/cn/hutool/core/text/split/StrSpliterTest.java index 072f391ae..aee0e0166 100644 --- a/hutool-core/src/test/java/cn/hutool/core/lang/StrSpliterTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/text/split/StrSpliterTest.java @@ -1,11 +1,10 @@ -package cn.hutool.core.lang; - -import java.util.List; +package cn.hutool.core.text.split; +import cn.hutool.core.text.StrSplitter; import org.junit.Assert; import org.junit.Test; -import cn.hutool.core.text.StrSplitter; +import java.util.List; /** * {@link StrSplitter} 单元测试 @@ -18,6 +17,7 @@ public class StrSpliterTest { public void splitByCharTest(){ String str1 = "a, ,efedsfs, ddf"; List split = StrSplitter.split(str1, ',', 0, true, true); + Assert.assertEquals("ddf", split.get(2)); Assert.assertEquals(3, split.size()); } @@ -45,4 +45,13 @@ public class StrSpliterTest { Assert.assertEquals("bin", split.get(2)); Assert.assertEquals(3, split.size()); } + + @Test + public void splitMappingTest() { + String str = "1.2."; + List split = StrSplitter.split(str, '.', 0, true, true, Long::parseLong); + Assert.assertEquals(2, split.size()); + Assert.assertEquals(Long.valueOf(1L), split.get(0)); + Assert.assertEquals(Long.valueOf(2L), split.get(1)); + } } diff --git a/hutool-core/src/test/java/cn/hutool/core/util/StrUtilTest.java b/hutool-core/src/test/java/cn/hutool/core/util/StrUtilTest.java index 24f1a51b6..73d8e6e7f 100644 --- a/hutool-core/src/test/java/cn/hutool/core/util/StrUtilTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/util/StrUtilTest.java @@ -81,15 +81,6 @@ public class StrUtilTest { Assert.assertEquals("", split.get(2)); } - @Test - public void splitTest3() { - String str = "1.2."; - List split = StrUtil.split(str, '.', 0, true, true, Long::parseLong); - Assert.assertEquals(2, split.size()); - Assert.assertEquals(Long.valueOf(1L), split.get(0)); - Assert.assertEquals(Long.valueOf(2L), split.get(1)); - } - @Test public void splitToLongTest() { String str = "1,2,3,4, 5";