diff --git a/CHANGELOG.md b/CHANGELOG.md index 465713eb8..a776319a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ * 【setting】 Props增加toProperties方法(issue#1701@Github) * 【http 】 UserAgent增加getOsVersion方法(issue#I3YZUQ@Gitee) * 【jwt 】 JWT增加validate方法(issue#I3YDM4@Gitee) +* 【core 】 CsvReader支持指定读取开始行号和结束行号(issue#I3ZMZL@Gitee) ### 🐞Bug修复 * 【core 】 修复RadixUtil.decode非static问题(issue#I3YPEH@Gitee) diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvData.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvData.java index a02290e97..1bf31a92e 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvData.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvData.java @@ -69,4 +69,12 @@ public class CsvData implements Iterable, Serializable { public Iterator iterator() { return this.rows.iterator(); } + + @Override + public String toString() { + return "CsvData{" + + "header=" + header + + ", rows=" + rows + + '}'; + } } diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java index a883ee621..c6d0bae2f 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java @@ -52,7 +52,11 @@ public final class CsvParser implements Closeable, Serializable { /** * 当前行号 */ - private long lineNo; + private long lineNo = -1; + /** + * 引号内的行数 + */ + private long inQuotesLineCount; /** * 第一行字段数,用于检查每行字段数是否一致 */ @@ -87,7 +91,7 @@ public final class CsvParser implements Closeable, Serializable { if (false == config.containsHeader) { throw new IllegalStateException("No header available - header parsing is disabled"); } - if (lineNo == 0) { + if (lineNo < config.beginLineNo) { throw new IllegalStateException("No header available - call nextRow() first"); } return header.fields; @@ -100,25 +104,35 @@ public final class CsvParser implements Closeable, Serializable { * 
@throws IORuntimeException IO读取异常 */ public CsvRow nextRow() throws IORuntimeException { - long startingLineNo; List currentFields; int fieldCount; while (false == finished) { - startingLineNo = ++lineNo; currentFields = readLine(); fieldCount = currentFields.size(); if (fieldCount < 1) { + // 空List表示读取结束 + break; + } + + // 读取范围校验 + if(lineNo < config.beginLineNo){ + // 未达到读取起始行,继续 + continue; + } + if(lineNo > config.endLineNo){ + // 超出结束行,读取结束 break; } // 跳过空行 if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) { + // [""]表示空行 continue; } // 检查每行的字段数是否一致 if (config.errorOnDifferentFieldCount) { - if (firstLineFieldCount == -1) { + if (firstLineFieldCount < 0) { firstLineFieldCount = fieldCount; } else if (fieldCount != firstLineFieldCount) { throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount)); @@ -137,7 +151,7 @@ public final class CsvParser implements Closeable, Serializable { continue; } - return new CsvRow(startingLineNo, null == header ? null : header.headerMap, currentFields); + return new CsvRow(lineNo, null == header ? null : header.headerMap, currentFields); } return null; @@ -161,12 +175,24 @@ public final class CsvParser implements Closeable, Serializable { } /** - * 读取一行数据 + * 读取一行数据,如果读取结束,返回size为0的List
+ * 空行是size为1的List,唯一元素是"" + * + *

+ * 行号要考虑注释行和引号包装的内容中的换行 + *

+ * * @return 一行数据 * @throws IORuntimeException IO异常 */ private List readLine() throws IORuntimeException { + // Correct the line number: + // a row spanning several physical lines reports its first line number, so the extra lines counted inside quotes are added only when the next row is read + if(inQuotesLineCount > 0){ + this.lineNo += this.inQuotesLineCount; + this.inQuotesLineCount = 0; + } + final List currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY); final StrBuilder currentField = this.currentField; @@ -211,6 +237,7 @@ public final class CsvParser implements Closeable, Serializable { if(inComment){ if (c == CharUtil.CR || c == CharUtil.LF) { // 注释行以换行符为结尾 + lineNo++; inComment = false; } // 跳过注释行中的任何字符 @@ -225,9 +252,9 @@ public final class CsvParser implements Closeable, Serializable { // End of quoted text inQuotes = false; } else { - // 新行 - if ((c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR) { - lineNo++; + // Line break inside a quoted field; pass the loop-local preChar (the field is only written back when the row ends) so CRLF counts once + if (isLineEnd(c, preChar)) { + inQuotesLineCount++; } } // 普通字段字符 @@ -280,6 +307,7 @@ public final class CsvParser implements Closeable, Serializable { // restore fields this.preChar = preChar; + lineNo++; return currentFields; } @@ -301,12 +329,25 @@ public final class CsvParser implements Closeable, Serializable { currentFields.add(field); } + /** + * Tells whether {@code c} ends a line; the LF of a CRLF pair is not counted a second time + * @param c current character + * @param preChar character read immediately before {@code c}, supplied by the caller because the field copy is stale while a row is being read + * @return {@code true} if {@code c} is CR or LF and the previous character is not CR + * @since 5.7.4 + */ + private boolean isLineEnd(char c, int preChar){ + return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR; + } + /** * 内部Buffer * * @author looly */ - private static class Buffer { + private static class Buffer implements Serializable{ + private static final long serialVersionUID = 1L; + final char[] buf; /** diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvReadConfig.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvReadConfig.java index 897d72d81..b1e7b5b97 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvReadConfig.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvReadConfig.java @@ -17,6 +17,10 @@ public class CsvReadConfig extends CsvConfig 
implements Serializable { protected boolean skipEmptyRows = true; /** 每行字段个数不同时是否抛出异常,默认false */ protected boolean errorOnDifferentFieldCount; + /** 定义开始的行(包括),此处为原始文件行号 */ + protected long beginLineNo; + /** 结束的行(包括),此处为原始文件行号 */ + protected long endLineNo = Long.MAX_VALUE-1; /** * 默认配置 @@ -59,4 +63,28 @@ public class CsvReadConfig extends CsvConfig implements Serializable { this.errorOnDifferentFieldCount = errorOnDifferentFieldCount; return this; } + + /** + * 设置开始的行(包括),默认0,此处为原始文件行号 + * + * @param beginLineNo 开始的行号(包括) + * @return this + * @since 5.7.4 + */ + public CsvReadConfig setBeginLineNo(long beginLineNo) { + this.beginLineNo = beginLineNo; + return this; + } + + /** + * 设置结束的行(包括),默认不限制,此处为原始文件行号 + * + * @param endLineNo 结束的行号(包括) + * @return this + * @since 5.7.4 + */ + public CsvReadConfig setEndLineNo(long endLineNo) { + this.endLineNo = endLineNo; + return this; + } } diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvRow.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvRow.java index 671a09d54..868df3034 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvRow.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvRow.java @@ -1,6 +1,7 @@ package cn.hutool.core.text.csv; import cn.hutool.core.bean.BeanUtil; +import cn.hutool.core.lang.Assert; import java.util.Collection; import java.util.Iterator; @@ -30,14 +31,14 @@ public final class CsvRow implements List { * @param fields 数据列表 */ public CsvRow(final long originalLineNumber, final Map headerMap, final List fields) { - + Assert.notNull(fields, "fields must be not null!"); this.originalLineNumber = originalLineNumber; this.headerMap = headerMap; this.fields = fields; } /** - * 获取原始行号,多行情况下为首行行号。 + * Gets the original line number (0-based; for a multi-line row, the number of its first line). Comment lines are counted in the numbering, not skipped * * @return the original line number 行号 */ diff --git a/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvReaderTest.java b/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvReaderTest.java index 8a579ff5f..0133e102c 100644 
--- a/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvReaderTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvReaderTest.java @@ -1,6 +1,7 @@ package cn.hutool.core.text.csv; import cn.hutool.core.annotation.Alias; +import cn.hutool.core.collection.CollUtil; import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.resource.ResourceUtil; import cn.hutool.core.lang.Console; @@ -19,6 +20,7 @@ public class CsvReaderTest { CsvReader reader = new CsvReader(); CsvData data = reader.read(ResourceUtil.getReader("test.csv", CharsetUtil.CHARSET_UTF_8)); Assert.assertEquals("sss,sss", data.getRow(0).get(0)); + Assert.assertEquals(1, data.getRow(0).getOriginalLineNumber()); Assert.assertEquals("性别", data.getRow(0).get(2)); Assert.assertEquals("关注\"对象\"", data.getRow(0).get(3)); } @@ -97,4 +99,50 @@ public class CsvReaderTest { Console.log(row.getByName("案件ID")); } } + + @Test + public void lineNoTest(){ + CsvReader reader = new CsvReader(); + CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8)); + Assert.assertEquals(1, data.getRow(0).getOriginalLineNumber()); + Assert.assertEquals("a,b,c,d", CollUtil.join(data.getRow(0), ",")); + + Assert.assertEquals(4, data.getRow(2).getOriginalLineNumber()); + Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(2), ",")); + + // 文件中第3行数据,对应原始行号是6(从0开始) + Assert.assertEquals(6, data.getRow(3).getOriginalLineNumber()); + Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(3), ",")); + } + + @Test + public void lineLimitTest(){ + // 从原始第2行开始读取 + CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2)); + CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8)); + + Assert.assertEquals(2, data.getRow(0).getOriginalLineNumber()); + Assert.assertEquals("1,2,3,4", CollUtil.join(data.getRow(0), ",")); + + Assert.assertEquals(4, data.getRow(1).getOriginalLineNumber()); + 
Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(1), ",")); + + // 文件中第3行数据,对应原始行号是6(从0开始) + Assert.assertEquals(6, data.getRow(2).getOriginalLineNumber()); + Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(2), ",")); + } + + @Test + public void lineLimitWithHeaderTest(){ + // 从原始第2行开始读取 + CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2).setContainsHeader(true)); + CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8)); + + Assert.assertEquals(4, data.getRow(0).getOriginalLineNumber()); + Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(0), ",")); + + // 文件中第3行数据,对应原始行号是6(从0开始) + Assert.assertEquals(6, data.getRow(1).getOriginalLineNumber()); + Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(1), ",")); + } } diff --git a/hutool-core/src/test/resources/test.csv b/hutool-core/src/test/resources/test.csv index 2dbdefa30..58b1a68de 100644 --- a/hutool-core/src/test/resources/test.csv +++ b/hutool-core/src/test/resources/test.csv @@ -1,2 +1,2 @@ # 这是一行注释,读取时应忽略 -"sss,sss",姓名,"性别",关注"对象",年龄,"",""" \ No newline at end of file +"sss,sss",姓名,"性别",关注"对象",年龄,"",""" diff --git a/hutool-core/src/test/resources/test_lines.csv b/hutool-core/src/test/resources/test_lines.csv new file mode 100755 index 000000000..7e288c538 --- /dev/null +++ b/hutool-core/src/test/resources/test_lines.csv @@ -0,0 +1,7 @@ +# 这是一行注释,读取时应忽略 +a,b,c,d +1,2,3,4 +# 这是一行注释,读取时应忽略 +q,w,e,r,"我是一段 +带换行的内容" +a,s,d,f