mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-04-19 03:01:48 +08:00
csv support beginLineNo
This commit is contained in:
parent
d68cc83b7d
commit
9fd7c02c86
@ -15,6 +15,7 @@
|
||||
* 【setting】 Props增加toProperties方法(issue#1701@Github)
|
||||
* 【http 】 UserAgent增加getOsVersion方法(issue#I3YZUQ@Gitee)
|
||||
* 【jwt 】 JWT增加validate方法(issue#I3YDM4@Gitee)
|
||||
* 【core 】 CsvReader支持指定读取开始行号和结束行号(issue#I3ZMZL@Gitee)
|
||||
|
||||
### 🐞Bug修复
|
||||
* 【core 】 修复RadixUtil.decode非static问题(issue#I3YPEH@Gitee)
|
||||
|
@ -69,4 +69,12 @@ public class CsvData implements Iterable<CsvRow>, Serializable {
|
||||
public Iterator<CsvRow> iterator() {
|
||||
return this.rows.iterator();
|
||||
}
|
||||
|
||||
/**
 * Builds a debug-oriented text form of this object by concatenating the
 * {@code header} and {@code rows} fields.
 *
 * @return a string shaped like {@code CsvData{header=..., rows=...}}
 */
@Override
|
||||
public String toString() {
|
||||
return "CsvData{" +
|
||||
"header=" + header +
|
||||
", rows=" + rows +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
@ -52,7 +52,11 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
/**
|
||||
* 当前行号
|
||||
*/
|
||||
private long lineNo;
|
||||
private long lineNo = -1;
|
||||
/**
|
||||
* 引号内的行数
|
||||
*/
|
||||
private long inQuotesLineCount;
|
||||
/**
|
||||
* 第一行字段数,用于检查每行字段数是否一致
|
||||
*/
|
||||
@ -87,7 +91,7 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
if (false == config.containsHeader) {
|
||||
throw new IllegalStateException("No header available - header parsing is disabled");
|
||||
}
|
||||
if (lineNo == 0) {
|
||||
if (lineNo < config.beginLineNo) {
|
||||
throw new IllegalStateException("No header available - call nextRow() first");
|
||||
}
|
||||
return header.fields;
|
||||
@ -100,25 +104,35 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
* @throws IORuntimeException IO读取异常
|
||||
*/
|
||||
public CsvRow nextRow() throws IORuntimeException {
|
||||
long startingLineNo;
|
||||
List<String> currentFields;
|
||||
int fieldCount;
|
||||
while (false == finished) {
|
||||
startingLineNo = ++lineNo;
|
||||
currentFields = readLine();
|
||||
fieldCount = currentFields.size();
|
||||
if (fieldCount < 1) {
|
||||
// 空List表示读取结束
|
||||
break;
|
||||
}
|
||||
|
||||
// 读取范围校验
|
||||
if(lineNo < config.beginLineNo){
|
||||
// 未达到读取起始行,继续
|
||||
continue;
|
||||
}
|
||||
if(lineNo > config.endLineNo){
|
||||
// 超出结束行,读取结束
|
||||
break;
|
||||
}
|
||||
|
||||
// 跳过空行
|
||||
if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) {
|
||||
// [""]表示空行
|
||||
continue;
|
||||
}
|
||||
|
||||
// 检查每行的字段数是否一致
|
||||
if (config.errorOnDifferentFieldCount) {
|
||||
if (firstLineFieldCount == -1) {
|
||||
if (firstLineFieldCount < 0) {
|
||||
firstLineFieldCount = fieldCount;
|
||||
} else if (fieldCount != firstLineFieldCount) {
|
||||
throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount));
|
||||
@ -137,7 +151,7 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
continue;
|
||||
}
|
||||
|
||||
return new CsvRow(startingLineNo, null == header ? null : header.headerMap, currentFields);
|
||||
return new CsvRow(lineNo, null == header ? null : header.headerMap, currentFields);
|
||||
}
|
||||
|
||||
return null;
|
||||
@ -161,12 +175,24 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
}
|
||||
|
||||
/**
|
||||
* 读取一行数据
|
||||
* 读取一行数据,如果读取结束,返回size为0的List<br>
|
||||
* 空行是size为1的List,唯一元素是""
|
||||
*
|
||||
* <p>
|
||||
* 行号要考虑注释行和引号包装的内容中的换行
|
||||
* </p>
|
||||
*
|
||||
* @return 一行数据
|
||||
* @throws IORuntimeException IO异常
|
||||
*/
|
||||
private List<String> readLine() throws IORuntimeException {
|
||||
// 矫正行号
|
||||
// 当一行内容包含多行数据时,记录首行行号,但是读取下一行时,需要把多行内容的行数加上
|
||||
if(inQuotesLineCount > 0){
|
||||
this.lineNo += this.inQuotesLineCount;
|
||||
this.inQuotesLineCount = 0;
|
||||
}
|
||||
|
||||
final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
|
||||
|
||||
final StrBuilder currentField = this.currentField;
|
||||
@ -211,6 +237,7 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
if(inComment){
|
||||
if (c == CharUtil.CR || c == CharUtil.LF) {
|
||||
// 注释行以换行符为结尾
|
||||
lineNo++;
|
||||
inComment = false;
|
||||
}
|
||||
// 跳过注释行中的任何字符
|
||||
@ -225,9 +252,9 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
// End of quoted text
|
||||
inQuotes = false;
|
||||
} else {
|
||||
// 新行
|
||||
if ((c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR) {
|
||||
lineNo++;
|
||||
// 字段内容中新行
|
||||
if (isLineEnd(c)) {
|
||||
inQuotesLineCount++;
|
||||
}
|
||||
}
|
||||
// 普通字段字符
|
||||
@ -280,6 +307,7 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
// restore fields
|
||||
this.preChar = preChar;
|
||||
|
||||
lineNo++;
|
||||
return currentFields;
|
||||
}
|
||||
|
||||
@ -301,12 +329,24 @@ public final class CsvParser implements Closeable, Serializable {
|
||||
currentFields.add(field);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether the given character counts as a line terminator.
* A character counts when it is CR or LF and {@code preChar} is not CR,
* so the LF of a CR LF pair (and a CR directly following another CR)
* is not counted as an additional line end.
*
* NOTE(review): {@code preChar} is not a parameter of this method; it is
* resolved from the enclosing scope (presumably the parser's field).
* readLine() assigns {@code this.preChar = preChar} from a local variable
* only when it returns, so confirm that the value seen here is current
* when this method is called in the middle of a line.
|
||||
* @param c the character to test
|
||||
* @return {@code true} if {@code c} ends a line
|
||||
* @since 5.7.4
|
||||
*/
|
||||
private boolean isLineEnd(char c){
|
||||
return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
|
||||
}
|
||||
|
||||
/**
|
||||
* 内部Buffer
|
||||
*
|
||||
* @author looly
|
||||
*/
|
||||
private static class Buffer {
|
||||
private static class Buffer implements Serializable{
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
final char[] buf;
|
||||
|
||||
/**
|
||||
|
@ -17,6 +17,10 @@ public class CsvReadConfig extends CsvConfig implements Serializable {
|
||||
protected boolean skipEmptyRows = true;
|
||||
/** Whether to throw an exception when rows have different field counts; defaults to false. */
|
||||
protected boolean errorOnDifferentFieldCount;
|
||||
/** First line to read (inclusive), expressed as a line number of the original file; defaults to 0. */
|
||||
protected long beginLineNo;
|
||||
/** Last line to read (inclusive), expressed as a line number of the original file; the default is large enough to act as "no limit". */
|
||||
protected long endLineNo = Long.MAX_VALUE-1;
|
||||
|
||||
/**
|
||||
* 默认配置
|
||||
@ -59,4 +63,28 @@ public class CsvReadConfig extends CsvConfig implements Serializable {
|
||||
this.errorOnDifferentFieldCount = errorOnDifferentFieldCount;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the first line to read (inclusive). The value is a line number of
* the original file; the field defaults to 0 when this setter is not called.
|
||||
*
|
||||
* @param beginLineNo line number to start reading from (inclusive)
|
||||
* @return this
|
||||
* @since 5.7.4
|
||||
*/
|
||||
public CsvReadConfig setBeginLineNo(long beginLineNo) {
|
||||
this.beginLineNo = beginLineNo;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the last line to read (inclusive). The value is a line number of
* the original file; when this setter is not called the field keeps its
* initial value of {@code Long.MAX_VALUE-1}, i.e. effectively no limit.
|
||||
*
|
||||
* @param endLineNo line number to stop reading at (inclusive)
|
||||
* @return this
|
||||
* @since 5.7.4
|
||||
*/
|
||||
public CsvReadConfig setEndLineNo(long endLineNo) {
|
||||
this.endLineNo = endLineNo;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package cn.hutool.core.text.csv;
|
||||
|
||||
import cn.hutool.core.bean.BeanUtil;
|
||||
import cn.hutool.core.lang.Assert;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
@ -30,14 +31,14 @@ public final class CsvRow implements List<String> {
|
||||
* @param fields 数据列表
|
||||
*/
|
||||
public CsvRow(final long originalLineNumber, final Map<String, Integer> headerMap, final List<String> fields) {
|
||||
|
||||
// Reject a missing field list up front (Assert.notNull presumably throws on null — confirm);
// headerMap is stored without a check.
Assert.notNull(fields, "fields must be not null!");
|
||||
this.originalLineNumber = originalLineNumber;
|
||||
this.headerMap = headerMap;
|
||||
this.fields = fields;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取原始行号,多行情况下为首行行号。
|
||||
* 获取原始行号,多行情况下为首行行号。忽略注释行
|
||||
*
|
||||
* @return the original line number 行号
|
||||
*/
|
||||
|
@ -1,6 +1,7 @@
|
||||
package cn.hutool.core.text.csv;
|
||||
|
||||
import cn.hutool.core.annotation.Alias;
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.io.FileUtil;
|
||||
import cn.hutool.core.io.resource.ResourceUtil;
|
||||
import cn.hutool.core.lang.Console;
|
||||
@ -19,6 +20,7 @@ public class CsvReaderTest {
|
||||
CsvReader reader = new CsvReader();
|
||||
CsvData data = reader.read(ResourceUtil.getReader("test.csv", CharsetUtil.CHARSET_UTF_8));
|
||||
Assert.assertEquals("sss,sss", data.getRow(0).get(0));
|
||||
Assert.assertEquals(1, data.getRow(0).getOriginalLineNumber());
|
||||
Assert.assertEquals("性别", data.getRow(0).get(2));
|
||||
Assert.assertEquals("关注\"对象\"", data.getRow(0).get(3));
|
||||
}
|
||||
@ -97,4 +99,50 @@ public class CsvReaderTest {
|
||||
Console.log(row.getByName("案件ID"));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reads test_lines.csv with the default configuration and checks that each
 * row reports the expected original line number.
 */
@Test
|
||||
public void lineNoTest(){
|
||||
CsvReader reader = new CsvReader();
|
||||
CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));
|
||||
Assert.assertEquals(1, data.getRow(0).getOriginalLineNumber());
|
||||
Assert.assertEquals("a,b,c,d", CollUtil.join(data.getRow(0), ","));
|
||||
|
||||
Assert.assertEquals(4, data.getRow(2).getOriginalLineNumber());
|
||||
Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(2), ","));
|
||||
|
||||
// Row index 3 (the record a,s,d,f) is expected on original line 6; the asserted line numbers are 0-based
|
||||
Assert.assertEquals(6, data.getRow(3).getOriginalLineNumber());
|
||||
Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(3), ","));
|
||||
}
|
||||
|
||||
/**
 * Reads test_lines.csv with beginLineNo set to 2 and checks that the rows
 * returned start at original line 2 and keep their original line numbers.
 */
@Test
|
||||
public void lineLimitTest(){
|
||||
// Start reading at original line 2
|
||||
CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2));
|
||||
CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));
|
||||
|
||||
Assert.assertEquals(2, data.getRow(0).getOriginalLineNumber());
|
||||
Assert.assertEquals("1,2,3,4", CollUtil.join(data.getRow(0), ","));
|
||||
|
||||
Assert.assertEquals(4, data.getRow(1).getOriginalLineNumber());
|
||||
Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(1), ","));
|
||||
|
||||
// Row index 2 (the record a,s,d,f) is expected on original line 6; the asserted line numbers are 0-based
|
||||
Assert.assertEquals(6, data.getRow(2).getOriginalLineNumber());
|
||||
Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(2), ","));
|
||||
}
|
||||
|
||||
/**
 * Reads test_lines.csv with beginLineNo set to 2 and header parsing enabled.
 * The first data row is expected on original line 4, which suggests the row
 * on line 2 is taken as the header (presumably — confirm against CsvParser).
 */
@Test
|
||||
public void lineLimitWithHeaderTest(){
|
||||
// Start reading at original line 2
|
||||
CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2).setContainsHeader(true));
|
||||
CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));
|
||||
|
||||
Assert.assertEquals(4, data.getRow(0).getOriginalLineNumber());
|
||||
Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(0), ","));
|
||||
|
||||
// Row index 1 (the record a,s,d,f) is expected on original line 6; the asserted line numbers are 0-based
|
||||
Assert.assertEquals(6, data.getRow(1).getOriginalLineNumber());
|
||||
Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(1), ","));
|
||||
}
|
||||
}
|
||||
|
@ -1,2 +1,2 @@
|
||||
# 这是一行注释,读取时应忽略
|
||||
"sss,sss",姓名,"性别",关注"对象",年龄,"","""
|
||||
"sss,sss",姓名,"性别",关注"对象",年龄,"","""
|
||||
|
Can't render this file because it contains an unexpected character in line 2 and column 33.
|
7
hutool-core/src/test/resources/test_lines.csv
Executable file
7
hutool-core/src/test/resources/test_lines.csv
Executable file
@ -0,0 +1,7 @@
|
||||
# 这是一行注释,读取时应忽略
|
||||
a,b,c,d
|
||||
1,2,3,4
|
||||
# 这是一行注释,读取时应忽略
|
||||
q,w,e,r,"我是一段
|
||||
带换行的内容"
|
||||
a,s,d,f
|
Can't render this file because it has a wrong number of fields in line 2.
|
Loading…
x
Reference in New Issue
Block a user