mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-05-09 23:51:34 +08:00
csv support beginLineNo
This commit is contained in:
parent
d68cc83b7d
commit
9fd7c02c86
@ -15,6 +15,7 @@
|
|||||||
* 【setting】 Props增加toProperties方法(issue#1701@Github)
|
* 【setting】 Props增加toProperties方法(issue#1701@Github)
|
||||||
* 【http 】 UserAgent增加getOsVersion方法(issue#I3YZUQ@Gitee)
|
* 【http 】 UserAgent增加getOsVersion方法(issue#I3YZUQ@Gitee)
|
||||||
* 【jwt 】 JWT增加validate方法(issue#I3YDM4@Gitee)
|
* 【jwt 】 JWT增加validate方法(issue#I3YDM4@Gitee)
|
||||||
|
* 【core 】 CsvReader支持指定读取开始行号和结束行号(issue#I3ZMZL@Gitee)
|
||||||
|
|
||||||
### 🐞Bug修复
|
### 🐞Bug修复
|
||||||
* 【core 】 修复RadixUtil.decode非static问题(issue#I3YPEH@Gitee)
|
* 【core 】 修复RadixUtil.decode非static问题(issue#I3YPEH@Gitee)
|
||||||
|
@ -69,4 +69,12 @@ public class CsvData implements Iterable<CsvRow>, Serializable {
|
|||||||
public Iterator<CsvRow> iterator() {
|
public Iterator<CsvRow> iterator() {
|
||||||
return this.rows.iterator();
|
return this.rows.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "CsvData{" +
|
||||||
|
"header=" + header +
|
||||||
|
", rows=" + rows +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -52,7 +52,11 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
/**
|
/**
|
||||||
* 当前行号
|
* 当前行号
|
||||||
*/
|
*/
|
||||||
private long lineNo;
|
private long lineNo = -1;
|
||||||
|
/**
|
||||||
|
* 引号内的行数
|
||||||
|
*/
|
||||||
|
private long inQuotesLineCount;
|
||||||
/**
|
/**
|
||||||
* 第一行字段数,用于检查每行字段数是否一致
|
* 第一行字段数,用于检查每行字段数是否一致
|
||||||
*/
|
*/
|
||||||
@ -87,7 +91,7 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
if (false == config.containsHeader) {
|
if (false == config.containsHeader) {
|
||||||
throw new IllegalStateException("No header available - header parsing is disabled");
|
throw new IllegalStateException("No header available - header parsing is disabled");
|
||||||
}
|
}
|
||||||
if (lineNo == 0) {
|
if (lineNo < config.beginLineNo) {
|
||||||
throw new IllegalStateException("No header available - call nextRow() first");
|
throw new IllegalStateException("No header available - call nextRow() first");
|
||||||
}
|
}
|
||||||
return header.fields;
|
return header.fields;
|
||||||
@ -100,25 +104,35 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
* @throws IORuntimeException IO读取异常
|
* @throws IORuntimeException IO读取异常
|
||||||
*/
|
*/
|
||||||
public CsvRow nextRow() throws IORuntimeException {
|
public CsvRow nextRow() throws IORuntimeException {
|
||||||
long startingLineNo;
|
|
||||||
List<String> currentFields;
|
List<String> currentFields;
|
||||||
int fieldCount;
|
int fieldCount;
|
||||||
while (false == finished) {
|
while (false == finished) {
|
||||||
startingLineNo = ++lineNo;
|
|
||||||
currentFields = readLine();
|
currentFields = readLine();
|
||||||
fieldCount = currentFields.size();
|
fieldCount = currentFields.size();
|
||||||
if (fieldCount < 1) {
|
if (fieldCount < 1) {
|
||||||
|
// 空List表示读取结束
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 读取范围校验
|
||||||
|
if(lineNo < config.beginLineNo){
|
||||||
|
// 未达到读取起始行,继续
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(lineNo > config.endLineNo){
|
||||||
|
// 超出结束行,读取结束
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 跳过空行
|
// 跳过空行
|
||||||
if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) {
|
if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) {
|
||||||
|
// [""]表示空行
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 检查每行的字段数是否一致
|
// 检查每行的字段数是否一致
|
||||||
if (config.errorOnDifferentFieldCount) {
|
if (config.errorOnDifferentFieldCount) {
|
||||||
if (firstLineFieldCount == -1) {
|
if (firstLineFieldCount < 0) {
|
||||||
firstLineFieldCount = fieldCount;
|
firstLineFieldCount = fieldCount;
|
||||||
} else if (fieldCount != firstLineFieldCount) {
|
} else if (fieldCount != firstLineFieldCount) {
|
||||||
throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount));
|
throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount));
|
||||||
@ -137,7 +151,7 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CsvRow(startingLineNo, null == header ? null : header.headerMap, currentFields);
|
return new CsvRow(lineNo, null == header ? null : header.headerMap, currentFields);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
@ -161,12 +175,24 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 读取一行数据
|
* 读取一行数据,如果读取结束,返回size为0的List<br>
|
||||||
|
* 空行是size为1的List,唯一元素是""
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* 行号要考虑注释行和引号包装的内容中的换行
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @return 一行数据
|
* @return 一行数据
|
||||||
* @throws IORuntimeException IO异常
|
* @throws IORuntimeException IO异常
|
||||||
*/
|
*/
|
||||||
private List<String> readLine() throws IORuntimeException {
|
private List<String> readLine() throws IORuntimeException {
|
||||||
|
// 矫正行号
|
||||||
|
// 当一行内容包含多行数据时,记录首行行号,但是读取下一行时,需要把多行内容的行数加上
|
||||||
|
if(inQuotesLineCount > 0){
|
||||||
|
this.lineNo += this.inQuotesLineCount;
|
||||||
|
this.inQuotesLineCount = 0;
|
||||||
|
}
|
||||||
|
|
||||||
final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
|
final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
|
||||||
|
|
||||||
final StrBuilder currentField = this.currentField;
|
final StrBuilder currentField = this.currentField;
|
||||||
@ -211,6 +237,7 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
if(inComment){
|
if(inComment){
|
||||||
if (c == CharUtil.CR || c == CharUtil.LF) {
|
if (c == CharUtil.CR || c == CharUtil.LF) {
|
||||||
// 注释行以换行符为结尾
|
// 注释行以换行符为结尾
|
||||||
|
lineNo++;
|
||||||
inComment = false;
|
inComment = false;
|
||||||
}
|
}
|
||||||
// 跳过注释行中的任何字符
|
// 跳过注释行中的任何字符
|
||||||
@ -225,9 +252,9 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
// End of quoted text
|
// End of quoted text
|
||||||
inQuotes = false;
|
inQuotes = false;
|
||||||
} else {
|
} else {
|
||||||
// 新行
|
// 字段内容中新行
|
||||||
if ((c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR) {
|
if (isLineEnd(c)) {
|
||||||
lineNo++;
|
inQuotesLineCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 普通字段字符
|
// 普通字段字符
|
||||||
@ -280,6 +307,7 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
// restore fields
|
// restore fields
|
||||||
this.preChar = preChar;
|
this.preChar = preChar;
|
||||||
|
|
||||||
|
lineNo++;
|
||||||
return currentFields;
|
return currentFields;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -301,12 +329,24 @@ public final class CsvParser implements Closeable, Serializable {
|
|||||||
currentFields.add(field);
|
currentFields.add(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 是否行结束符
|
||||||
|
* @param c 符号
|
||||||
|
* @return 是否结束
|
||||||
|
* @since 5.7.4
|
||||||
|
*/
|
||||||
|
private boolean isLineEnd(char c){
|
||||||
|
return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 内部Buffer
|
* 内部Buffer
|
||||||
*
|
*
|
||||||
* @author looly
|
* @author looly
|
||||||
*/
|
*/
|
||||||
private static class Buffer {
|
private static class Buffer implements Serializable{
|
||||||
|
private static final long serialVersionUID = 1L;
|
||||||
|
|
||||||
final char[] buf;
|
final char[] buf;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -17,6 +17,10 @@ public class CsvReadConfig extends CsvConfig implements Serializable {
|
|||||||
protected boolean skipEmptyRows = true;
|
protected boolean skipEmptyRows = true;
|
||||||
/** 每行字段个数不同时是否抛出异常,默认false */
|
/** 每行字段个数不同时是否抛出异常,默认false */
|
||||||
protected boolean errorOnDifferentFieldCount;
|
protected boolean errorOnDifferentFieldCount;
|
||||||
|
/** 定义开始的行(包括),此处为原始文件行号 */
|
||||||
|
protected long beginLineNo;
|
||||||
|
/** 结束的行(包括),此处为原始文件行号 */
|
||||||
|
protected long endLineNo = Long.MAX_VALUE-1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 默认配置
|
* 默认配置
|
||||||
@ -59,4 +63,28 @@ public class CsvReadConfig extends CsvConfig implements Serializable {
|
|||||||
this.errorOnDifferentFieldCount = errorOnDifferentFieldCount;
|
this.errorOnDifferentFieldCount = errorOnDifferentFieldCount;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 设置开始的行(包括),默认0,此处为原始文件行号
|
||||||
|
*
|
||||||
|
* @param beginLineNo 开始的行号(包括)
|
||||||
|
* @return this
|
||||||
|
* @since 5.7.4
|
||||||
|
*/
|
||||||
|
public CsvReadConfig setBeginLineNo(long beginLineNo) {
|
||||||
|
this.beginLineNo = beginLineNo;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 设置结束的行(包括),默认不限制,此处为原始文件行号
|
||||||
|
*
|
||||||
|
* @param endLineNo 结束的行号(包括)
|
||||||
|
* @return this
|
||||||
|
* @since 5.7.4
|
||||||
|
*/
|
||||||
|
public CsvReadConfig setEndLineNo(long endLineNo) {
|
||||||
|
this.endLineNo = endLineNo;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package cn.hutool.core.text.csv;
|
package cn.hutool.core.text.csv;
|
||||||
|
|
||||||
import cn.hutool.core.bean.BeanUtil;
|
import cn.hutool.core.bean.BeanUtil;
|
||||||
|
import cn.hutool.core.lang.Assert;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
@ -30,14 +31,14 @@ public final class CsvRow implements List<String> {
|
|||||||
* @param fields 数据列表
|
* @param fields 数据列表
|
||||||
*/
|
*/
|
||||||
public CsvRow(final long originalLineNumber, final Map<String, Integer> headerMap, final List<String> fields) {
|
public CsvRow(final long originalLineNumber, final Map<String, Integer> headerMap, final List<String> fields) {
|
||||||
|
Assert.notNull(fields, "fields must be not null!");
|
||||||
this.originalLineNumber = originalLineNumber;
|
this.originalLineNumber = originalLineNumber;
|
||||||
this.headerMap = headerMap;
|
this.headerMap = headerMap;
|
||||||
this.fields = fields;
|
this.fields = fields;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 获取原始行号,多行情况下为首行行号。
|
* 获取原始行号,多行情况下为首行行号。忽略注释行
|
||||||
*
|
*
|
||||||
* @return the original line number 行号
|
* @return the original line number 行号
|
||||||
*/
|
*/
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package cn.hutool.core.text.csv;
|
package cn.hutool.core.text.csv;
|
||||||
|
|
||||||
import cn.hutool.core.annotation.Alias;
|
import cn.hutool.core.annotation.Alias;
|
||||||
|
import cn.hutool.core.collection.CollUtil;
|
||||||
import cn.hutool.core.io.FileUtil;
|
import cn.hutool.core.io.FileUtil;
|
||||||
import cn.hutool.core.io.resource.ResourceUtil;
|
import cn.hutool.core.io.resource.ResourceUtil;
|
||||||
import cn.hutool.core.lang.Console;
|
import cn.hutool.core.lang.Console;
|
||||||
@ -19,6 +20,7 @@ public class CsvReaderTest {
|
|||||||
CsvReader reader = new CsvReader();
|
CsvReader reader = new CsvReader();
|
||||||
CsvData data = reader.read(ResourceUtil.getReader("test.csv", CharsetUtil.CHARSET_UTF_8));
|
CsvData data = reader.read(ResourceUtil.getReader("test.csv", CharsetUtil.CHARSET_UTF_8));
|
||||||
Assert.assertEquals("sss,sss", data.getRow(0).get(0));
|
Assert.assertEquals("sss,sss", data.getRow(0).get(0));
|
||||||
|
Assert.assertEquals(1, data.getRow(0).getOriginalLineNumber());
|
||||||
Assert.assertEquals("性别", data.getRow(0).get(2));
|
Assert.assertEquals("性别", data.getRow(0).get(2));
|
||||||
Assert.assertEquals("关注\"对象\"", data.getRow(0).get(3));
|
Assert.assertEquals("关注\"对象\"", data.getRow(0).get(3));
|
||||||
}
|
}
|
||||||
@ -97,4 +99,50 @@ public class CsvReaderTest {
|
|||||||
Console.log(row.getByName("案件ID"));
|
Console.log(row.getByName("案件ID"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void lineNoTest(){
|
||||||
|
CsvReader reader = new CsvReader();
|
||||||
|
CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));
|
||||||
|
Assert.assertEquals(1, data.getRow(0).getOriginalLineNumber());
|
||||||
|
Assert.assertEquals("a,b,c,d", CollUtil.join(data.getRow(0), ","));
|
||||||
|
|
||||||
|
Assert.assertEquals(4, data.getRow(2).getOriginalLineNumber());
|
||||||
|
Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(2), ","));
|
||||||
|
|
||||||
|
// 文件中第3行数据,对应原始行号是6(从0开始)
|
||||||
|
Assert.assertEquals(6, data.getRow(3).getOriginalLineNumber());
|
||||||
|
Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(3), ","));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void lineLimitTest(){
|
||||||
|
// 从原始第2行开始读取
|
||||||
|
CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2));
|
||||||
|
CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));
|
||||||
|
|
||||||
|
Assert.assertEquals(2, data.getRow(0).getOriginalLineNumber());
|
||||||
|
Assert.assertEquals("1,2,3,4", CollUtil.join(data.getRow(0), ","));
|
||||||
|
|
||||||
|
Assert.assertEquals(4, data.getRow(1).getOriginalLineNumber());
|
||||||
|
Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(1), ","));
|
||||||
|
|
||||||
|
// 文件中第3行数据,对应原始行号是6(从0开始)
|
||||||
|
Assert.assertEquals(6, data.getRow(2).getOriginalLineNumber());
|
||||||
|
Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(2), ","));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void lineLimitWithHeaderTest(){
|
||||||
|
// 从原始第2行开始读取
|
||||||
|
CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2).setContainsHeader(true));
|
||||||
|
CsvData data = reader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));
|
||||||
|
|
||||||
|
Assert.assertEquals(4, data.getRow(0).getOriginalLineNumber());
|
||||||
|
Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容", CollUtil.join(data.getRow(0), ","));
|
||||||
|
|
||||||
|
// 文件中第3行数据,对应原始行号是6(从0开始)
|
||||||
|
Assert.assertEquals(6, data.getRow(1).getOriginalLineNumber());
|
||||||
|
Assert.assertEquals("a,s,d,f", CollUtil.join(data.getRow(1), ","));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
# 这是一行注释,读取时应忽略
|
# 这是一行注释,读取时应忽略
|
||||||
"sss,sss",姓名,"性别",关注"对象",年龄,"","""
|
"sss,sss",姓名,"性别",关注"对象",年龄,"","""
|
||||||
|
Can't render this file because it contains an unexpected character in line 2 and column 33.
|
7
hutool-core/src/test/resources/test_lines.csv
Executable file
7
hutool-core/src/test/resources/test_lines.csv
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
# 这是一行注释,读取时应忽略
|
||||||
|
a,b,c,d
|
||||||
|
1,2,3,4
|
||||||
|
# 这是一行注释,读取时应忽略
|
||||||
|
q,w,e,r,"我是一段
|
||||||
|
带换行的内容"
|
||||||
|
a,s,d,f
|
Can't render this file because it has a wrong number of fields in line 2.
|
Loading…
x
Reference in New Issue
Block a user