mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-04-19 03:01:48 +08:00
修复双引号转义符转义错误问题,修改规则后,对非闭合双引号字段的策略变更,如"aa,则被识别为aa
This commit is contained in:
parent
755aed01de
commit
c50625e215
@ -66,7 +66,7 @@ public class CsvBaseReader implements Serializable {
|
||||
* @param config 配置项
|
||||
*/
|
||||
public CsvBaseReader(final CsvReadConfig config) {
|
||||
this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::defaultConfig);
|
||||
this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::of);
|
||||
}
|
||||
//--------------------------------------------------------------------------------------------- Constructor end
|
||||
|
||||
|
@ -18,11 +18,10 @@ package org.dromara.hutool.poi.csv;
|
||||
|
||||
import org.dromara.hutool.core.collection.iter.ComputeIter;
|
||||
import org.dromara.hutool.core.io.IORuntimeException;
|
||||
import org.dromara.hutool.core.io.IoUtil;
|
||||
import org.dromara.hutool.core.map.MapUtil;
|
||||
import org.dromara.hutool.core.text.CharUtil;
|
||||
import org.dromara.hutool.core.text.StrTrimer;
|
||||
import org.dromara.hutool.core.text.StrUtil;
|
||||
import org.dromara.hutool.core.text.CharUtil;
|
||||
import org.dromara.hutool.core.util.ObjUtil;
|
||||
|
||||
import java.io.Closeable;
|
||||
@ -41,10 +40,8 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
|
||||
private static final int DEFAULT_ROW_CAPACITY = 10;
|
||||
|
||||
private final Reader reader;
|
||||
private final CsvReadConfig config;
|
||||
|
||||
private final Buffer buf;
|
||||
private final CsvTokener tokener;
|
||||
/**
|
||||
* 前一个特殊分界字符
|
||||
*/
|
||||
@ -90,20 +87,8 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
* @param config 配置,null则为默认配置
|
||||
*/
|
||||
public CsvParser(final Reader reader, final CsvReadConfig config) {
|
||||
this(reader, config, IoUtil.DEFAULT_LARGE_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* CSV解析器
|
||||
*
|
||||
* @param reader Reader
|
||||
* @param config 配置,null则为默认配置
|
||||
* @param bufferSize 默认缓存大小
|
||||
*/
|
||||
public CsvParser(final Reader reader, final CsvReadConfig config, final int bufferSize) {
|
||||
this.reader = Objects.requireNonNull(reader, "reader must not be null");
|
||||
this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::defaultConfig);
|
||||
this.buf = new Buffer(bufferSize);
|
||||
this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::of);
|
||||
this.tokener = new CsvTokener(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -130,7 +115,7 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
/**
|
||||
* 读取下一行数据
|
||||
*
|
||||
* @return CsvRow
|
||||
* @return CsvRow,{@code null}表示读取结束
|
||||
* @throws IORuntimeException IO读取异常
|
||||
*/
|
||||
public CsvRow nextRow() throws IORuntimeException {
|
||||
@ -230,36 +215,28 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
|
||||
|
||||
final StringBuilder currentField = this.currentField;
|
||||
final Buffer buf = this.buf;
|
||||
int preChar = this.preChar;//前一个特殊分界字符
|
||||
int copyLen = 0; //拷贝长度
|
||||
boolean inComment = false;
|
||||
|
||||
int c;
|
||||
while (true) {
|
||||
if (!buf.hasRemaining()) {
|
||||
// 此Buffer读取结束,开始读取下一段
|
||||
if (copyLen > 0) {
|
||||
buf.appendTo(currentField, copyLen);
|
||||
// 此处无需mark,read方法会重置mark
|
||||
}
|
||||
if (buf.read(this.reader) < 0) {
|
||||
// CSV读取结束
|
||||
finished = true;
|
||||
|
||||
if (currentField.length() > 0 || preChar == config.fieldSeparator) {
|
||||
//剩余部分作为一个字段
|
||||
addField(currentFields, currentField.toString());
|
||||
currentField.setLength(0);
|
||||
c = tokener.next();
|
||||
if(c < 0){
|
||||
if (currentField.length() > 0 || preChar == config.fieldSeparator) {
|
||||
if(this.inQuotes){
|
||||
// 未闭合的文本包装,在末尾补充包装符
|
||||
currentField.append(config.textDelimiter);
|
||||
}
|
||||
break;
|
||||
|
||||
//剩余部分作为一个字段
|
||||
addField(currentFields, currentField.toString());
|
||||
currentField.setLength(0);
|
||||
}
|
||||
|
||||
//重置
|
||||
copyLen = 0;
|
||||
// 读取结束
|
||||
this.finished = true;
|
||||
break;
|
||||
}
|
||||
|
||||
final char c = buf.get();
|
||||
|
||||
// 注释行标记
|
||||
if (preChar < 0 || preChar == CharUtil.CR || preChar == CharUtil.LF) {
|
||||
// 判断行首字符为指定注释字符的注释开始,直到遇到换行符
|
||||
@ -277,16 +254,20 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
inComment = false;
|
||||
}
|
||||
// 跳过注释行中的任何字符
|
||||
buf.mark();
|
||||
preChar = c;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inQuotes) {
|
||||
//引号内,作为内容,直到引号结束
|
||||
if (c == config.textDelimiter) {
|
||||
// End of quoted text
|
||||
inQuotes = false;
|
||||
// issue#IB5UQ8 文本包装符转义
|
||||
final int next = tokener.next();
|
||||
if(next != config.textDelimiter){
|
||||
// 包装结束
|
||||
inQuotes = false;
|
||||
tokener.back();
|
||||
}
|
||||
// https://datatracker.ietf.org/doc/html/rfc4180#section-2 跳过转义符,只保留被转义的包装符
|
||||
} else {
|
||||
// 字段内容中新行
|
||||
if (isLineEnd(c, preChar)) {
|
||||
@ -294,28 +275,19 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
}
|
||||
}
|
||||
// 普通字段字符
|
||||
copyLen++;
|
||||
currentField.append((char)c);
|
||||
} else {
|
||||
// 非引号内
|
||||
if (c == config.fieldSeparator) {
|
||||
//一个字段结束
|
||||
if (copyLen > 0) {
|
||||
buf.appendTo(currentField, copyLen);
|
||||
copyLen = 0;
|
||||
}
|
||||
buf.mark();
|
||||
addField(currentFields, currentField.toString());
|
||||
currentField.setLength(0);
|
||||
} else if (c == config.textDelimiter && isFieldBegin(preChar)) {
|
||||
// 引号开始且出现在字段开头
|
||||
inQuotes = true;
|
||||
copyLen++;
|
||||
currentField.append((char)c);
|
||||
} else if (c == CharUtil.CR) {
|
||||
// \r,直接结束
|
||||
if (copyLen > 0) {
|
||||
buf.appendTo(currentField, copyLen);
|
||||
}
|
||||
buf.mark();
|
||||
// \r
|
||||
addField(currentFields, currentField.toString());
|
||||
currentField.setLength(0);
|
||||
preChar = c;
|
||||
@ -323,20 +295,14 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
} else if (c == CharUtil.LF) {
|
||||
// \n
|
||||
if (preChar != CharUtil.CR) {
|
||||
if (copyLen > 0) {
|
||||
buf.appendTo(currentField, copyLen);
|
||||
}
|
||||
buf.mark();
|
||||
addField(currentFields, currentField.toString());
|
||||
currentField.setLength(0);
|
||||
preChar = c;
|
||||
break;
|
||||
}
|
||||
// 前一个字符是\r,已经处理过这个字段了,此处直接跳过
|
||||
buf.mark();
|
||||
} else {
|
||||
// 普通字符
|
||||
copyLen++;
|
||||
currentField.append((char)c);
|
||||
}
|
||||
}
|
||||
|
||||
@ -352,7 +318,7 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
tokener.close();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -369,9 +335,6 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
|
||||
if(StrUtil.isWrap(field, textDelimiter)){
|
||||
field = StrUtil.sub(field, 1, field.length() - 1);
|
||||
// https://datatracker.ietf.org/doc/html/rfc4180#section-2
|
||||
// 第七条规则,只有包装内的包装符需要转义
|
||||
field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter));
|
||||
}
|
||||
if (this.config.trimField) {
|
||||
// issue#I49M0C@Gitee
|
||||
@ -388,7 +351,7 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
* @return 是否结束
|
||||
* @since 5.7.4
|
||||
*/
|
||||
private boolean isLineEnd(final char c, final int preChar) {
|
||||
private boolean isLineEnd(final int c, final int preChar) {
|
||||
return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
|
||||
}
|
||||
|
||||
@ -409,89 +372,4 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
||||
|| preChar == CharUtil.LF
|
||||
|| preChar == CharUtil.CR;
|
||||
}
|
||||
|
||||
/**
|
||||
* 内部Buffer
|
||||
*
|
||||
* @author looly
|
||||
*/
|
||||
private static class Buffer implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
final char[] buf;
|
||||
|
||||
/**
|
||||
* 标记位置,用于读数据
|
||||
*/
|
||||
private int mark;
|
||||
/**
|
||||
* 当前位置
|
||||
*/
|
||||
private int position;
|
||||
/**
|
||||
* 读取的数据长度,一般小于buf.length,-1表示无数据
|
||||
*/
|
||||
private int limit;
|
||||
|
||||
Buffer(final int capacity) {
|
||||
buf = new char[capacity];
|
||||
}
|
||||
|
||||
/**
|
||||
* 是否还有未读数据
|
||||
*
|
||||
* @return 是否还有未读数据
|
||||
*/
|
||||
public final boolean hasRemaining() {
|
||||
return position < limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* 读取到缓存<br>
|
||||
* 全量读取,会重置Buffer中所有数据
|
||||
*
|
||||
* @param reader {@link Reader}
|
||||
*/
|
||||
int read(final Reader reader) {
|
||||
final int length;
|
||||
try {
|
||||
length = reader.read(this.buf);
|
||||
} catch (final IOException e) {
|
||||
throw new IORuntimeException(e);
|
||||
}
|
||||
this.mark = 0;
|
||||
this.position = 0;
|
||||
this.limit = length;
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* 先获取当前字符,再将当前位置后移一位<br>
|
||||
* 此方法不检查是否到了数组末尾,请自行使用{@link #hasRemaining()}判断。
|
||||
*
|
||||
* @return 当前位置字符
|
||||
* @see #hasRemaining()
|
||||
*/
|
||||
char get() {
|
||||
return this.buf[this.position++];
|
||||
}
|
||||
|
||||
/**
|
||||
* 标记位置记为下次读取位置
|
||||
*/
|
||||
void mark() {
|
||||
this.mark = this.position;
|
||||
}
|
||||
|
||||
/**
|
||||
* 将数据追加到{@link StringBuilder},追加结束后需手动调用{@link #mark()} 重置读取位置
|
||||
*
|
||||
* @param builder {@link StringBuilder}
|
||||
* @param length 追加的长度
|
||||
* @see #mark()
|
||||
*/
|
||||
void appendTo(final StringBuilder builder, final int length) {
|
||||
builder.append(this.buf, this.mark, length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,362 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2024 Hutool Team and hutool.cn
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.dromara.hutool.poi.csv;
|
||||
|
||||
import org.dromara.hutool.core.collection.iter.ComputeIter;
|
||||
import org.dromara.hutool.core.io.IORuntimeException;
|
||||
import org.dromara.hutool.core.map.MapUtil;
|
||||
import org.dromara.hutool.core.text.CharUtil;
|
||||
import org.dromara.hutool.core.text.StrTrimer;
|
||||
import org.dromara.hutool.core.text.StrUtil;
|
||||
import org.dromara.hutool.core.util.ObjUtil;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* CSV行解析器,参考:FastCSV
|
||||
*
|
||||
* @author Looly
|
||||
*/
|
||||
public final class CsvParser2 extends ComputeIter<CsvRow> implements Closeable, Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private static final int DEFAULT_ROW_CAPACITY = 10;
|
||||
|
||||
private final CsvReadConfig config;
|
||||
private final CsvTokener tokener;
|
||||
/**
|
||||
* 前一个特殊分界字符
|
||||
*/
|
||||
private int preChar = -1;
|
||||
/**
|
||||
* 是否在引号包装内
|
||||
*/
|
||||
private boolean inQuotes;
|
||||
/**
|
||||
* 当前读取字段
|
||||
*/
|
||||
private final StringBuilder currentField = new StringBuilder(512);
|
||||
|
||||
/**
|
||||
* 标题行
|
||||
*/
|
||||
private CsvRow header;
|
||||
/**
|
||||
* 当前行号
|
||||
*/
|
||||
private long lineNo = -1;
|
||||
/**
|
||||
* 引号内的行数
|
||||
*/
|
||||
private long inQuotesLineCount;
|
||||
/**
|
||||
* 第一行字段数,用于检查每行字段数是否一致
|
||||
*/
|
||||
private int firstLineFieldCount = -1;
|
||||
/**
|
||||
* 最大字段数量,用于初始化行,减少扩容
|
||||
*/
|
||||
private int maxFieldCount;
|
||||
/**
|
||||
* 是否读取结束
|
||||
*/
|
||||
private boolean finished;
|
||||
|
||||
/**
|
||||
* CSV解析器
|
||||
*
|
||||
* @param reader Reader
|
||||
* @param config 配置,null则为默认配置
|
||||
*/
|
||||
public CsvParser2(final Reader reader, final CsvReadConfig config) {
|
||||
this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::defaultConfig);
|
||||
this.tokener = new CsvTokener(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取头部字段列表,如果headerLineNo < 0,抛出异常
|
||||
*
|
||||
* @return 头部列表
|
||||
* @throws IllegalStateException 如果不解析头部或者没有调用nextRow()方法
|
||||
*/
|
||||
public List<String> getHeader() {
|
||||
if (config.headerLineNo < 0) {
|
||||
throw new IllegalStateException("No header available - header parsing is disabled");
|
||||
}
|
||||
if (lineNo < config.beginLineNo) {
|
||||
throw new IllegalStateException("No header available - call nextRow() first");
|
||||
}
|
||||
return header.getRaw();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected CsvRow computeNext() {
|
||||
return nextRow();
|
||||
}
|
||||
|
||||
/**
|
||||
* 读取下一行数据
|
||||
*
|
||||
* @return CsvRow
|
||||
* @throws IORuntimeException IO读取异常
|
||||
*/
|
||||
public CsvRow nextRow() throws IORuntimeException {
|
||||
List<String> currentFields;
|
||||
int fieldCount;
|
||||
while (!finished) {
|
||||
currentFields = readLine();
|
||||
fieldCount = currentFields.size();
|
||||
if (fieldCount < 1) {
|
||||
// 空List表示读取结束
|
||||
break;
|
||||
}
|
||||
|
||||
// 读取范围校验
|
||||
if (lineNo < config.beginLineNo) {
|
||||
// 未达到读取起始行,继续
|
||||
continue;
|
||||
}
|
||||
if (lineNo > config.endLineNo) {
|
||||
// 超出结束行,读取结束
|
||||
break;
|
||||
}
|
||||
|
||||
// 跳过空行
|
||||
if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) {
|
||||
// [""]表示空行
|
||||
continue;
|
||||
}
|
||||
|
||||
// 检查每行的字段数是否一致
|
||||
if (config.errorOnDifferentFieldCount) {
|
||||
if (firstLineFieldCount < 0) {
|
||||
firstLineFieldCount = fieldCount;
|
||||
} else if (fieldCount != firstLineFieldCount) {
|
||||
throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount));
|
||||
}
|
||||
}
|
||||
|
||||
// 记录最大字段数
|
||||
if (fieldCount > maxFieldCount) {
|
||||
maxFieldCount = fieldCount;
|
||||
}
|
||||
|
||||
//初始化标题
|
||||
if (lineNo == config.headerLineNo && null == header) {
|
||||
initHeader(currentFields);
|
||||
// 作为标题行后,此行跳过,下一行做为第一行
|
||||
continue;
|
||||
}
|
||||
|
||||
return new CsvRow(lineNo, null == header ? null : header.headerMap, currentFields);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 当前行做为标题行
|
||||
*
|
||||
* @param currentFields 当前行字段列表
|
||||
*/
|
||||
private void initHeader(final List<String> currentFields) {
|
||||
final Map<String, Integer> localHeaderMap = new LinkedHashMap<>(currentFields.size());
|
||||
for (int i = 0; i < currentFields.size(); i++) {
|
||||
String field = currentFields.get(i);
|
||||
if (MapUtil.isNotEmpty(this.config.headerAlias)) {
|
||||
// 自定义别名
|
||||
field = ObjUtil.defaultIfNull(this.config.headerAlias.get(field), field);
|
||||
}
|
||||
if (StrUtil.isNotEmpty(field) && !localHeaderMap.containsKey(field)) {
|
||||
localHeaderMap.put(field, i);
|
||||
}
|
||||
}
|
||||
|
||||
header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields));
|
||||
}
|
||||
|
||||
/**
|
||||
* 读取一行数据,如果读取结束,返回size为0的List<br>
|
||||
* 空行是size为1的List,唯一元素是""
|
||||
*
|
||||
* <p>
|
||||
* 行号要考虑注释行和引号包装的内容中的换行
|
||||
* </p>
|
||||
*
|
||||
* @return 一行数据
|
||||
* @throws IORuntimeException IO异常
|
||||
*/
|
||||
private List<String> readLine() throws IORuntimeException {
|
||||
// 矫正行号
|
||||
// 当一行内容包含多行数据时,记录首行行号,但是读取下一行时,需要把多行内容的行数加上
|
||||
if (inQuotesLineCount > 0) {
|
||||
this.lineNo += this.inQuotesLineCount;
|
||||
this.inQuotesLineCount = 0;
|
||||
}
|
||||
|
||||
final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
|
||||
|
||||
final StringBuilder currentField = this.currentField;
|
||||
int preChar = this.preChar;//前一个特殊分界字符
|
||||
boolean inComment = false;
|
||||
|
||||
int c;
|
||||
while (true) {
|
||||
c = tokener.next();
|
||||
if(c < 0){
|
||||
// 读取结束
|
||||
this.finished = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// 注释行标记
|
||||
if (preChar < 0 || preChar == CharUtil.CR || preChar == CharUtil.LF) {
|
||||
// 判断行首字符为指定注释字符的注释开始,直到遇到换行符
|
||||
// 行首分两种,1是preChar < 0表示文本开始,2是换行符后紧跟就是下一行的开始
|
||||
// issue#IA8WE0 如果注释符出现在包装符内,被认为是普通字符
|
||||
if (!inQuotes && null != this.config.commentCharacter && c == this.config.commentCharacter) {
|
||||
inComment = true;
|
||||
}
|
||||
}
|
||||
// 注释行处理
|
||||
if (inComment) {
|
||||
if (c == CharUtil.CR || c == CharUtil.LF) {
|
||||
// 注释行以换行符为结尾
|
||||
lineNo++;
|
||||
inComment = false;
|
||||
}
|
||||
// 跳过注释行中的任何字符
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inQuotes) {
|
||||
//引号内,作为内容,直到引号结束
|
||||
if (c == config.textDelimiter) {
|
||||
// End of quoted text
|
||||
inQuotes = false;
|
||||
} else {
|
||||
// 字段内容中新行
|
||||
if (isLineEnd(c, preChar)) {
|
||||
inQuotesLineCount++;
|
||||
}
|
||||
}
|
||||
// 普通字段字符
|
||||
currentField.append((char)c);
|
||||
} else {
|
||||
// 非引号内
|
||||
if (c == config.fieldSeparator) {
|
||||
//一个字段结束
|
||||
addField(currentFields, currentField.toString());
|
||||
currentField.setLength(0);
|
||||
} else if (c == config.textDelimiter && isFieldBegin(preChar)) {
|
||||
// 引号开始且出现在字段开头
|
||||
inQuotes = true;
|
||||
currentField.append((char)c);
|
||||
} else if (c == CharUtil.CR) {
|
||||
// \r
|
||||
addField(currentFields, currentField.toString());
|
||||
currentField.setLength(0);
|
||||
preChar = c;
|
||||
break;
|
||||
} else if (c == CharUtil.LF) {
|
||||
// \n
|
||||
if (preChar != CharUtil.CR) {
|
||||
addField(currentFields, currentField.toString());
|
||||
currentField.setLength(0);
|
||||
preChar = c;
|
||||
break;
|
||||
}
|
||||
// 前一个字符是\r,已经处理过这个字段了,此处直接跳过
|
||||
} else {
|
||||
currentField.append((char)c);
|
||||
}
|
||||
}
|
||||
|
||||
preChar = c;
|
||||
}
|
||||
|
||||
// restore fields
|
||||
this.preChar = preChar;
|
||||
|
||||
lineNo++;
|
||||
return currentFields;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
tokener.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* 将字段加入字段列表并自动去包装和去转义
|
||||
*
|
||||
* @param currentFields 当前的字段列表(即为行)
|
||||
* @param field 字段
|
||||
*/
|
||||
private void addField(final List<String> currentFields, String field) {
|
||||
final char textDelimiter = this.config.textDelimiter;
|
||||
|
||||
// 忽略多余引号后的换行符
|
||||
field = StrUtil.trim(field, StrTrimer.TrimMode.SUFFIX, (c -> c == CharUtil.LF || c == CharUtil.CR));
|
||||
|
||||
if(StrUtil.isWrap(field, textDelimiter)){
|
||||
field = StrUtil.sub(field, 1, field.length() - 1);
|
||||
// https://datatracker.ietf.org/doc/html/rfc4180#section-2
|
||||
// 第七条规则,只有包装内的包装符需要转义
|
||||
field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter));
|
||||
}
|
||||
if (this.config.trimField) {
|
||||
// issue#I49M0C@Gitee
|
||||
field = StrUtil.trim(field);
|
||||
}
|
||||
currentFields.add(field);
|
||||
}
|
||||
|
||||
/**
|
||||
* 是否行结束符
|
||||
*
|
||||
* @param c 符号
|
||||
* @param preChar 前一个字符
|
||||
* @return 是否结束
|
||||
* @since 5.7.4
|
||||
*/
|
||||
private boolean isLineEnd(final int c, final int preChar) {
|
||||
return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
|
||||
}
|
||||
|
||||
/**
|
||||
* 通过前一个字符,判断是否字段开始,几种情况:
|
||||
* <ul>
|
||||
* <li>正文开头,无前字符</li>
|
||||
* <li>字段分隔符,即上个字段结束</li>
|
||||
* <li>换行符,即新行开始</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param preChar 前字符
|
||||
* @return 是否字段开始
|
||||
*/
|
||||
private boolean isFieldBegin(final int preChar) {
|
||||
return preChar == -1
|
||||
|| preChar == config.fieldSeparator
|
||||
|| preChar == CharUtil.LF
|
||||
|| preChar == CharUtil.CR;
|
||||
}
|
||||
}
|
@ -45,7 +45,7 @@ public class CsvReadConfig extends CsvConfig<CsvReadConfig> implements Serializa
|
||||
*
|
||||
* @return 默认配置
|
||||
*/
|
||||
public static CsvReadConfig defaultConfig() {
|
||||
public static CsvReadConfig of() {
|
||||
return new CsvReadConfig();
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,12 @@ import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* CSV解析器,用于解析CSV文件
|
||||
*
|
||||
* @author looly
|
||||
* @since 5.8.0
|
||||
*/
|
||||
public class CsvTokener extends SimpleWrapper<Reader> implements Closeable {
|
||||
|
||||
/**
|
||||
@ -45,7 +51,7 @@ public class CsvTokener extends SimpleWrapper<Reader> implements Closeable {
|
||||
* @param reader {@link Reader}
|
||||
*/
|
||||
public CsvTokener(final Reader reader) {
|
||||
super(reader);
|
||||
super(IoUtil.toBuffered(reader));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -56,12 +62,12 @@ public class CsvTokener extends SimpleWrapper<Reader> implements Closeable {
|
||||
public int next() {
|
||||
if(this.usePrev){
|
||||
this.usePrev = false;
|
||||
return this.prev;
|
||||
}
|
||||
try {
|
||||
this.prev = this.raw.read();
|
||||
} catch (final IOException e) {
|
||||
throw new IORuntimeException(e);
|
||||
}else{
|
||||
try {
|
||||
this.prev = this.raw.read();
|
||||
} catch (final IOException e) {
|
||||
throw new IORuntimeException(e);
|
||||
}
|
||||
}
|
||||
this.index++;
|
||||
return this.prev;
|
||||
@ -80,6 +86,15 @@ public class CsvTokener extends SimpleWrapper<Reader> implements Closeable {
|
||||
this.usePrev = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取当前位置
|
||||
*
|
||||
* @return 位置
|
||||
*/
|
||||
public long getIndex() {
|
||||
return this.index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
IoUtil.nullSafeClose(this.raw);
|
||||
|
@ -287,9 +287,9 @@ public class ExcelSaxUtil {
|
||||
return null;
|
||||
}
|
||||
|
||||
// issue#IB0EJ9 可能精度丢失
|
||||
// issue#IB0EJ9 可能精度丢失,对含有小数的value判断并转为BigDecimal
|
||||
final double number = Double.parseDouble(value);
|
||||
if(false == value.equals(Double.toString(number))){
|
||||
if(StrUtil.contains(value, CharUtil.DOT) && !value.equals(Double.toString(number))){
|
||||
// 精度丢失
|
||||
return NumberUtil.toBigDecimal(value);
|
||||
}
|
||||
|
@ -17,7 +17,6 @@
|
||||
package org.dromara.hutool.poi.csv;
|
||||
|
||||
import org.dromara.hutool.core.io.IoUtil;
|
||||
import org.dromara.hutool.core.lang.Console;
|
||||
import org.dromara.hutool.core.text.StrUtil;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@ -29,7 +28,7 @@ public class CsvParserTest {
|
||||
@Test
|
||||
public void parseTest1() {
|
||||
final StringReader reader = StrUtil.getReader("aaa,b\"bba\",ccc");
|
||||
final CsvParser2 parser = new CsvParser2(reader, null);
|
||||
final CsvParser parser = new CsvParser(reader, null);
|
||||
final CsvRow row = parser.nextRow();
|
||||
//noinspection ConstantConditions
|
||||
Assertions.assertEquals("b\"bba\"", row.getRaw().get(1));
|
||||
@ -39,7 +38,7 @@ public class CsvParserTest {
|
||||
@Test
|
||||
public void parseTest2() {
|
||||
final StringReader reader = StrUtil.getReader("aaa,\"bba\"bbb,ccc");
|
||||
final CsvParser2 parser = new CsvParser2(reader, null);
|
||||
final CsvParser parser = new CsvParser(reader, null);
|
||||
final CsvRow row = parser.nextRow();
|
||||
//noinspection ConstantConditions
|
||||
Assertions.assertEquals("\"bba\"bbb", row.getRaw().get(1));
|
||||
@ -49,7 +48,7 @@ public class CsvParserTest {
|
||||
@Test
|
||||
public void parseTest3() {
|
||||
final StringReader reader = StrUtil.getReader("aaa,\"bba\",ccc");
|
||||
final CsvParser2 parser = new CsvParser2(reader, null);
|
||||
final CsvParser parser = new CsvParser(reader, null);
|
||||
final CsvRow row = parser.nextRow();
|
||||
//noinspection ConstantConditions
|
||||
Assertions.assertEquals("bba", row.getRaw().get(1));
|
||||
@ -59,7 +58,7 @@ public class CsvParserTest {
|
||||
@Test
|
||||
public void parseTest4() {
|
||||
final StringReader reader = StrUtil.getReader("aaa,\"\",ccc");
|
||||
final CsvParser2 parser = new CsvParser2(reader, null);
|
||||
final CsvParser parser = new CsvParser(reader, null);
|
||||
final CsvRow row = parser.nextRow();
|
||||
//noinspection ConstantConditions
|
||||
Assertions.assertEquals("", row.getRaw().get(1));
|
||||
@ -80,9 +79,36 @@ public class CsvParserTest {
|
||||
|
||||
@Test
|
||||
void issueIB5UQ8Test() {
|
||||
String csv = "\"Consultancy, 10\"\",, food\"";
|
||||
final String csv = "\"Consultancy, 10\"\",, food\"";
|
||||
final CsvReader reader = CsvUtil.getReader(new StringReader(csv));
|
||||
final String s = reader.read().getRow(0).get(0);
|
||||
Console.log(s);
|
||||
Assertions.assertEquals("Consultancy, 10\",, food", s);
|
||||
}
|
||||
|
||||
@Test
|
||||
void textDelimiterAtEndTest() {
|
||||
final String csv = "\"Consultancy, 10\"";
|
||||
final CsvReader reader = CsvUtil.getReader(new StringReader(csv));
|
||||
final String s = reader.read().getRow(0).get(0);
|
||||
Assertions.assertEquals("Consultancy, 10", s);
|
||||
}
|
||||
|
||||
@Test
|
||||
void textDelimiterUncloseTest() {
|
||||
// 未闭合的文本包装符:到文本结尾时自动视为包装结束
|
||||
final String csv = "\"Consultancy,";
|
||||
final CsvReader reader = CsvUtil.getReader(new StringReader(csv));
|
||||
final String s = reader.read().getRow(0).get(0);
|
||||
Assertions.assertEquals("Consultancy,", s);
|
||||
}
|
||||
|
||||
@Test
|
||||
void textDelimiterOfCount3Test() {
|
||||
// 三个包装符:首个为包装开始,后两个为转义后的包装符;包装未闭合,到文本结尾时自动视为包装结束
|
||||
final String csv = "\"\"\"";
|
||||
final CsvParser csvParser = new CsvParser(new StringReader(csv), CsvReadConfig.of().setSkipEmptyRows(false));
|
||||
final CsvRow row = csvParser.nextRow();
|
||||
Assertions.assertNotNull(row);
|
||||
Assertions.assertEquals("\"", row.get(0));
|
||||
}
|
||||
}
|
||||
|
@ -68,7 +68,7 @@ public class CsvReaderTest {
|
||||
|
||||
@Test
|
||||
public void readAliasMapListTest() {
|
||||
final CsvReadConfig csvReadConfig = CsvReadConfig.defaultConfig();
|
||||
final CsvReadConfig csvReadConfig = CsvReadConfig.of();
|
||||
csvReadConfig.addHeaderAlias("姓名", "name");
|
||||
|
||||
final CsvReader reader = CsvUtil.getReader(csvReadConfig);
|
||||
@ -135,7 +135,7 @@ public class CsvReaderTest {
|
||||
@Test
|
||||
@Disabled
|
||||
public void readTest3() {
|
||||
final CsvReadConfig csvReadConfig = CsvReadConfig.defaultConfig();
|
||||
final CsvReadConfig csvReadConfig = CsvReadConfig.of();
|
||||
csvReadConfig.setContainsHeader(true);
|
||||
final CsvReader reader = CsvUtil.getReader(csvReadConfig);
|
||||
final CsvData read = reader.read(FileUtil.file("d:/test/ceshi.csv"));
|
||||
@ -164,7 +164,7 @@ public class CsvReaderTest {
|
||||
@Test
|
||||
public void lineLimitTest() {
|
||||
// 从原始第2行开始读取
|
||||
final CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2));
|
||||
final CsvReader reader = new CsvReader(CsvReadConfig.of().setBeginLineNo(2));
|
||||
final CsvData data = reader.read(
|
||||
ResourceUtil.getUtf8Reader("test_lines.csv"), true);
|
||||
|
||||
@ -183,7 +183,7 @@ public class CsvReaderTest {
|
||||
@Test
|
||||
public void lineLimitWithHeaderTest() {
|
||||
// 从原始第2行开始读取
|
||||
final CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2).setContainsHeader(true));
|
||||
final CsvReader reader = new CsvReader(CsvReadConfig.of().setBeginLineNo(2).setContainsHeader(true));
|
||||
final CsvData data = reader.read(
|
||||
ResourceUtil.getUtf8Reader("test_lines.csv"), true);
|
||||
|
||||
@ -199,7 +199,7 @@ public class CsvReaderTest {
|
||||
@Test
|
||||
public void customConfigTest() {
|
||||
final CsvReader reader = CsvUtil.getReader(
|
||||
CsvReadConfig.defaultConfig()
|
||||
CsvReadConfig.of()
|
||||
.setTextDelimiter('\'')
|
||||
.setFieldSeparator(';'));
|
||||
final CsvData csvRows = reader.readFromStr("123;456;'789;0'abc;");
|
||||
@ -211,7 +211,7 @@ public class CsvReaderTest {
|
||||
|
||||
@Test
|
||||
public void readDisableCommentTest() {
|
||||
final CsvReader reader = CsvUtil.getReader(CsvReadConfig.defaultConfig().disableComment());
|
||||
final CsvReader reader = CsvUtil.getReader(CsvReadConfig.of().disableComment());
|
||||
final CsvData read = reader.read(
|
||||
ResourceUtil.getUtf8Reader("test.csv"), true);
|
||||
final CsvRow row = read.getRow(0);
|
||||
|
@ -48,11 +48,12 @@ public class CsvUtilTest {
|
||||
Assertions.assertEquals("关注\"对象\"", row0.get(3));
|
||||
Assertions.assertEquals("年龄", row0.get(4));
|
||||
Assertions.assertEquals("", row0.get(5));
|
||||
Assertions.assertEquals("\"", row0.get(6));
|
||||
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
|
||||
Assertions.assertEquals("\"\n", row0.get(6));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readTest2() {
|
||||
public void readUseConsumerTest() {
|
||||
final CsvReader reader = CsvUtil.getReader();
|
||||
reader.read(FileUtil.getUtf8Reader("test.csv"), true, (csvRow)-> {
|
||||
// 只有一行,所以直接判断
|
||||
@ -62,7 +63,8 @@ public class CsvUtilTest {
|
||||
Assertions.assertEquals("关注\"对象\"", csvRow.get(3));
|
||||
Assertions.assertEquals("年龄", csvRow.get(4));
|
||||
Assertions.assertEquals("", csvRow.get(5));
|
||||
Assertions.assertEquals("\"", csvRow.get(6));
|
||||
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
|
||||
Assertions.assertEquals("\"\n", csvRow.get(6));
|
||||
});
|
||||
}
|
||||
|
||||
@ -75,7 +77,7 @@ public class CsvUtilTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readCsvStr1(){
|
||||
public void readCsvStr1WithUncloseTest(){
|
||||
final CsvData data = CsvUtil.getReader().readFromStr("# 这是一行注释,读取时应忽略\n" +
|
||||
"\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n");
|
||||
final List<CsvRow> rows = data.getRows();
|
||||
@ -86,11 +88,28 @@ public class CsvUtilTest {
|
||||
Assertions.assertEquals("关注\"对象\"", row0.get(3));
|
||||
Assertions.assertEquals("年龄", row0.get(4));
|
||||
Assertions.assertEquals("", row0.get(5));
|
||||
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
|
||||
Assertions.assertEquals("\"\n", row0.get(6));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readCsvStr1WithUncloseTrimTest(){
|
||||
final CsvData data = CsvUtil.getReader(CsvReadConfig.of().setTrimField(true))
|
||||
.readFromStr("# 这是一行注释,读取时应忽略\n" +
|
||||
"\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n");
|
||||
final List<CsvRow> rows = data.getRows();
|
||||
final CsvRow row0 = rows.get(0);
|
||||
Assertions.assertEquals("sss,sss", row0.get(0));
|
||||
Assertions.assertEquals("姓名", row0.get(1));
|
||||
Assertions.assertEquals("性别", row0.get(2));
|
||||
Assertions.assertEquals("关注\"对象\"", row0.get(3));
|
||||
Assertions.assertEquals("年龄", row0.get(4));
|
||||
Assertions.assertEquals("", row0.get(5));
|
||||
Assertions.assertEquals("\"", row0.get(6));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readCsvStr2(){
|
||||
public void readCsvStrUseConsumerTest(){
|
||||
CsvUtil.getReader().readFromStr("# 这是一行注释,读取时应忽略\n" +
|
||||
"\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n",(csvRow)-> {
|
||||
// 只有一行,所以直接判断
|
||||
@ -100,7 +119,8 @@ public class CsvUtilTest {
|
||||
Assertions.assertEquals("关注\"对象\"", csvRow.get(3));
|
||||
Assertions.assertEquals("年龄", csvRow.get(4));
|
||||
Assertions.assertEquals("", csvRow.get(5));
|
||||
Assertions.assertEquals("\"", csvRow.get(6));
|
||||
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
|
||||
Assertions.assertEquals("\"\n", csvRow.get(6));
|
||||
});
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user