fix #I2BMP1

This commit is contained in:
Looly 2021-01-03 12:12:02 +08:00
parent bcaf05c589
commit 3c2f0e46b0
7 changed files with 202 additions and 75 deletions

View File

@ -3,7 +3,7 @@
------------------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------------------------------------
# 5.5.7 (2021-01-02) # 5.5.7 (2021-01-03)
### 新特性 ### 新特性
* 【core 】 DynaBean.create增加重载方法pr#245@Gitee * 【core 】 DynaBean.create增加重载方法pr#245@Gitee
@ -13,6 +13,7 @@
* 【extra 】 MailUtil增加getSession方法 * 【extra 】 MailUtil增加getSession方法
### Bug修复 ### Bug修复
* 【core 】 修复CsvReader读取双引号未转义问题issue#I2BMP1@Gitee
------------------------------------------------------------------------------------------------------------- -------------------------------------------------------------------------------------------------------------

View File

@ -1,26 +1,34 @@
package cn.hutool.core.text.csv; package cn.hutool.core.text.csv;
import java.io.Serializable;
import cn.hutool.core.util.CharUtil; import cn.hutool.core.util.CharUtil;
import java.io.Serializable;
/** /**
* CSV基础配置项 * CSV基础配置项此配置项可用于读取和写出CSV定义了包括字段分隔符文本包装符等符号
* *
* @author looly * @author looly
* @since 4.0.5 * @since 4.0.5
*/ */
public class CsvConfig implements Serializable{ public class CsvConfig implements Serializable {
private static final long serialVersionUID = -8069578249066158459L; private static final long serialVersionUID = -8069578249066158459L;
/** 字段分隔符,默认逗号',' */ /**
* 字段分隔符默认逗号','
*/
protected char fieldSeparator = CharUtil.COMMA; protected char fieldSeparator = CharUtil.COMMA;
/** 文本分隔符,文本包装符,默认双引号'"' */ /**
* 文本包装符默认双引号'"'
*/
protected char textDelimiter = CharUtil.DOUBLE_QUOTES; protected char textDelimiter = CharUtil.DOUBLE_QUOTES;
/**
* 注释符号用于区分注释行默认'#'
*/
protected char commentCharacter = '#';
/** /**
* 设置字段分隔符默认逗号',' * 设置字段分隔符默认逗号','
* *
* @param fieldSeparator 字段分隔符默认逗号',' * @param fieldSeparator 字段分隔符默认逗号','
*/ */
public void setFieldSeparator(final char fieldSeparator) { public void setFieldSeparator(final char fieldSeparator) {
@ -29,10 +37,20 @@ public class CsvConfig implements Serializable{
/** /**
* 设置 文本分隔符文本包装符默认双引号'"' * 设置 文本分隔符文本包装符默认双引号'"'
* *
* @param textDelimiter 文本分隔符文本包装符默认双引号'"' * @param textDelimiter 文本分隔符文本包装符默认双引号'"'
*/ */
public void setTextDelimiter(char textDelimiter) { public void setTextDelimiter(char textDelimiter) {
this.textDelimiter = textDelimiter; this.textDelimiter = textDelimiter;
} }
/**
 * Sets the comment character, i.e. the symbol used to mark a line as a comment line.
 *
 * @param commentCharacter the character that marks a comment line
 * @since 5.5.7
 */
public void setCommentCharacter(final char commentCharacter) {
	this.commentCharacter = commentCharacter;
}
} }

View File

@ -32,33 +32,55 @@ public final class CsvParser implements Closeable, Serializable {
private final CsvReadConfig config; private final CsvReadConfig config;
private final char[] buf = new char[IoUtil.DEFAULT_LARGE_BUFFER_SIZE]; private final char[] buf = new char[IoUtil.DEFAULT_LARGE_BUFFER_SIZE];
/** 当前位置 */ /**
* 当前位置
*/
private int bufPos; private int bufPos;
/** 读取一段后数据长度 */ /**
* 读取一段后数据长度
*/
private int bufLen; private int bufLen;
/** 拷贝开始的位置,一般为上一行的结束位置 */ /**
* 拷贝开始的位置一般为上一行的结束位置
*/
private int copyStart; private int copyStart;
/** 前一个特殊分界字符 */ /**
* 前一个特殊分界字符
*/
private int preChar = -1; private int preChar = -1;
/** 是否在引号包装内 */ /**
* 是否在引号包装内
*/
private boolean inQuotes; private boolean inQuotes;
/** 当前读取字段 */ /**
* 当前读取字段
*/
private final StrBuilder currentField = new StrBuilder(512); private final StrBuilder currentField = new StrBuilder(512);
/** 标题行 */ /**
* 标题行
*/
private CsvRow header; private CsvRow header;
/** 当前行号 */ /**
* 当前行号
*/
private long lineNo; private long lineNo;
/** 第一行字段数,用于检查每行字段数是否一致 */ /**
* 第一行字段数用于检查每行字段数是否一致
*/
private int firstLineFieldCount = -1; private int firstLineFieldCount = -1;
/** 最大字段数量 */ /**
* 最大字段数量
*/
private int maxFieldCount; private int maxFieldCount;
/** 是否读取结束 */ /**
* 是否读取结束
*/
private boolean finished; private boolean finished;
/** /**
* CSV解析器 * CSV解析器
* *
* @param reader Reader * @param reader Reader
* @param config 配置null则为默认配置 * @param config 配置null则为默认配置
*/ */
@ -84,7 +106,7 @@ public final class CsvParser implements Closeable, Serializable {
} }
/** /**
*读取下一行数据 * 读取下一行数据
* *
* @return CsvRow * @return CsvRow
* @throws IORuntimeException IO读取异常 * @throws IORuntimeException IO读取异常
@ -97,7 +119,7 @@ public final class CsvParser implements Closeable, Serializable {
startingLineNo = ++lineNo; startingLineNo = ++lineNo;
currentFields = readLine(); currentFields = readLine();
fieldCount = currentFields.size(); fieldCount = currentFields.size();
if(fieldCount < 1){ if (fieldCount < 1) {
break; break;
} }
@ -135,24 +157,24 @@ public final class CsvParser implements Closeable, Serializable {
/** /**
* 当前行做为标题行 * 当前行做为标题行
* *
* @param currentFields 当前行字段列表 * @param currentFields 当前行字段列表
*/ */
private void initHeader(final List<String> currentFields) { private void initHeader(final List<String> currentFields) {
final Map<String, Integer> localHeaderMap = new LinkedHashMap<>(currentFields.size()); final Map<String, Integer> localHeaderMap = new LinkedHashMap<>(currentFields.size());
for (int i = 0; i < currentFields.size(); i++) { for (int i = 0; i < currentFields.size(); i++) {
final String field = currentFields.get(i); final String field = currentFields.get(i);
if (StrUtil.isNotEmpty(field) && false ==localHeaderMap.containsKey(field)) { if (StrUtil.isNotEmpty(field) && false == localHeaderMap.containsKey(field)) {
localHeaderMap.put(field, i); localHeaderMap.put(field, i);
} }
} }
header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields)); header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields));
} }
/** /**
* 读取一行数据 * 读取一行数据
* *
* @return 一行数据 * @return 一行数据
* @throws IORuntimeException IO异常 * @throws IORuntimeException IO异常
*/ */
@ -185,7 +207,7 @@ public final class CsvParser implements Closeable, Serializable {
if (localPreChar == config.fieldSeparator || localCurrentField.hasContent()) { if (localPreChar == config.fieldSeparator || localCurrentField.hasContent()) {
//剩余部分作为一个字段 //剩余部分作为一个字段
currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter)); addField(currentFields, localCurrentField.toStringAndReset());
} }
break; break;
} }
@ -208,36 +230,40 @@ public final class CsvParser implements Closeable, Serializable {
} }
copyLen++; copyLen++;
} else { } else {
// 非引号内
if (c == config.fieldSeparator) { if (c == config.fieldSeparator) {
//一个字段结束 //一个字段结束
if (copyLen > 0) { if (copyLen > 0) {
localCurrentField.append(localBuf, localCopyStart, copyLen); localCurrentField.append(localBuf, localCopyStart, copyLen);
copyLen = 0; copyLen = 0;
} }
currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter)); addField(currentFields, localCurrentField.toStringAndReset());
localCopyStart = localBufPos; localCopyStart = localBufPos;
} else if (c == config.textDelimiter) { } else if (c == config.textDelimiter) {
// 引号开始 // 引号开始
inQuotes = true; inQuotes = true;
copyLen++; copyLen++;
} else if (c == CharUtil.CR) { } else if (c == CharUtil.CR) {
// \r直接结束
if (copyLen > 0) { if (copyLen > 0) {
localCurrentField.append(localBuf, localCopyStart, copyLen); localCurrentField.append(localBuf, localCopyStart, copyLen);
} }
currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter)); addField(currentFields, localCurrentField.toStringAndReset());
localPreChar = c; localPreChar = c;
localCopyStart = localBufPos; localCopyStart = localBufPos;
break; break;
} else if (c == CharUtil.LF) { } else if (c == CharUtil.LF) {
// \n
if (localPreChar != CharUtil.CR) { if (localPreChar != CharUtil.CR) {
if (copyLen > 0) { if (copyLen > 0) {
localCurrentField.append(localBuf, localCopyStart, copyLen); localCurrentField.append(localBuf, localCopyStart, copyLen);
} }
currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter)); addField(currentFields, localCurrentField.toStringAndReset());
localPreChar = c; localPreChar = c;
localCopyStart = localBufPos; localCopyStart = localBufPos;
break; break;
} }
// 前一个字符是\r已经处理过这个字段了此处直接跳过
localCopyStart = localBufPos; localCopyStart = localBufPos;
} else { } else {
copyLen++; copyLen++;
@ -254,9 +280,22 @@ public final class CsvParser implements Closeable, Serializable {
return currentFields; return currentFields;
} }
@Override @Override
public void close() throws IOException { public void close() throws IOException {
reader.close(); reader.close();
} }
/**
 * Adds a raw field to the field list of the current row, removing the text-delimiter
 * wrapper and un-escaping doubled text delimiters on the way.
 * <p>
 * The wrapper is removed exactly once. Removing it a second time after un-escaping
 * would strip delimiter characters that are part of the field's real content, e.g. a raw
 * field {@code """abc"""} must yield {@code "abc"}, not {@code abc}.
 *
 * @param currentFields the field list of the current row; the processed field is appended to it
 * @param field         the raw field text as collected by the parser
 */
private void addField(List<String> currentFields, String field) {
	final char textDelimiter = this.config.textDelimiter;
	// Strip the surrounding delimiter pair once
	// (presumably a no-op when the field is not wrapped; verify against StrUtil.unWrap).
	field = StrUtil.unWrap(field, textDelimiter);
	// A doubled delimiter is the escaped form of a single delimiter character.
	field = StrUtil.replace(field, "" + textDelimiter + textDelimiter, textDelimiter + "");
	currentFields.add(field);
}
} }

View File

@ -29,17 +29,24 @@ import java.util.Collection;
public final class CsvWriter implements Closeable, Flushable, Serializable { public final class CsvWriter implements Closeable, Flushable, Serializable {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
/** 写出器 */ /**
* 写出器
*/
private final Writer writer; private final Writer writer;
/** 写出配置 */ /**
* 写出配置
*/
private final CsvWriteConfig config; private final CsvWriteConfig config;
/** 是否处于新行开始 */ /**
* 是否处于新行开始
*/
private boolean newline = true; private boolean newline = true;
// --------------------------------------------------------------------------------------------------- Constructor start // --------------------------------------------------------------------------------------------------- Constructor start
/** /**
* 构造覆盖已有文件如果存在默认编码UTF-8 * 构造覆盖已有文件如果存在默认编码UTF-8
* *
* @param filePath File CSV文件路径 * @param filePath File CSV文件路径
*/ */
public CsvWriter(String filePath) { public CsvWriter(String filePath) {
@ -48,7 +55,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造覆盖已有文件如果存在默认编码UTF-8 * 构造覆盖已有文件如果存在默认编码UTF-8
* *
* @param file File CSV文件 * @param file File CSV文件
*/ */
public CsvWriter(File file) { public CsvWriter(File file) {
@ -57,9 +64,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造覆盖已有文件如果存在 * 构造覆盖已有文件如果存在
* *
* @param filePath File CSV文件路径 * @param filePath File CSV文件路径
* @param charset 编码 * @param charset 编码
*/ */
public CsvWriter(String filePath, Charset charset) { public CsvWriter(String filePath, Charset charset) {
this(FileUtil.file(filePath), charset); this(FileUtil.file(filePath), charset);
@ -67,8 +74,8 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造覆盖已有文件如果存在 * 构造覆盖已有文件如果存在
* *
* @param file File CSV文件 * @param file File CSV文件
* @param charset 编码 * @param charset 编码
*/ */
public CsvWriter(File file, Charset charset) { public CsvWriter(File file, Charset charset) {
@ -77,9 +84,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造 * 构造
* *
* @param filePath File CSV文件路径 * @param filePath File CSV文件路径
* @param charset 编码 * @param charset 编码
* @param isAppend 是否追加 * @param isAppend 是否追加
*/ */
public CsvWriter(String filePath, Charset charset, boolean isAppend) { public CsvWriter(String filePath, Charset charset, boolean isAppend) {
@ -88,9 +95,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造 * 构造
* *
* @param file CSV文件 * @param file CSV文件
* @param charset 编码 * @param charset 编码
* @param isAppend 是否追加 * @param isAppend 是否追加
*/ */
public CsvWriter(File file, Charset charset, boolean isAppend) { public CsvWriter(File file, Charset charset, boolean isAppend) {
@ -99,11 +106,11 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造 * 构造
* *
* @param filePath CSV文件路径 * @param filePath CSV文件路径
* @param charset 编码 * @param charset 编码
* @param isAppend 是否追加 * @param isAppend 是否追加
* @param config 写出配置null则使用默认配置 * @param config 写出配置null则使用默认配置
*/ */
public CsvWriter(String filePath, Charset charset, boolean isAppend, CsvWriteConfig config) { public CsvWriter(String filePath, Charset charset, boolean isAppend, CsvWriteConfig config) {
this(FileUtil.file(filePath), charset, isAppend, config); this(FileUtil.file(filePath), charset, isAppend, config);
@ -111,11 +118,11 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造 * 构造
* *
* @param file CSV文件 * @param file CSV文件
* @param charset 编码 * @param charset 编码
* @param isAppend 是否追加 * @param isAppend 是否追加
* @param config 写出配置null则使用默认配置 * @param config 写出配置null则使用默认配置
*/ */
public CsvWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) { public CsvWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) {
this(FileUtil.getWriter(file, charset, isAppend), config); this(FileUtil.getWriter(file, charset, isAppend), config);
@ -123,7 +130,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造使用默认配置 * 构造使用默认配置
* *
* @param writer {@link Writer} * @param writer {@link Writer}
*/ */
public CsvWriter(Writer writer) { public CsvWriter(Writer writer) {
@ -132,7 +139,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 构造 * 构造
* *
* @param writer Writer * @param writer Writer
* @param config 写出配置null则使用默认配置 * @param config 写出配置null则使用默认配置
*/ */
@ -144,7 +151,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 设置是否始终使用文本分隔符文本包装符默认false按需添加 * 设置是否始终使用文本分隔符文本包装符默认false按需添加
* *
* @param alwaysDelimitText 是否始终使用文本分隔符文本包装符默认false按需添加 * @param alwaysDelimitText 是否始终使用文本分隔符文本包装符默认false按需添加
* @return this * @return this
*/ */
@ -155,7 +162,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 设置换行符 * 设置换行符
* *
* @param lineDelimiter 换行符 * @param lineDelimiter 换行符
* @return this * @return this
*/ */
@ -166,7 +173,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 将多行写出到Writer * 将多行写出到Writer
* *
* @param lines 多行数据 * @param lines 多行数据
* @return this * @return this
* @throws IORuntimeException IO异常 * @throws IORuntimeException IO异常
@ -183,7 +190,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/** /**
* 将多行写出到Writer * 将多行写出到Writer
* *
* @param lines 多行数据每行数据可以是集合或者数组 * @param lines 多行数据每行数据可以是集合或者数组
* @return this * @return this
* @throws IORuntimeException IO异常 * @throws IORuntimeException IO异常
@ -198,18 +205,55 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
return this; return this;
} }
/**
 * Writes one line made up of the given fields.
 * <p>
 * When no fields are supplied, the call is delegated to the no-argument
 * {@code writeLine()} overload.
 *
 * @param fields the fields of the line ({@code null} values are appended as empty values)
 * @return this
 * @throws IORuntimeException on IO failure
 * @since 5.5.7
 */
public CsvWriter writeLine(String... fields) throws IORuntimeException {
	if (false == ArrayUtil.isEmpty(fields)) {
		appendLine(fields);
		return this;
	}
	// Nothing to write: delegate to the no-argument overload.
	return writeLine();
}
/** /**
* 追加新行换行 * 追加新行换行
* *
* @throws IORuntimeException IO异常 * @throws IORuntimeException IO异常
*/ */
public void writeLine() throws IORuntimeException { public CsvWriter writeLine() throws IORuntimeException {
try { try {
writer.write(config.lineDelimiter); writer.write(config.lineDelimiter);
} catch (IOException e) { } catch (IOException e) {
throw new IORuntimeException(e); throw new IORuntimeException(e);
} }
newline = true; newline = true;
return this;
}
/**
* 写出一行注释注释符号可自定义
*
* @param comment 注释内容
* @return this
* @see CsvConfig#commentCharacter
* @since 5.5.7
*/
public CsvWriter writeComment(String comment) {
try {
writer.write(this.config.commentCharacter);
writer.write(comment);
writer.write(config.lineDelimiter);
newline = true;
} catch (IOException e) {
throw new IORuntimeException(e);
}
return this;
} }
@Override @Override
@ -227,13 +271,14 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
} }
// --------------------------------------------------------------------------------------------------- Private method start // --------------------------------------------------------------------------------------------------- Private method start
/** /**
* 追加一行末尾会自动换行但是追加前不会换行 * 追加一行末尾会自动换行但是追加前不会换行
* *
* @param fields 字段列表 ({@code null} 值会被做为空值追加) * @param fields 字段列表 ({@code null} 值会被做为空值追加)
* @throws IORuntimeException IO异常 * @throws IORuntimeException IO异常
*/ */
private void appendLine(final String... fields) throws IORuntimeException { private void appendLine(String... fields) throws IORuntimeException {
try { try {
doAppendLine(fields); doAppendLine(fields);
} catch (IOException e) { } catch (IOException e) {
@ -276,7 +321,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
if (null == value) { if (null == value) {
if (alwaysDelimitText) { if (alwaysDelimitText) {
writer.write(new char[] { textDelimiter, textDelimiter }); writer.write(new char[]{textDelimiter, textDelimiter});
} }
return; return;
} }

View File

@ -47,7 +47,7 @@ public class CharUtil {
public static final char AMP = '&'; public static final char AMP = '&';
/** 字符常量:冒号 {@code ':'} */ /** 字符常量:冒号 {@code ':'} */
public static final char COLON = ':'; public static final char COLON = ':';
/** 字符常量:艾特 <code>'@'</code> */ /** 字符常量:艾特 {@code '@'} */
public static final char AT = '@'; public static final char AT = '@';
/** /**

View File

@ -1,8 +1,9 @@
package cn.hutool.core.text.csv; package cn.hutool.core.text.csv;
import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Assert; import cn.hutool.core.lang.Console;
import cn.hutool.core.util.CharsetUtil; import cn.hutool.core.util.CharsetUtil;
import org.junit.Assert;
import org.junit.Ignore; import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
@ -16,26 +17,49 @@ public class CsvUtilTest {
//从文件中读取CSV数据 //从文件中读取CSV数据
CsvData data = reader.read(FileUtil.file("test.csv")); CsvData data = reader.read(FileUtil.file("test.csv"));
List<CsvRow> rows = data.getRows(); List<CsvRow> rows = data.getRows();
for (CsvRow csvRow : rows) { final CsvRow row0 = rows.get(0);
Assert.notEmpty(csvRow.getRawList()); Assert.assertEquals("sss,sss", row0.get(0));
} Assert.assertEquals("姓名", row0.get(1));
Assert.assertEquals("性别", row0.get(2));
Assert.assertEquals("关注\"对象\"", row0.get(3));
Assert.assertEquals("年龄", row0.get(4));
Assert.assertEquals("", row0.get(5));
Assert.assertEquals("\"", row0.get(6));
} }
@Test @Test
public void readTest2() { public void readTest2() {
CsvReader reader = CsvUtil.getReader(); CsvReader reader = CsvUtil.getReader();
reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)-> Assert.notEmpty(csvRow.getRawList())); reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)-> {
// 只有一行所以直接判断
Assert.assertEquals("sss,sss", csvRow.get(0));
Assert.assertEquals("姓名", csvRow.get(1));
Assert.assertEquals("性别", csvRow.get(2));
Assert.assertEquals("关注\"对象\"", csvRow.get(3));
Assert.assertEquals("年龄", csvRow.get(4));
Assert.assertEquals("", csvRow.get(5));
Assert.assertEquals("\"", csvRow.get(6));
});
} }
@Test @Test
@Ignore @Ignore
public void writeTest() { public void writeTest() {
CsvWriter writer = CsvUtil.getWriter("e:/testWrite.csv", CharsetUtil.CHARSET_UTF_8); CsvWriter writer = CsvUtil.getWriter("d:/test/testWrite.csv", CharsetUtil.CHARSET_UTF_8);
writer.write( writer.write(
new String[] {"a1", "b1", "c1", "123345346456745756756785656"}, new String[] {"a1", "b1", "c1", "123345346456745756756785656"},
new String[] {"a2", "b2", "c2"}, new String[] {"a2", "b2", "c2"},
new String[] {"a3", "b3", "c3"} new String[] {"a3", "b3", "c3"}
); );
} }
@Test
@Ignore
public void readLfTest(){
	// Manual check: reads a local CSV file and logs every row; ignored by default
	// because it depends on a file on the developer's machine.
	final CsvData csvData = CsvUtil.getReader().read(FileUtil.file("d:/test/rw_test.csv"));
	for (final CsvRow csvRow : csvData) {
		Console.log(csvRow);
	}
}
} }

View File

@ -1 +1 @@
"sss,sss",姓名,"性别",关注"对象",年龄 "sss,sss",姓名,"性别",关注"对象",年龄,"","""
Can't render this file because it contains an unexpected character in line 1 and column 33.