diff --git a/CHANGELOG.md b/CHANGELOG.md index 38c61e5ea..0ca63c2ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ------------------------------------------------------------------------------------------------------------- -# 5.5.7 (2021-01-02) +# 5.5.7 (2021-01-03) ### 新特性 * 【core 】 DynaBean.create增加重载方法(pr#245@Gitee) @@ -13,6 +13,7 @@ * 【extra 】 MailUtil增加getSession方法 ### Bug修复 +* 【core 】 修复CsvReader读取双引号未转义问题(issue#I2BMP1@Gitee) ------------------------------------------------------------------------------------------------------------- diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvConfig.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvConfig.java index 65aa48c07..d143a27ba 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvConfig.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvConfig.java @@ -1,26 +1,34 @@ package cn.hutool.core.text.csv; -import java.io.Serializable; - import cn.hutool.core.util.CharUtil; +import java.io.Serializable; + /** - * CSV基础配置项 - * + * CSV基础配置项,此配置项可用于读取和写出CSV,定义了包括字段分隔符、文本包装符等符号 + * * @author looly * @since 4.0.5 */ -public class CsvConfig implements Serializable{ +public class CsvConfig implements Serializable { private static final long serialVersionUID = -8069578249066158459L; - - /** 字段分隔符,默认逗号',' */ + + /** + * 字段分隔符,默认逗号',' + */ protected char fieldSeparator = CharUtil.COMMA; - /** 文本分隔符,文本包装符,默认双引号'"' */ + /** + * 文本包装符,默认双引号'"' + */ protected char textDelimiter = CharUtil.DOUBLE_QUOTES; + /** + * 注释符号,用于区分注释行,默认'#' + */ + protected char commentCharacter = '#'; /** * 设置字段分隔符,默认逗号',' - * + * * @param fieldSeparator 字段分隔符,默认逗号',' */ public void setFieldSeparator(final char fieldSeparator) { @@ -29,10 +37,20 @@ public class CsvConfig implements Serializable{ /** * 设置 文本分隔符,文本包装符,默认双引号'"' - * + * * @param textDelimiter 文本分隔符,文本包装符,默认双引号'"' */ public void setTextDelimiter(char textDelimiter) { this.textDelimiter = textDelimiter; } + + /** + * 设置 
注释符号,用于区分注释行 + * + * @param commentCharacter 注释符号,用于区分注释行 + * @since 5.5.7 + */ + public void setCommentCharacter(char commentCharacter) { + this.commentCharacter = commentCharacter; + } } diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java index 0ccd01a16..0a362ed93 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java @@ -32,33 +32,55 @@ public final class CsvParser implements Closeable, Serializable { private final CsvReadConfig config; private final char[] buf = new char[IoUtil.DEFAULT_LARGE_BUFFER_SIZE]; - /** 当前位置 */ + /** + * 当前位置 + */ private int bufPos; - /** 读取一段后数据长度 */ + /** + * 读取一段后数据长度 + */ private int bufLen; - /** 拷贝开始的位置,一般为上一行的结束位置 */ + /** + * 拷贝开始的位置,一般为上一行的结束位置 + */ private int copyStart; - /** 前一个特殊分界字符 */ + /** + * 前一个特殊分界字符 + */ private int preChar = -1; - /** 是否在引号包装内 */ + /** + * 是否在引号包装内 + */ private boolean inQuotes; - /** 当前读取字段 */ + /** + * 当前读取字段 + */ private final StrBuilder currentField = new StrBuilder(512); - - /** 标题行 */ + + /** + * 标题行 + */ private CsvRow header; - /** 当前行号 */ + /** + * 当前行号 + */ private long lineNo; - /** 第一行字段数,用于检查每行字段数是否一致 */ + /** + * 第一行字段数,用于检查每行字段数是否一致 + */ private int firstLineFieldCount = -1; - /** 最大字段数量 */ + /** + * 最大字段数量 + */ private int maxFieldCount; - /** 是否读取结束 */ + /** + * 是否读取结束 + */ private boolean finished; /** * CSV解析器 - * + * * @param reader Reader * @param config 配置,null则为默认配置 */ @@ -84,7 +106,7 @@ public final class CsvParser implements Closeable, Serializable { } /** - *读取下一行数据 + * 读取下一行数据 * * @return CsvRow * @throws IORuntimeException IO读取异常 @@ -97,7 +119,7 @@ public final class CsvParser implements Closeable, Serializable { startingLineNo = ++lineNo; currentFields = readLine(); fieldCount = currentFields.size(); - if(fieldCount < 1){ + if (fieldCount < 1) { break; } @@ -135,24 +157,24 @@ public 
final class CsvParser implements Closeable, Serializable { /** * 当前行做为标题行 - * + * * @param currentFields 当前行字段列表 */ private void initHeader(final List currentFields) { final Map localHeaderMap = new LinkedHashMap<>(currentFields.size()); for (int i = 0; i < currentFields.size(); i++) { final String field = currentFields.get(i); - if (StrUtil.isNotEmpty(field) && false ==localHeaderMap.containsKey(field)) { + if (StrUtil.isNotEmpty(field) && false == localHeaderMap.containsKey(field)) { localHeaderMap.put(field, i); } } - - header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields)); + + header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields)); } /** * 读取一行数据 - * + * * @return 一行数据 * @throws IORuntimeException IO异常 */ @@ -185,7 +207,7 @@ public final class CsvParser implements Closeable, Serializable { if (localPreChar == config.fieldSeparator || localCurrentField.hasContent()) { //剩余部分作为一个字段 - currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter)); + addField(currentFields, localCurrentField.toStringAndReset()); } break; } @@ -208,36 +230,40 @@ public final class CsvParser implements Closeable, Serializable { } copyLen++; } else { + // 非引号内 if (c == config.fieldSeparator) { //一个字段结束 if (copyLen > 0) { localCurrentField.append(localBuf, localCopyStart, copyLen); copyLen = 0; } - currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter)); + addField(currentFields, localCurrentField.toStringAndReset()); localCopyStart = localBufPos; } else if (c == config.textDelimiter) { // 引号开始 inQuotes = true; copyLen++; } else if (c == CharUtil.CR) { + // \r,直接结束 if (copyLen > 0) { localCurrentField.append(localBuf, localCopyStart, copyLen); } - currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter)); + addField(currentFields, 
localCurrentField.toStringAndReset()); localPreChar = c; localCopyStart = localBufPos; break; } else if (c == CharUtil.LF) { + // \n if (localPreChar != CharUtil.CR) { if (copyLen > 0) { localCurrentField.append(localBuf, localCopyStart, copyLen); } - currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter)); + addField(currentFields, localCurrentField.toStringAndReset()); localPreChar = c; localCopyStart = localBufPos; break; } + // 前一个字符是\r,已经处理过这个字段了,此处直接跳过 localCopyStart = localBufPos; } else { copyLen++; @@ -254,9 +280,22 @@ public final class CsvParser implements Closeable, Serializable { return currentFields; } - + @Override public void close() throws IOException { reader.close(); } + + /** + * 将字段加入字段列表并自动去包装和去转义 + * + * @param currentFields 当前的字段列表(即为行) + * @param field 字段 + */ + private void addField(List currentFields, String field) { + field = StrUtil.unWrap(field, config.textDelimiter); + char textDelimiter = this.config.textDelimiter; + field = StrUtil.replace(field, "" + textDelimiter + textDelimiter, textDelimiter + ""); + currentFields.add(StrUtil.unWrap(field, textDelimiter)); + } } diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvWriter.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvWriter.java index d9528bc26..a012827a6 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvWriter.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvWriter.java @@ -29,17 +29,24 @@ import java.util.Collection; public final class CsvWriter implements Closeable, Flushable, Serializable { private static final long serialVersionUID = 1L; - /** 写出器 */ + /** + * 写出器 + */ private final Writer writer; - /** 写出配置 */ + /** + * 写出配置 + */ private final CsvWriteConfig config; - /** 是否处于新行开始 */ + /** + * 是否处于新行开始 + */ private boolean newline = true; // --------------------------------------------------------------------------------------------------- Constructor start + /** * 
构造,覆盖已有文件(如果存在),默认编码UTF-8 - * + * * @param filePath File CSV文件路径 */ public CsvWriter(String filePath) { @@ -48,7 +55,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造,覆盖已有文件(如果存在),默认编码UTF-8 - * + * * @param file File CSV文件 */ public CsvWriter(File file) { @@ -57,9 +64,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造,覆盖已有文件(如果存在) - * + * * @param filePath File CSV文件路径 - * @param charset 编码 + * @param charset 编码 */ public CsvWriter(String filePath, Charset charset) { this(FileUtil.file(filePath), charset); @@ -67,8 +74,8 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造,覆盖已有文件(如果存在) - * - * @param file File CSV文件 + * + * @param file File CSV文件 * @param charset 编码 */ public CsvWriter(File file, Charset charset) { @@ -77,9 +84,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造 - * + * * @param filePath File CSV文件路径 - * @param charset 编码 + * @param charset 编码 * @param isAppend 是否追加 */ public CsvWriter(String filePath, Charset charset, boolean isAppend) { @@ -88,9 +95,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造 - * - * @param file CSV文件 - * @param charset 编码 + * + * @param file CSV文件 + * @param charset 编码 * @param isAppend 是否追加 */ public CsvWriter(File file, Charset charset, boolean isAppend) { @@ -99,11 +106,11 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造 - * + * * @param filePath CSV文件路径 - * @param charset 编码 + * @param charset 编码 * @param isAppend 是否追加 - * @param config 写出配置,null则使用默认配置 + * @param config 写出配置,null则使用默认配置 */ public CsvWriter(String filePath, Charset charset, boolean isAppend, CsvWriteConfig config) { this(FileUtil.file(filePath), charset, isAppend, config); @@ -111,11 +118,11 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造 - * - * @param file CSV文件 - * @param 
charset 编码 + * + * @param file CSV文件 + * @param charset 编码 * @param isAppend 是否追加 - * @param config 写出配置,null则使用默认配置 + * @param config 写出配置,null则使用默认配置 */ public CsvWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) { this(FileUtil.getWriter(file, charset, isAppend), config); @@ -123,7 +130,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造,使用默认配置 - * + * * @param writer {@link Writer} */ public CsvWriter(Writer writer) { @@ -132,7 +139,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 构造 - * + * * @param writer Writer * @param config 写出配置,null则使用默认配置 */ @@ -144,7 +151,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 设置是否始终使用文本分隔符,文本包装符,默认false,按需添加 - * + * * @param alwaysDelimitText 是否始终使用文本分隔符,文本包装符,默认false,按需添加 * @return this */ @@ -155,7 +162,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 设置换行符 - * + * * @param lineDelimiter 换行符 * @return this */ @@ -166,7 +173,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 将多行写出到Writer - * + * * @param lines 多行数据 * @return this * @throws IORuntimeException IO异常 @@ -183,7 +190,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { /** * 将多行写出到Writer - * + * * @param lines 多行数据,每行数据可以是集合或者数组 * @return this * @throws IORuntimeException IO异常 @@ -198,18 +205,55 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { return this; } + /** + * 写出一行 + * + * @param fields 字段列表 ({@code null} 值会被做为空值追加) + * @return this + * @throws IORuntimeException IO异常 + * @since 5.5.7 + */ + public CsvWriter writeLine(String... 
fields) throws IORuntimeException { + if (ArrayUtil.isEmpty(fields)) { + return writeLine(); + } + appendLine(fields); + return this; + } + /** * 追加新行(换行) * * @throws IORuntimeException IO异常 */ - public void writeLine() throws IORuntimeException { + public CsvWriter writeLine() throws IORuntimeException { try { writer.write(config.lineDelimiter); } catch (IOException e) { throw new IORuntimeException(e); } newline = true; + return this; + } + + /** + * 写出一行注释,注释符号可自定义 + * + * @param comment 注释内容 + * @return this + * @see CsvConfig#commentCharacter + * @since 5.5.7 + */ + public CsvWriter writeComment(String comment) { + try { + writer.write(this.config.commentCharacter); + writer.write(comment); + writer.write(config.lineDelimiter); + newline = true; + } catch (IOException e) { + throw new IORuntimeException(e); + } + return this; } @Override @@ -227,13 +271,14 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { } // --------------------------------------------------------------------------------------------------- Private method start + /** * 追加一行,末尾会自动换行,但是追加前不会换行 * * @param fields 字段列表 ({@code null} 值会被做为空值追加) * @throws IORuntimeException IO异常 */ - private void appendLine(final String... fields) throws IORuntimeException { + private void appendLine(String... 
fields) throws IORuntimeException { try { doAppendLine(fields); } catch (IOException e) { @@ -276,7 +321,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable { if (null == value) { if (alwaysDelimitText) { - writer.write(new char[] { textDelimiter, textDelimiter }); + writer.write(new char[]{textDelimiter, textDelimiter}); } return; } diff --git a/hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java index edabec224..610b8baad 100644 --- a/hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/util/CharUtil.java @@ -47,7 +47,7 @@ public class CharUtil { public static final char AMP = '&'; /** 字符常量:冒号 {@code ':'} */ public static final char COLON = ':'; - /** 字符常量:艾特 '@' */ + /** 字符常量:艾特 {@code '@'} */ public static final char AT = '@'; /** diff --git a/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java b/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java index 43eea4a40..74b81a29c 100644 --- a/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java @@ -1,8 +1,9 @@ package cn.hutool.core.text.csv; import cn.hutool.core.io.FileUtil; -import cn.hutool.core.lang.Assert; +import cn.hutool.core.lang.Console; import cn.hutool.core.util.CharsetUtil; +import org.junit.Assert; import org.junit.Ignore; import org.junit.Test; @@ -16,26 +17,49 @@ public class CsvUtilTest { //从文件中读取CSV数据 CsvData data = reader.read(FileUtil.file("test.csv")); List rows = data.getRows(); - for (CsvRow csvRow : rows) { - Assert.notEmpty(csvRow.getRawList()); - } + final CsvRow row0 = rows.get(0); + Assert.assertEquals("sss,sss", row0.get(0)); + Assert.assertEquals("姓名", row0.get(1)); + Assert.assertEquals("性别", row0.get(2)); + Assert.assertEquals("关注\"对象\"", row0.get(3)); + Assert.assertEquals("年龄", row0.get(4)); + Assert.assertEquals("", 
row0.get(5)); + Assert.assertEquals("\"", row0.get(6)); } @Test public void readTest2() { CsvReader reader = CsvUtil.getReader(); - reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)-> Assert.notEmpty(csvRow.getRawList())); + reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)-> { + // 只有一行,所以直接判断 + Assert.assertEquals("sss,sss", csvRow.get(0)); + Assert.assertEquals("姓名", csvRow.get(1)); + Assert.assertEquals("性别", csvRow.get(2)); + Assert.assertEquals("关注\"对象\"", csvRow.get(3)); + Assert.assertEquals("年龄", csvRow.get(4)); + Assert.assertEquals("", csvRow.get(5)); + Assert.assertEquals("\"", csvRow.get(6)); + }); } @Test @Ignore public void writeTest() { - CsvWriter writer = CsvUtil.getWriter("e:/testWrite.csv", CharsetUtil.CHARSET_UTF_8); + CsvWriter writer = CsvUtil.getWriter("d:/test/testWrite.csv", CharsetUtil.CHARSET_UTF_8); writer.write( new String[] {"a1", "b1", "c1", "123345346456745756756785656"}, new String[] {"a2", "b2", "c2"}, new String[] {"a3", "b3", "c3"} ); } - + + @Test + @Ignore + public void readLfTest(){ + final CsvReader reader = CsvUtil.getReader(); + final CsvData read = reader.read(FileUtil.file("d:/test/rw_test.csv")); + for (CsvRow row : read) { + Console.log(row); + } + } } diff --git a/hutool-core/src/test/resources/test.csv b/hutool-core/src/test/resources/test.csv index a22b8335f..6c6ee2ff4 100644 --- a/hutool-core/src/test/resources/test.csv +++ b/hutool-core/src/test/resources/test.csv @@ -1 +1 @@ -"sss,sss",姓名,"性别",关注"对象",年龄 \ No newline at end of file +"sss,sss",姓名,"性别",关注"对象",年龄,"",""" \ No newline at end of file