diff --git a/hutool-core/src/main/java/cn/hutool/core/io/BufferUtil.java b/hutool-core/src/main/java/cn/hutool/core/io/BufferUtil.java
index fb15159da..007e0194e 100644
--- a/hutool-core/src/main/java/cn/hutool/core/io/BufferUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/core/io/BufferUtil.java
@@ -1,28 +1,28 @@
package cn.hutool.core.io;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.StrUtil;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+
/**
* {@link ByteBuffer} 工具类
* 此工具来自于 t-io 项目以及其它项目的相关部分收集
* ByteBuffer的相关介绍见:https://www.cnblogs.com/ruber/p/6857159.html
- *
+ *
* @author tanyaowu, looly
* @since 4.0.0
- *
*/
public class BufferUtil {
/**
* 拷贝到一个新的ByteBuffer
- *
- * @param src 源ByteBuffer
+ *
+ * @param src 源ByteBuffer
* @param start 起始位置(包括)
- * @param end 结束位置(不包括)
+ * @param end 结束位置(不包括)
* @return 新的ByteBuffer
*/
public static ByteBuffer copy(ByteBuffer src, int start, int end) {
@@ -31,8 +31,8 @@ public class BufferUtil {
/**
* 拷贝ByteBuffer
- *
- * @param src 源ByteBuffer
+ *
+ * @param src 源ByteBuffer
* @param dest 目标ByteBuffer
* @return 目标ByteBuffer
*/
@@ -42,9 +42,9 @@ public class BufferUtil {
/**
* 拷贝ByteBuffer
- *
- * @param src 源ByteBuffer
- * @param dest 目标ByteBuffer
+ *
+ * @param src 源ByteBuffer
+ * @param dest 目标ByteBuffer
* @param length 长度
* @return 目标ByteBuffer
*/
@@ -54,12 +54,12 @@ public class BufferUtil {
/**
* 拷贝ByteBuffer
- *
- * @param src 源ByteBuffer
- * @param srcStart 源开始的位置
- * @param dest 目标ByteBuffer
+ *
+ * @param src 源ByteBuffer
+ * @param srcStart 源开始的位置
+ * @param dest 目标ByteBuffer
* @param destStart 目标开始的位置
- * @param length 长度
+ * @param length 长度
* @return 目标ByteBuffer
*/
public static ByteBuffer copy(ByteBuffer src, int srcStart, ByteBuffer dest, int destStart, int length) {
@@ -69,7 +69,7 @@ public class BufferUtil {
/**
* 读取剩余部分并转为UTF-8编码字符串
- *
+ *
* @param buffer ByteBuffer
* @return 字符串
* @since 4.5.0
@@ -80,8 +80,8 @@ public class BufferUtil {
/**
* 读取剩余部分并转为字符串
- *
- * @param buffer ByteBuffer
+ *
+ * @param buffer ByteBuffer
* @param charset 编码
* @return 字符串
* @since 4.5.0
@@ -92,7 +92,7 @@ public class BufferUtil {
/**
* 读取剩余部分bytes
- *
+ *
* @param buffer ByteBuffer
* @return bytes
*/
@@ -106,8 +106,8 @@ public class BufferUtil {
/**
* 读取指定长度的bytes
* 如果长度不足,则读取剩余部分,此时buffer必须为读模式
- *
- * @param buffer ByteBuffer
+ *
+ * @param buffer ByteBuffer
* @param maxLength 最大长度
* @return bytes
*/
@@ -123,10 +123,10 @@ public class BufferUtil {
/**
* 读取指定区间的数据
- *
+ *
* @param buffer {@link ByteBuffer}
- * @param start 开始位置
- * @param end 结束位置
+ * @param start 开始位置
+ * @param end 结束位置
* @return bytes
*/
public static byte[] readBytes(ByteBuffer buffer, int start, int end) {
@@ -148,13 +148,13 @@ public class BufferUtil {
/**
* 一行的末尾位置,查找位置时位移ByteBuffer到结束位置
* 支持的换行符如下:
- *
+ *
*
* 1. \r\n
* 2. \n
*
*
- * @param buffer {@link ByteBuffer}
+ * @param buffer {@link ByteBuffer}
* @param maxLength 读取最大长度
* @return 末尾位置,未找到或达到最大长度返回-1
*/
@@ -191,13 +191,13 @@ public class BufferUtil {
/**
* 读取一行,如果buffer中最后一部分并非完整一行,则返回null
* 支持的换行符如下:
- *
+ *
*
* 1. \r\n
* 2. \n
*
- *
- * @param buffer ByteBuffer
+ *
+ * @param buffer ByteBuffer
* @param charset 编码
* @return 一行
*/
@@ -217,7 +217,7 @@ public class BufferUtil {
/**
* 创建新Buffer
- *
+ *
* @param data 数据
* @return {@link ByteBuffer}
* @since 4.5.0
@@ -228,8 +228,8 @@ public class BufferUtil {
/**
* 从字符串创建新Buffer
- *
- * @param data 数据
+ *
+ * @param data 数据
* @param charset 编码
* @return {@link ByteBuffer}
* @since 4.5.0
@@ -237,10 +237,10 @@ public class BufferUtil {
public static ByteBuffer create(CharSequence data, Charset charset) {
return create(StrUtil.bytes(data, charset));
}
-
+
/**
* 从字符串创建新Buffer,使用UTF-8编码
- *
+ *
* @param data 数据
* @return {@link ByteBuffer}
* @since 4.5.0
@@ -248,4 +248,15 @@ public class BufferUtil {
public static ByteBuffer createUtf8(CharSequence data) {
return create(StrUtil.utf8Bytes(data));
}
+
+ /**
+ * Creates a new {@link CharBuffer} by delegating to {@link CharBuffer#allocate(int)}.
+ *
+ * @param capacity capacity of the new buffer, passed unchanged to {@code CharBuffer.allocate}
+ * @return the newly allocated {@link CharBuffer}
+ * @since 5.5.7
+ */
+ public static CharBuffer createCharBuffer(int capacity) {
+ return CharBuffer.allocate(capacity);
+ }
}
diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java
index 0a362ed93..440556216 100644
--- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java
+++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java
@@ -31,19 +31,7 @@ public final class CsvParser implements Closeable, Serializable {
private final Reader reader;
private final CsvReadConfig config;
- private final char[] buf = new char[IoUtil.DEFAULT_LARGE_BUFFER_SIZE];
- /**
- * 当前位置
- */
- private int bufPos;
- /**
- * 读取一段后数据长度
- */
- private int bufLen;
- /**
- * 拷贝开始的位置,一般为上一行的结束位置
- */
- private int copyStart;
+ private final Buffer buf = new Buffer(IoUtil.DEFAULT_LARGE_BUFFER_SIZE);
/**
* 前一个特殊分界字符
*/
@@ -70,7 +58,7 @@ public final class CsvParser implements Closeable, Serializable {
*/
private int firstLineFieldCount = -1;
/**
- * 最大字段数量
+ * 最大字段数量,用于初始化行,减少扩容
*/
private int maxFieldCount;
/**
@@ -181,42 +169,55 @@ public final class CsvParser implements Closeable, Serializable {
private List readLine() throws IORuntimeException {
final List currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
- final StrBuilder localCurrentField = currentField;
- final char[] localBuf = this.buf;
- int localBufPos = bufPos;//当前位置
- int localPreChar = preChar;//前一个特殊分界字符
- int localCopyStart = copyStart;//拷贝起始位置
+ final StrBuilder currentField = this.currentField;
+ final Buffer buf = this.buf;
+ int preChar = this.preChar;//前一个特殊分界字符
int copyLen = 0; //拷贝长度
+ boolean lineStart = true;
+ boolean inComment = false;
while (true) {
- if (bufLen == localBufPos) {
+ if (false == buf.hasRemaining()) {
// 此Buffer读取结束,开始读取下一段
-
if (copyLen > 0) {
- localCurrentField.append(localBuf, localCopyStart, copyLen);
+ buf.appendTo(currentField, copyLen);
+ // 此处无需mark,read方法会重置mark
}
- try {
- bufLen = reader.read(localBuf);
- } catch (IOException e) {
- throw new IORuntimeException(e);
- }
-
- if (bufLen < 0) {
+ if (buf.read(this.reader) < 0) {
// CSV读取结束
finished = true;
- if (localPreChar == config.fieldSeparator || localCurrentField.hasContent()) {
+ if (currentField.hasContent() || preChar == config.fieldSeparator) {
//剩余部分作为一个字段
- addField(currentFields, localCurrentField.toStringAndReset());
+ addField(currentFields, currentField.toStringAndReset());
}
break;
}
//重置
- localCopyStart = localBufPos = copyLen = 0;
+ copyLen = 0;
}
- final char c = localBuf[localBufPos++];
+ final char c = buf.get();
+
+ // 注释行标记
+ if(lineStart){
+ if(c == this.config.commentCharacter){
+ inComment = true;
+ }
+ lineStart = false;
+ }
+ // 注释行处理
+ if(inComment){
+ if ((c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR) {
+ // 注释行以换行符为结尾
+ inComment = false;
+ }
+ // 跳过注释行中的任何字符
+ buf.mark();
+ preChar = c;
+ continue;
+ }
if (inQuotes) {
//引号内,做为内容,直到引号结束
@@ -224,21 +225,23 @@ public final class CsvParser implements Closeable, Serializable {
// End of quoted text
inQuotes = false;
} else {
- if ((c == CharUtil.CR || c == CharUtil.LF) && localPreChar != CharUtil.CR) {
+ // 新行
+ if ((c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR) {
lineNo++;
}
}
+ // 普通字段字符
copyLen++;
} else {
// 非引号内
if (c == config.fieldSeparator) {
//一个字段结束
if (copyLen > 0) {
- localCurrentField.append(localBuf, localCopyStart, copyLen);
+ buf.appendTo(currentField, copyLen);
copyLen = 0;
}
- addField(currentFields, localCurrentField.toStringAndReset());
- localCopyStart = localBufPos;
+ buf.mark();
+ addField(currentFields, currentField.toStringAndReset());
} else if (c == config.textDelimiter) {
// 引号开始
inQuotes = true;
@@ -246,37 +249,36 @@ public final class CsvParser implements Closeable, Serializable {
} else if (c == CharUtil.CR) {
// \r,直接结束
if (copyLen > 0) {
- localCurrentField.append(localBuf, localCopyStart, copyLen);
+ buf.appendTo(currentField, copyLen);
}
- addField(currentFields, localCurrentField.toStringAndReset());
- localPreChar = c;
- localCopyStart = localBufPos;
+ buf.mark();
+ addField(currentFields, currentField.toStringAndReset());
+ preChar = c;
break;
} else if (c == CharUtil.LF) {
// \n
- if (localPreChar != CharUtil.CR) {
+ if (preChar != CharUtil.CR) {
if (copyLen > 0) {
- localCurrentField.append(localBuf, localCopyStart, copyLen);
+ buf.appendTo(currentField, copyLen);
}
- addField(currentFields, localCurrentField.toStringAndReset());
- localPreChar = c;
- localCopyStart = localBufPos;
+ buf.mark();
+ addField(currentFields, currentField.toStringAndReset());
+ preChar = c;
break;
}
// 前一个字符是\r,已经处理过这个字段了,此处直接跳过
- localCopyStart = localBufPos;
+ buf.mark();
} else {
+ // 普通字符
copyLen++;
}
}
- localPreChar = c;
+ preChar = c;
}
// restore fields
- bufPos = localBufPos;
- preChar = localPreChar;
- copyStart = localCopyStart;
+ this.preChar = preChar;
return currentFields;
}
@@ -298,4 +300,86 @@ public final class CsvParser implements Closeable, Serializable {
field = StrUtil.replace(field, "" + textDelimiter + textDelimiter, textDelimiter + "");
currentFields.add(StrUtil.unWrap(field, textDelimiter));
}
+
+ /**
+ * Internal char buffer: filled chunk by chunk from a {@link Reader} and consumed one char at a time.
+ *
+ * @author looly
+ */
+ private static class Buffer {
+ final char[] buf; // backing array; its length is the capacity passed to the constructor
+
+ /**
+ * Marked index: the offset in {@code buf} from which {@link #appendTo} starts copying.
+ */
+ private int mark;
+ /**
+ * Current index: the offset of the next char returned by {@link #get()}.
+ */
+ private int position;
+ /**
+ * Value returned by the last {@code reader.read(buf)} call: the loaded length, usually below buf.length; -1 means no data.
+ */
+ private int limit;
+
+ Buffer(int capacity) {
+ buf = new char[capacity];
+ }
+
+ /**
+ * Tells whether unread chars remain, i.e. whether {@code position < limit}.
+ *
+ * @return {@code true} if at least one unread char remains
+ */
+ public final boolean hasRemaining() {
+ return position < limit;
+ }
+
+ /**
+ * Refills the buffer via {@code reader.read(buf)}, resets mark and position to 0, and returns the read length.
+ *
+ * @param reader {@link Reader} to read from; an {@link IOException} is rethrown as {@link IORuntimeException}
+ */
+ int read(Reader reader) {
+ int length;
+ try {
+ length = reader.read(this.buf);
+ } catch (IOException e) {
+ throw new IORuntimeException(e);
+ }
+ this.mark = 0;
+ this.position = 0;
+ this.limit = length;
+ return length;
+ }
+
+ /**
+ * Returns the char at the current position, then advances the position by one.
+ * No bounds check is performed here; callers must test {@link #hasRemaining()} first.
+ *
+ * @return the char at the current position
+ * @see #hasRemaining()
+ */
+ char get() {
+ return this.buf[this.position++];
+ }
+
+ /**
+ * Sets the mark to the current position, so the next {@link #appendTo} copies from here.
+ */
+ void mark() {
+ this.mark = this.position;
+ }
+
+ /**
+ * Appends {@code length} chars starting at the mark to the {@link StrBuilder}; the mark is not moved, so call {@link #mark()} afterwards.
+ *
+ * @param builder {@link StrBuilder} receiving the chars
+ * @param length number of chars to append
+ * @see #mark()
+ */
+ void appendTo(StrBuilder builder, int length) {
+ builder.append(this.buf, this.mark, length);
+ }
+ }
}
diff --git a/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java b/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java
index 74b81a29c..0ace0b405 100644
--- a/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java
+++ b/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java
@@ -41,6 +41,12 @@ public class CsvUtilTest {
Assert.assertEquals("\"", csvRow.get(6));
});
}
+
+ @Test
+ public void readTest3() { // NOTE(review): smoke test only, nothing is asserted here - TODO add assertions on the parsed rows
+ CsvReader reader = CsvUtil.getReader();
+ reader.read(FileUtil.getUtf8Reader("test.csv"), Console::log); // hands Console::log to read() as the handler; presumably invoked once per parsed row - confirm
+ }
@Test
@Ignore
diff --git a/hutool-core/src/test/resources/test.csv b/hutool-core/src/test/resources/test.csv
index 6c6ee2ff4..9c5057d09 100644
--- a/hutool-core/src/test/resources/test.csv
+++ b/hutool-core/src/test/resources/test.csv
@@ -1 +1,2 @@
+# 这是一行注释,读取时应忽略
"sss,sss",姓名,"性别",关注"对象",年龄,"","""
\ No newline at end of file