mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-04-19 03:01:48 +08:00
【可能的向下兼容问题】修复双引号转义符转义错误问题,修改规则后,对非闭合双引号字段的策略变更,如"aa,则被识别为aa
This commit is contained in:
parent
ecf4193280
commit
fc60818326
@ -2,7 +2,7 @@
|
|||||||
# 🚀Changelog
|
# 🚀Changelog
|
||||||
|
|
||||||
-------------------------------------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------------------------------------
|
||||||
# 5.8.34(2024-11-21)
|
# 5.8.34(2024-11-24)
|
||||||
|
|
||||||
### 🐣新特性
|
### 🐣新特性
|
||||||
* 【http 】 增加Windows微信浏览器识别(issue#IB3SJF@Gitee)
|
* 【http 】 增加Windows微信浏览器识别(issue#IB3SJF@Gitee)
|
||||||
@ -15,6 +15,7 @@
|
|||||||
* 【core 】 修复DateUtil.rangeToList中step小于等于0时无限循环问题(issue#3783@Github)
|
* 【core 】 修复DateUtil.rangeToList中step小于等于0时无限循环问题(issue#3783@Github)
|
||||||
* 【cron 】 修复cron模块依赖log模块问题
|
* 【cron 】 修复cron模块依赖log模块问题
|
||||||
* 【extra 】 修复MailUtil发送html格式邮件无法正常展示图片问题(pr#1279@Gitee)
|
* 【extra 】 修复MailUtil发送html格式邮件无法正常展示图片问题(pr#1279@Gitee)
|
||||||
|
* 【core 】 【可能的向下兼容问题】修复双引号转义符转义错误问题,修改规则后,对非闭合双引号字段的策略变更,如"aa,则被识别为aa(issue#IB5UQ8@Gitee)
|
||||||
|
|
||||||
-------------------------------------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------------------------------------
|
||||||
# 5.8.33(2024-11-05)
|
# 5.8.33(2024-11-05)
|
||||||
|
242
hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java
Executable file → Normal file
242
hutool-core/src/main/java/cn/hutool/core/text/csv/CsvParser.java
Executable file → Normal file
@ -1,24 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2013-2024 Hutool Team and hutool.cn
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
package cn.hutool.core.text.csv;
|
package cn.hutool.core.text.csv;
|
||||||
|
|
||||||
import cn.hutool.core.collection.ComputeIter;
|
import cn.hutool.core.collection.ComputeIter;
|
||||||
import cn.hutool.core.io.IORuntimeException;
|
import cn.hutool.core.io.IORuntimeException;
|
||||||
import cn.hutool.core.io.IoUtil;
|
|
||||||
import cn.hutool.core.map.MapUtil;
|
import cn.hutool.core.map.MapUtil;
|
||||||
import cn.hutool.core.text.StrBuilder;
|
|
||||||
import cn.hutool.core.util.CharUtil;
|
import cn.hutool.core.util.CharUtil;
|
||||||
import cn.hutool.core.util.ObjectUtil;
|
import cn.hutool.core.util.ObjUtil;
|
||||||
import cn.hutool.core.util.StrUtil;
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CSV行解析器,参考:FastCSV
|
* CSV行解析器,参考:FastCSV
|
||||||
@ -30,10 +39,8 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
|
|
||||||
private static final int DEFAULT_ROW_CAPACITY = 10;
|
private static final int DEFAULT_ROW_CAPACITY = 10;
|
||||||
|
|
||||||
private final Reader reader;
|
|
||||||
private final CsvReadConfig config;
|
private final CsvReadConfig config;
|
||||||
|
private final CsvTokener tokener;
|
||||||
private final Buffer buf = new Buffer(IoUtil.DEFAULT_LARGE_BUFFER_SIZE);
|
|
||||||
/**
|
/**
|
||||||
* 前一个特殊分界字符
|
* 前一个特殊分界字符
|
||||||
*/
|
*/
|
||||||
@ -45,7 +52,7 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
/**
|
/**
|
||||||
* 当前读取字段
|
* 当前读取字段
|
||||||
*/
|
*/
|
||||||
private final StrBuilder currentField = new StrBuilder(512);
|
private final StringBuilder currentField = new StringBuilder(512);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 标题行
|
* 标题行
|
||||||
@ -78,9 +85,9 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
* @param reader Reader
|
* @param reader Reader
|
||||||
* @param config 配置,null则为默认配置
|
* @param config 配置,null则为默认配置
|
||||||
*/
|
*/
|
||||||
public CsvParser(final Reader reader, CsvReadConfig config) {
|
public CsvParser(final Reader reader, final CsvReadConfig config) {
|
||||||
this.reader = Objects.requireNonNull(reader, "reader must not be null");
|
this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::defaultConfig);
|
||||||
this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig::defaultConfig);
|
this.tokener = new CsvTokener(reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -90,13 +97,13 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
* @throws IllegalStateException 如果不解析头部或者没有调用nextRow()方法
|
* @throws IllegalStateException 如果不解析头部或者没有调用nextRow()方法
|
||||||
*/
|
*/
|
||||||
public List<String> getHeader() {
|
public List<String> getHeader() {
|
||||||
if (config.headerLineNo < 0) {
|
if (config.headerLineNo < 0) {
|
||||||
throw new IllegalStateException("No header available - header parsing is disabled");
|
throw new IllegalStateException("No header available - header parsing is disabled");
|
||||||
}
|
}
|
||||||
if (lineNo < config.beginLineNo) {
|
if (lineNo < config.beginLineNo) {
|
||||||
throw new IllegalStateException("No header available - call nextRow() first");
|
throw new IllegalStateException("No header available - call nextRow() first");
|
||||||
}
|
}
|
||||||
return header.fields;
|
return header.getRawList();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -107,13 +114,13 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
/**
|
/**
|
||||||
* 读取下一行数据
|
* 读取下一行数据
|
||||||
*
|
*
|
||||||
* @return CsvRow
|
* @return CsvRow,{@code null}表示
|
||||||
* @throws IORuntimeException IO读取异常
|
* @throws IORuntimeException IO读取异常
|
||||||
*/
|
*/
|
||||||
public CsvRow nextRow() throws IORuntimeException {
|
public CsvRow nextRow() throws IORuntimeException {
|
||||||
List<String> currentFields;
|
List<String> currentFields;
|
||||||
int fieldCount;
|
int fieldCount;
|
||||||
while (false == finished) {
|
while (!finished) {
|
||||||
currentFields = readLine();
|
currentFields = readLine();
|
||||||
fieldCount = currentFields.size();
|
fieldCount = currentFields.size();
|
||||||
if (fieldCount < 1) {
|
if (fieldCount < 1) {
|
||||||
@ -122,11 +129,11 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 读取范围校验
|
// 读取范围校验
|
||||||
if(lineNo < config.beginLineNo){
|
if (lineNo < config.beginLineNo) {
|
||||||
// 未达到读取起始行,继续
|
// 未达到读取起始行,继续
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if(lineNo > config.endLineNo){
|
if (lineNo > config.endLineNo) {
|
||||||
// 超出结束行,读取结束
|
// 超出结束行,读取结束
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -175,9 +182,9 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
String field = currentFields.get(i);
|
String field = currentFields.get(i);
|
||||||
if (MapUtil.isNotEmpty(this.config.headerAlias)) {
|
if (MapUtil.isNotEmpty(this.config.headerAlias)) {
|
||||||
// 自定义别名
|
// 自定义别名
|
||||||
field = ObjectUtil.defaultIfNull(this.config.headerAlias.get(field), field);
|
field = ObjUtil.defaultIfNull(this.config.headerAlias.get(field), field);
|
||||||
}
|
}
|
||||||
if (StrUtil.isNotEmpty(field) && false == localHeaderMap.containsKey(field)) {
|
if (StrUtil.isNotEmpty(field) && !localHeaderMap.containsKey(field)) {
|
||||||
localHeaderMap.put(field, i);
|
localHeaderMap.put(field, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -190,7 +197,7 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
* 空行是size为1的List,唯一元素是""
|
* 空行是size为1的List,唯一元素是""
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* 行号要考虑注释行和引号包装的内容中的换行
|
* 行号要考虑注释行和引号包装的内容中的换行
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @return 一行数据
|
* @return 一行数据
|
||||||
@ -199,70 +206,67 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
private List<String> readLine() throws IORuntimeException {
|
private List<String> readLine() throws IORuntimeException {
|
||||||
// 矫正行号
|
// 矫正行号
|
||||||
// 当一行内容包含多行数据时,记录首行行号,但是读取下一行时,需要把多行内容的行数加上
|
// 当一行内容包含多行数据时,记录首行行号,但是读取下一行时,需要把多行内容的行数加上
|
||||||
if(inQuotesLineCount > 0){
|
if (inQuotesLineCount > 0) {
|
||||||
this.lineNo += this.inQuotesLineCount;
|
this.lineNo += this.inQuotesLineCount;
|
||||||
this.inQuotesLineCount = 0;
|
this.inQuotesLineCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
|
final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);
|
||||||
|
|
||||||
final StrBuilder currentField = this.currentField;
|
final StringBuilder currentField = this.currentField;
|
||||||
final Buffer buf = this.buf;
|
|
||||||
int preChar = this.preChar;//前一个特殊分界字符
|
int preChar = this.preChar;//前一个特殊分界字符
|
||||||
int copyLen = 0; //拷贝长度
|
|
||||||
boolean inComment = false;
|
boolean inComment = false;
|
||||||
|
|
||||||
|
int c;
|
||||||
while (true) {
|
while (true) {
|
||||||
if (false == buf.hasRemaining()) {
|
c = tokener.next();
|
||||||
// 此Buffer读取结束,开始读取下一段
|
if(c < 0){
|
||||||
if (copyLen > 0) {
|
if (currentField.length() > 0 || preChar == config.fieldSeparator) {
|
||||||
buf.appendTo(currentField, copyLen);
|
if(this.inQuotes){
|
||||||
// 此处无需mark,read方法会重置mark
|
// 未闭合的文本包装,在末尾补充包装符
|
||||||
}
|
currentField.append(config.textDelimiter);
|
||||||
if (buf.read(this.reader) < 0) {
|
|
||||||
// CSV读取结束
|
|
||||||
finished = true;
|
|
||||||
|
|
||||||
if (currentField.hasContent() || preChar == config.fieldSeparator) {
|
|
||||||
//剩余部分作为一个字段
|
|
||||||
addField(currentFields, currentField.toStringAndReset());
|
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
//重置
|
//剩余部分作为一个字段
|
||||||
copyLen = 0;
|
addField(currentFields, currentField.toString());
|
||||||
|
currentField.setLength(0);
|
||||||
|
}
|
||||||
|
// 读取结束
|
||||||
|
this.finished = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
final char c = buf.get();
|
|
||||||
|
|
||||||
// 注释行标记
|
// 注释行标记
|
||||||
if(preChar < 0 || preChar == CharUtil.CR || preChar == CharUtil.LF){
|
if (preChar < 0 || preChar == CharUtil.CR || preChar == CharUtil.LF) {
|
||||||
// 判断行首字符为指定注释字符的注释开始,直到遇到换行符
|
// 判断行首字符为指定注释字符的注释开始,直到遇到换行符
|
||||||
// 行首分两种,1是preChar < 0表示文本开始,2是换行符后紧跟就是下一行的开始
|
// 行首分两种,1是preChar < 0表示文本开始,2是换行符后紧跟就是下一行的开始
|
||||||
// issue#IA8WE0 如果注释符出现在包装符内,被认为是普通字符
|
// issue#IA8WE0 如果注释符出现在包装符内,被认为是普通字符
|
||||||
if((false == inQuotes) && null != this.config.commentCharacter && c == this.config.commentCharacter){
|
if (!inQuotes && null != this.config.commentCharacter && c == this.config.commentCharacter) {
|
||||||
inComment = true;
|
inComment = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 注释行处理
|
// 注释行处理
|
||||||
if(inComment){
|
if (inComment) {
|
||||||
if (c == CharUtil.CR || c == CharUtil.LF) {
|
if (c == CharUtil.CR || c == CharUtil.LF) {
|
||||||
// 注释行以换行符为结尾
|
// 注释行以换行符为结尾
|
||||||
lineNo++;
|
lineNo++;
|
||||||
inComment = false;
|
inComment = false;
|
||||||
}
|
}
|
||||||
// 跳过注释行中的任何字符
|
// 跳过注释行中的任何字符
|
||||||
buf.mark();
|
|
||||||
preChar = c;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inQuotes) {
|
if (inQuotes) {
|
||||||
//引号内,作为内容,直到引号结束
|
//引号内,作为内容,直到引号结束
|
||||||
if (c == config.textDelimiter) {
|
if (c == config.textDelimiter) {
|
||||||
// End of quoted text
|
// issue#IB5UQ8 文本包装符转义
|
||||||
inQuotes = false;
|
final int next = tokener.next();
|
||||||
|
if(next != config.textDelimiter){
|
||||||
|
// 包装结束
|
||||||
|
inQuotes = false;
|
||||||
|
tokener.back();
|
||||||
|
}
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc4180#section-2 跳过转义符,只保留被转义的包装符
|
||||||
} else {
|
} else {
|
||||||
// 字段内容中新行
|
// 字段内容中新行
|
||||||
if (isLineEnd(c, preChar)) {
|
if (isLineEnd(c, preChar)) {
|
||||||
@ -270,46 +274,34 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 普通字段字符
|
// 普通字段字符
|
||||||
copyLen++;
|
currentField.append((char)c);
|
||||||
} else {
|
} else {
|
||||||
// 非引号内
|
// 非引号内
|
||||||
if (c == config.fieldSeparator) {
|
if (c == config.fieldSeparator) {
|
||||||
//一个字段结束
|
//一个字段结束
|
||||||
if (copyLen > 0) {
|
addField(currentFields, currentField.toString());
|
||||||
buf.appendTo(currentField, copyLen);
|
currentField.setLength(0);
|
||||||
copyLen = 0;
|
|
||||||
}
|
|
||||||
buf.mark();
|
|
||||||
addField(currentFields, currentField.toStringAndReset());
|
|
||||||
} else if (c == config.textDelimiter && isFieldBegin(preChar)) {
|
} else if (c == config.textDelimiter && isFieldBegin(preChar)) {
|
||||||
// 引号开始且出现在字段开头
|
// 引号开始且出现在字段开头
|
||||||
inQuotes = true;
|
inQuotes = true;
|
||||||
copyLen++;
|
currentField.append((char)c);
|
||||||
} else if (c == CharUtil.CR) {
|
} else if (c == CharUtil.CR) {
|
||||||
// \r,直接结束
|
// \r
|
||||||
if (copyLen > 0) {
|
addField(currentFields, currentField.toString());
|
||||||
buf.appendTo(currentField, copyLen);
|
currentField.setLength(0);
|
||||||
}
|
|
||||||
buf.mark();
|
|
||||||
addField(currentFields, currentField.toStringAndReset());
|
|
||||||
preChar = c;
|
preChar = c;
|
||||||
break;
|
break;
|
||||||
} else if (c == CharUtil.LF) {
|
} else if (c == CharUtil.LF) {
|
||||||
// \n
|
// \n
|
||||||
if (preChar != CharUtil.CR) {
|
if (preChar != CharUtil.CR) {
|
||||||
if (copyLen > 0) {
|
addField(currentFields, currentField.toString());
|
||||||
buf.appendTo(currentField, copyLen);
|
currentField.setLength(0);
|
||||||
}
|
|
||||||
buf.mark();
|
|
||||||
addField(currentFields, currentField.toStringAndReset());
|
|
||||||
preChar = c;
|
preChar = c;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// 前一个字符是\r,已经处理过这个字段了,此处直接跳过
|
// 前一个字符是\r,已经处理过这个字段了,此处直接跳过
|
||||||
buf.mark();
|
|
||||||
} else {
|
} else {
|
||||||
// 普通字符
|
currentField.append((char)c);
|
||||||
copyLen++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -325,7 +317,7 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
reader.close();
|
tokener.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -334,7 +326,7 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
* @param currentFields 当前的字段列表(即为行)
|
* @param currentFields 当前的字段列表(即为行)
|
||||||
* @param field 字段
|
* @param field 字段
|
||||||
*/
|
*/
|
||||||
private void addField(List<String> currentFields, String field) {
|
private void addField(final List<String> currentFields, String field) {
|
||||||
final char textDelimiter = this.config.textDelimiter;
|
final char textDelimiter = this.config.textDelimiter;
|
||||||
|
|
||||||
// 忽略多余引号后的换行符
|
// 忽略多余引号后的换行符
|
||||||
@ -342,12 +334,9 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
|
|
||||||
if(StrUtil.isWrap(field, textDelimiter)){
|
if(StrUtil.isWrap(field, textDelimiter)){
|
||||||
field = StrUtil.sub(field, 1, field.length() - 1);
|
field = StrUtil.sub(field, 1, field.length() - 1);
|
||||||
// https://datatracker.ietf.org/doc/html/rfc4180#section-2
|
|
||||||
// 第七条规则,只有包装内的包装符需要转义
|
|
||||||
field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(this.config.trimField){
|
if (this.config.trimField) {
|
||||||
// issue#I49M0C@Gitee
|
// issue#I49M0C@Gitee
|
||||||
field = StrUtil.trim(field);
|
field = StrUtil.trim(field);
|
||||||
}
|
}
|
||||||
@ -362,7 +351,7 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
* @return 是否结束
|
* @return 是否结束
|
||||||
* @since 5.7.4
|
* @since 5.7.4
|
||||||
*/
|
*/
|
||||||
private boolean isLineEnd(char c, int preChar) {
|
private boolean isLineEnd(final int c, final int preChar) {
|
||||||
return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
|
return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,89 +372,4 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
|
|||||||
|| preChar == CharUtil.LF
|
|| preChar == CharUtil.LF
|
||||||
|| preChar == CharUtil.CR;
|
|| preChar == CharUtil.CR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* 内部Buffer
|
|
||||||
*
|
|
||||||
* @author looly
|
|
||||||
*/
|
|
||||||
private static class Buffer implements Serializable{
|
|
||||||
private static final long serialVersionUID = 1L;
|
|
||||||
|
|
||||||
final char[] buf;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 标记位置,用于读数据
|
|
||||||
*/
|
|
||||||
private int mark;
|
|
||||||
/**
|
|
||||||
* 当前位置
|
|
||||||
*/
|
|
||||||
private int position;
|
|
||||||
/**
|
|
||||||
* 读取的数据长度,一般小于buf.length,-1表示无数据
|
|
||||||
*/
|
|
||||||
private int limit;
|
|
||||||
|
|
||||||
Buffer(int capacity) {
|
|
||||||
buf = new char[capacity];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 是否还有未读数据
|
|
||||||
*
|
|
||||||
* @return 是否还有未读数据
|
|
||||||
*/
|
|
||||||
public final boolean hasRemaining() {
|
|
||||||
return position < limit;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 读取到缓存<br>
|
|
||||||
* 全量读取,会重置Buffer中所有数据
|
|
||||||
*
|
|
||||||
* @param reader {@link Reader}
|
|
||||||
*/
|
|
||||||
int read(Reader reader) {
|
|
||||||
int length;
|
|
||||||
try {
|
|
||||||
length = reader.read(this.buf);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new IORuntimeException(e);
|
|
||||||
}
|
|
||||||
this.mark = 0;
|
|
||||||
this.position = 0;
|
|
||||||
this.limit = length;
|
|
||||||
return length;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 先获取当前字符,再将当前位置后移一位<br>
|
|
||||||
* 此方法不检查是否到了数组末尾,请自行使用{@link #hasRemaining()}判断。
|
|
||||||
*
|
|
||||||
* @return 当前位置字符
|
|
||||||
* @see #hasRemaining()
|
|
||||||
*/
|
|
||||||
char get() {
|
|
||||||
return this.buf[this.position++];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 标记位置记为下次读取位置
|
|
||||||
*/
|
|
||||||
void mark() {
|
|
||||||
this.mark = this.position;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 将数据追加到{@link StrBuilder},追加结束后需手动调用{@link #mark()} 重置读取位置
|
|
||||||
*
|
|
||||||
* @param builder {@link StrBuilder}
|
|
||||||
* @param length 追加的长度
|
|
||||||
* @see #mark()
|
|
||||||
*/
|
|
||||||
void appendTo(StrBuilder builder, int length) {
|
|
||||||
builder.append(this.buf, this.mark, length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Hutool Team and hutool.cn
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package cn.hutool.core.text.csv;
|
||||||
|
|
||||||
|
import cn.hutool.core.io.IORuntimeException;
|
||||||
|
import cn.hutool.core.io.IoUtil;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CSV解析器,用于解析CSV文件
|
||||||
|
*
|
||||||
|
* @author looly
|
||||||
|
* @since 5.8.0
|
||||||
|
*/
|
||||||
|
public class CsvTokener implements Closeable {
|
||||||
|
|
||||||
|
private final Reader raw;
|
||||||
|
/**
|
||||||
|
* 在Reader的位置(解析到第几个字符)
|
||||||
|
*/
|
||||||
|
private long index;
|
||||||
|
/**
|
||||||
|
* 前一个字符
|
||||||
|
*/
|
||||||
|
private int prev;
|
||||||
|
/**
|
||||||
|
* 是否使用前一个字符
|
||||||
|
*/
|
||||||
|
private boolean usePrev;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 构造
|
||||||
|
*
|
||||||
|
* @param reader {@link Reader}
|
||||||
|
*/
|
||||||
|
public CsvTokener(final Reader reader) {
|
||||||
|
this.raw = IoUtil.toBuffered(reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 读取下一个字符,并记录位置
|
||||||
|
*
|
||||||
|
* @return 下一个字符
|
||||||
|
*/
|
||||||
|
public int next() {
|
||||||
|
if(this.usePrev){
|
||||||
|
this.usePrev = false;
|
||||||
|
}else{
|
||||||
|
try {
|
||||||
|
this.prev = this.raw.read();
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new IORuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.index++;
|
||||||
|
return this.prev;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 将标记回退到第一个字符
|
||||||
|
*
|
||||||
|
* @throws IllegalStateException 当多次调用back时,抛出此异常
|
||||||
|
*/
|
||||||
|
public void back() throws IllegalStateException {
|
||||||
|
if (this.usePrev || this.index <= 0) {
|
||||||
|
throw new IllegalStateException("Stepping back two steps is not supported");
|
||||||
|
}
|
||||||
|
this.index --;
|
||||||
|
this.usePrev = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取当前位置
|
||||||
|
*
|
||||||
|
* @return 位置
|
||||||
|
*/
|
||||||
|
public long getIndex() {
|
||||||
|
return this.index;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
IoUtil.close(this.raw);
|
||||||
|
}
|
||||||
|
}
|
@ -7,7 +7,6 @@ import cn.hutool.core.lang.Console;
|
|||||||
import cn.hutool.core.util.CharsetUtil;
|
import cn.hutool.core.util.CharsetUtil;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
import org.junit.jupiter.api.Disabled;
|
import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
@ -17,6 +16,8 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
public class CsvUtilTest {
|
public class CsvUtilTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -32,7 +33,8 @@ public class CsvUtilTest {
|
|||||||
assertEquals("关注\"对象\"", row0.get(3));
|
assertEquals("关注\"对象\"", row0.get(3));
|
||||||
assertEquals("年龄", row0.get(4));
|
assertEquals("年龄", row0.get(4));
|
||||||
assertEquals("", row0.get(5));
|
assertEquals("", row0.get(5));
|
||||||
assertEquals("\"", row0.get(6));
|
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
|
||||||
|
assertEquals("\"\n", row0.get(6));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -46,7 +48,8 @@ public class CsvUtilTest {
|
|||||||
assertEquals("关注\"对象\"", csvRow.get(3));
|
assertEquals("关注\"对象\"", csvRow.get(3));
|
||||||
assertEquals("年龄", csvRow.get(4));
|
assertEquals("年龄", csvRow.get(4));
|
||||||
assertEquals("", csvRow.get(5));
|
assertEquals("", csvRow.get(5));
|
||||||
assertEquals("\"", csvRow.get(6));
|
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
|
||||||
|
assertEquals("\"\n", csvRow.get(6));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -70,7 +73,8 @@ public class CsvUtilTest {
|
|||||||
assertEquals("关注\"对象\"", row0.get(3));
|
assertEquals("关注\"对象\"", row0.get(3));
|
||||||
assertEquals("年龄", row0.get(4));
|
assertEquals("年龄", row0.get(4));
|
||||||
assertEquals("", row0.get(5));
|
assertEquals("", row0.get(5));
|
||||||
assertEquals("\"", row0.get(6));
|
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
|
||||||
|
assertEquals("\"\n", row0.get(6));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -84,7 +88,8 @@ public class CsvUtilTest {
|
|||||||
assertEquals("关注\"对象\"", csvRow.get(3));
|
assertEquals("关注\"对象\"", csvRow.get(3));
|
||||||
assertEquals("年龄", csvRow.get(4));
|
assertEquals("年龄", csvRow.get(4));
|
||||||
assertEquals("", csvRow.get(5));
|
assertEquals("", csvRow.get(5));
|
||||||
assertEquals("\"", csvRow.get(6));
|
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
|
||||||
|
assertEquals("\"\n", csvRow.get(6));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user