From da39e9e6461ef8d4b68f69a1f6bef77a222e843d Mon Sep 17 00:00:00 2001
From: Looly
Date: Tue, 29 Oct 2019 14:06:32 +0800
Subject: [PATCH] enhance Csv

---
 CHANGELOG.md                                  |   2 +
 .../hutool/core/text/csv/CsvBaseReader.java   | 205 +++++++++++++++++
 .../cn/hutool/core/text/csv/CsvReader.java    | 210 +++++++-----------
 .../hutool/core/text/csv/CsvRowHandler.java   |  18 ++
 .../cn/hutool/core/text/csv/CsvUtilTest.java  |  17 +-
 5 files changed, 316 insertions(+), 136 deletions(-)
 create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/csv/CsvBaseReader.java
 create mode 100644 hutool-core/src/main/java/cn/hutool/core/text/csv/CsvRowHandler.java

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 272f6c9a0..f0911dd7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@
 * 【setting】 增加System.getenv变量替换支持
 * 【core】 XmlUtil中mapToStr支持namespace(pr#599@Github)
 * 【core】 ZipUtil修改策略:默认关闭输入流(issue#604@Github)
+* 【core】 改进CsvReader,支持RowHandler按行处理(issue#608@Github)
+
 ### Bug修复
 * 【core】 解决ConcurrentHashSet不能序列化的问题(issue#600@Github)
 * 【core】 解决CsvReader.setErrorOnDifferentFieldCount循环调用问题
diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvBaseReader.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvBaseReader.java
new file mode 100644
index 000000000..d65bb56ee
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvBaseReader.java
@@ -0,0 +1,205 @@
+package cn.hutool.core.text.csv;
+
+import cn.hutool.core.io.FileUtil;
+import cn.hutool.core.io.IORuntimeException;
+import cn.hutool.core.io.IoUtil;
+import cn.hutool.core.lang.Assert;
+import cn.hutool.core.util.CharsetUtil;
+import cn.hutool.core.util.ObjectUtil;
+
+import java.io.File;
+import java.io.Reader;
+import java.io.Serializable;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * CSV文件读取器基础类,提供灵活的文件、路径中的CSV读取,一次构造可多次调用读取不同数据,参考:FastCSV
+ *
+ * @author Looly
+ * @since 5.0.4
+ */
+public class CsvBaseReader implements Serializable {
+	private static final long serialVersionUID = 1L;
+
+	/**
+	 * Default charset (UTF-8) used by the read overloads that take no charset argument.
+	 */
+	protected static final Charset DEFAULT_CHARSET = CharsetUtil.CHARSET_UTF_8;
+
+	private final CsvReadConfig config;
+
+	//--------------------------------------------------------------------------------------------- Constructor start
+
+	/**
+	 * Creates a reader that uses the default configuration.
+	 */
+	public CsvBaseReader() {
+		this(null);
+	}
+
+	/**
+	 * Creates a reader with the given configuration.
+	 *
+	 * @param config read configuration; {@code null} falls back to {@link CsvReadConfig#defaultConfig()}
+	 */
+	public CsvBaseReader(CsvReadConfig config) {
+		this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig.defaultConfig());
+	}
+	//--------------------------------------------------------------------------------------------- Constructor end
+
+	/**
+	 * Sets the field separator on the configuration; the documented default is a comma ','.
+	 *
+	 * @param fieldSeparator field separator character
+	 */
+	public void setFieldSeparator(char fieldSeparator) {
+		this.config.setFieldSeparator(fieldSeparator);
+	}
+
+	/**
+	 * Sets the text delimiter (the character that wraps a field); the documented default is '"'.
+	 *
+	 * @param textDelimiter text delimiter character
+	 */
+	public void setTextDelimiter(char textDelimiter) {
+		this.config.setTextDelimiter(textDelimiter);
+	}
+
+	/**
+	 * Sets whether the first row is treated as the header row; the documented default is false.
+	 *
+	 * @param containsHeader {@code true} if the first row is a header row
+	 */
+	public void setContainsHeader(boolean containsHeader) {
+		this.config.setContainsHeader(containsHeader);
+	}
+
+	/**
+	 * Sets whether empty rows are skipped; the documented default is true.
+	 *
+	 * @param skipEmptyRows {@code true} to skip empty rows
+	 */
+	public void setSkipEmptyRows(boolean skipEmptyRows) {
+		this.config.setSkipEmptyRows(skipEmptyRows);
+	}
+
+	/**
+	 * Sets whether rows with differing field counts raise an error; the documented default is false.
+	 *
+	 * @param errorOnDifferentFieldCount {@code true} to fail when rows differ in field count
+	 */
+	public void setErrorOnDifferentFieldCount(boolean errorOnDifferentFieldCount) {
+		this.config.setErrorOnDifferentFieldCount(errorOnDifferentFieldCount);
+	}
+
+	/**
+	 * Reads a CSV file, decoding it with {@link #DEFAULT_CHARSET}.
+	 *
+	 * @param file CSV file, must not be {@code null}
+	 * @return {@link CsvData} built from the header (when configured) and the parsed rows
+	 * @throws IORuntimeException on I/O failure
+	 */
+	public CsvData read(File file) throws IORuntimeException {
+		return read(file, DEFAULT_CHARSET);
+	}
+
+	/**
+	 * Reads a CSV file, decoding it with the given charset.
+	 *
+	 * @param file    CSV file, must not be {@code null}
+	 * @param charset charset used to decode the file
+	 * @return {@link CsvData} built from the header (when configured) and the parsed rows
+	 * @throws IORuntimeException on I/O failure
+	 */
+	public CsvData read(File file, Charset charset) throws IORuntimeException {
+		return read(Objects.requireNonNull(file, "file must not be null").toPath(), charset);
+	}
+
+	/**
+	 * Reads a CSV file, decoding it with {@link #DEFAULT_CHARSET}.
+	 *
+	 * @param path path of the CSV file, must not be {@code null}
+	 * @return {@link CsvData} built from the header (when configured) and the parsed rows
+	 * @throws IORuntimeException on I/O failure
+	 */
+	public CsvData read(Path path) throws IORuntimeException {
+		return read(path, DEFAULT_CHARSET);
+	}
+
+	/**
+	 * Reads a CSV file, decoding it with the given charset.
+	 *
+	 * @param path    path of the CSV file, must not be {@code null}
+	 * @param charset charset used to decode the file
+	 * @return {@link CsvData} built from the header (when configured) and the parsed rows
+	 * @throws IORuntimeException on I/O failure
+	 */
+	public CsvData read(Path path, Charset charset) throws IORuntimeException {
+		Assert.notNull(path, "path must not be null");
+		return read(FileUtil.getReader(path, charset));
+	}
+
+	/**
+	 * Reads CSV data from a Reader row by row; the parser wrapping the Reader is closed afterwards.
+	 *
+	 * @param reader     source of the CSV text
+	 * @param rowHandler callback invoked once for each parsed row
+	 * @throws IORuntimeException on I/O failure
+	 */
+	public void read(Reader reader, CsvRowHandler rowHandler) throws IORuntimeException {
+		read(parse(reader), rowHandler);
+	}
+
+	/**
+	 * Reads all CSV data from a Reader; the parser wrapping the Reader is closed afterwards.
+	 *
+	 * @param reader source of the CSV text
+	 * @return {@link CsvData} built from the header (when configured) and the parsed rows
+	 * @throws IORuntimeException on I/O failure
+	 */
+	public CsvData read(Reader reader) throws IORuntimeException {
+		final CsvParser csvParser = parse(reader);
+		final List<CsvRow> rows = new ArrayList<>();
+		read(csvParser, rows::add);
+		final List<String> header = config.containsHeader ? csvParser.getHeader() : null;
+
+		return new CsvData(header, rows);
+	}
+
+	//--------------------------------------------------------------------------------------------- Private method start
+
+	/**
+	 * Passes every row produced by the parser to the handler, then closes the parser.
+	 *
+	 * @param csvParser  CSV parser to drain
+	 * @param rowHandler callback invoked once for each parsed row
+	 * @throws IORuntimeException on I/O failure
+	 * @since 5.0.4
+	 */
+	private void read(CsvParser csvParser, CsvRowHandler rowHandler) throws IORuntimeException {
+		try {
+			CsvRow csvRow;
+			while ((csvRow = csvParser.nextRow()) != null) {
+				rowHandler.handle(csvRow);
+			}
+		} finally {
+			IoUtil.close(csvParser);
+		}
+	}
+
+	/**
+	 * Builds a {@link CsvParser} over the Reader using this reader's configuration.
+	 *
+	 * @param reader source of the CSV text
+	 * @return the new parser
+	 * @throws IORuntimeException on I/O failure
+	 */
+	private CsvParser parse(Reader reader) throws IORuntimeException {
+		return new CsvParser(reader, this.config);
+	}
+	//--------------------------------------------------------------------------------------------- Private method end
+}
diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvReader.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvReader.java
index 1c0007ab7..fa575b637 100644
--- a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvReader.java
+++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvReader.java
@@ -1,20 +1,12 @@
 package cn.hutool.core.text.csv;
 
-import java.io.File;
-import java.io.IOException;
-import java.io.Reader;
-import java.io.Serializable;
-import java.nio.charset.Charset;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Objects;
-
 import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.io.IORuntimeException;
-import cn.hutool.core.lang.Assert;
-import cn.hutool.core.util.CharsetUtil;
-import cn.hutool.core.util.ObjectUtil;
+
+import java.io.File;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
 
 /**
  * CSV文件读取器,参考:FastCSV
@@ -22,10 +14,12 @@ import cn.hutool.core.util.ObjectUtil;
  * @author Looly
  * @since 4.0.1
  */
-public final class CsvReader implements Serializable{
+public class CsvReader extends CsvBaseReader {
 	private static final long serialVersionUID = 1L;
 
-	CsvReadConfig config;
+	private final Reader reader;
+
+	//--------------------------------------------------------------------------------------------- Constructor start
 
 	/**
 	 * 构造,使用默认配置项
@@ -36,137 +30,91 @@ public final class CsvReader implements Serializable{
 
 	/**
-	 * 构造
-	 * 
+	 * Creates a reader with the given configuration and no default Reader.
+	 *
 	 * @param config 配置项
 	 */
 	public CsvReader(CsvReadConfig config) {
-		this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig.defaultConfig());
+		this((Reader) null, config);
 	}
 
 	/**
-	 * 设置字段分隔符,默认逗号','
-	 * 
-	 * @param fieldSeparator 字段分隔符,默认逗号','
-	 */
-	public void setFieldSeparator(char fieldSeparator) {
-		this.config.setFieldSeparator(fieldSeparator);
-	}
-
-	/**
-	 * 设置 文本分隔符,文本包装符,默认双引号'"'
-	 * 
-	 * @param textDelimiter 文本分隔符,文本包装符,默认双引号'"'
-	 */
-	public void setTextDelimiter(char textDelimiter) {
-		this.config.setTextDelimiter(textDelimiter);
-	}
-
-	/**
-	 * 设置是否首行做为标题行,默认false
-	 * 
-	 * @param containsHeader 是否首行做为标题行,默认false
-	 */
-	public void setContainsHeader(boolean containsHeader) {
-		this.config.setContainsHeader(containsHeader);
-	}
-
-	/**
-	 * 设置是否跳过空白行,默认true
-	 * 
-	 * @param skipEmptyRows 是否跳过空白行,默认true
-	 */
-	public void setSkipEmptyRows(boolean skipEmptyRows) {
-		this.config.setSkipEmptyRows(skipEmptyRows);
-	}
-
-	/**
-	 * 设置每行字段个数不同时是否抛出异常,默认false
-	 * 
-	 * @param errorOnDifferentFieldCount 每行字段个数不同时是否抛出异常,默认false
-	 */
-	public void setErrorOnDifferentFieldCount(boolean errorOnDifferentFieldCount) {
-		this.config.setErrorOnDifferentFieldCount(errorOnDifferentFieldCount);
-	}
-
-	/**
-	 * 读取CSV文件,默认UTF-8编码
+	 * Creates a reader for a file, decoding it with {@link #DEFAULT_CHARSET}.
+	 *
+	 * @param file   CSV file to read
+	 * @param config read configuration; {@code null} selects the default configuration
+	 * @since 5.0.4
+	 */
+	public CsvReader(File file, CsvReadConfig config) {
+		this(file, DEFAULT_CHARSET, config);
+	}
+
+	/**
+	 * Creates a reader for a path, decoding it with {@link #DEFAULT_CHARSET}.
+	 *
+	 * @param path   path of the CSV file to read
+	 * @param config read configuration; {@code null} selects the default configuration
+	 * @since 5.0.4
+	 */
+	public CsvReader(Path path, CsvReadConfig config) {
+		this(path, DEFAULT_CHARSET, config);
+	}
+
+	/**
+	 * Creates a reader for a file, decoding it with the given charset.
+	 *
+	 * @param file    CSV file to read
+	 * @param charset charset used to decode the file
+	 * @param config  read configuration; {@code null} selects the default configuration
+	 * @since 5.0.4
+	 */
+	public CsvReader(File file, Charset charset, CsvReadConfig config) {
+		this(FileUtil.getReader(file, charset), config);
+	}
+
+	/**
+	 * Creates a reader for a path, decoding it with the given charset.
+	 *
+	 * @param path    path of the CSV file to read
+	 * @param charset charset used to decode the file
+	 * @param config  read configuration; {@code null} selects the default configuration
+	 * @since 5.0.4
+	 */
+	public CsvReader(Path path, Charset charset, CsvReadConfig config) {
+		this(FileUtil.getReader(path, charset), config);
+	}
+
+	/**
+	 * Creates a reader over an existing {@link Reader}.
+	 *
+	 * @param reader source of the CSV text; {@code null} means no default Reader is set
+	 * @param config read configuration; {@code null} selects the default configuration
+	 * @since 5.0.4
+	 */
+	public CsvReader(Reader reader, CsvReadConfig config) {
+		super(config);
+		this.reader = reader;
+	}
+	//--------------------------------------------------------------------------------------------- Constructor end
+	/**
+	 * Reads all CSV data from the Reader set at construction; intended to be called only once.
+	 * A file, path or Reader must have been passed to the constructor before calling this.
 	 *
-	 * @param file CSV文件
 	 * @return {@link CsvData},包含数据列表和行信息
 	 * @throws IORuntimeException IO异常
 	 */
-	public CsvData read(File file) throws IORuntimeException {
-		return read(file, CharsetUtil.CHARSET_UTF_8);
+	public CsvData read() throws IORuntimeException {
+		return read(this.reader);
 	}
 
 	/**
-	 * 读取CSV文件
+	 * Reads the Reader set at construction row by row; intended to be called only once.
+	 * A file, path or Reader must have been passed to the constructor before calling this.
 	 *
-	 * @param file CSV文件
-	 * @param charset 文件编码,默认系统编码
-	 * @return {@link CsvData},包含数据列表和行信息
+	 * @param rowHandler callback invoked once for each parsed row
 	 * @throws IORuntimeException IO异常
+	 * @since 5.0.4
 	 */
-	public CsvData read(File file, Charset charset) throws IORuntimeException {
-		return read(Objects.requireNonNull(file.toPath(), "file must not be null"), charset);
-	}
-
-	/**
-	 * 读取CSV文件,默认UTF-8编码
-	 *
-	 * @param path CSV文件
-	 * @return {@link CsvData},包含数据列表和行信息
-	 * @throws IORuntimeException IO异常
-	 */
-	public CsvData read(Path path) throws IORuntimeException {
-		return read(path, CharsetUtil.CHARSET_UTF_8);
-	}
-
-	/**
-	 * 读取CSV文件
-	 *
-	 * @param path CSV文件
-	 * @param charset 文件编码,默认系统编码
-	 * @return {@link CsvData},包含数据列表和行信息
-	 * @throws IORuntimeException IO异常
-	 */
-	public CsvData read(Path path, Charset charset) throws IORuntimeException {
-		Assert.notNull(path, "path must not be null");
-		try (Reader reader = FileUtil.getReader(path, charset)) {
-			return read(reader);
-		} catch (IOException e) {
-			throw new IORuntimeException(e);
-		}
-	}
-
-	/**
-	 * 从Reader中读取CSV数据
-	 *
-	 * @param reader Reader
-	 * @return {@link CsvData},包含数据列表和行信息
-	 * @throws IORuntimeException IO异常
-	 */
-	public CsvData read(Reader reader) throws IORuntimeException {
-		final CsvParser csvParser = parse(reader);
-
-		final List rows = new ArrayList<>();
-		CsvRow csvRow;
-		while ((csvRow = csvParser.nextRow()) != null) {
-			rows.add(csvRow);
-		}
-
-		final List header = config.containsHeader ? csvParser.getHeader() : null;
-		return new CsvData(header, rows);
-	}
-
-	/**
-	 * 构建 {@link CsvParser}
-	 *
-	 * @param reader Reader
-	 * @return CsvParser
-	 * @throws IORuntimeException IO异常
-	 */
-	private CsvParser parse(Reader reader) throws IORuntimeException {
-		return new CsvParser(reader, config);
+	public void read(CsvRowHandler rowHandler) throws IORuntimeException {
+		read(this.reader, rowHandler);
 	}
 }
diff --git a/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvRowHandler.java b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvRowHandler.java
new file mode 100644
index 000000000..02333bef6
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/text/csv/CsvRowHandler.java
@@ -0,0 +1,18 @@
+package cn.hutool.core.text.csv;
+
+/**
+ * Row handler for CSV data; implement this interface to process a CSV source one row at a time.
+ *
+ * @author Looly
+ * @since 5.0.4
+ */
+@FunctionalInterface
+public interface CsvRowHandler {
+
+	/**
+	 * Handles a single parsed row.
+	 *
+	 * @param row the parsed row
+	 */
+	void handle(CsvRow row);
+}
diff --git a/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java b/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java
index 6b5e8eaf8..cf04be8fd 100644
--- a/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java
+++ b/hutool-core/src/test/java/cn/hutool/core/text/csv/CsvUtilTest.java
@@ -1,13 +1,12 @@
 package cn.hutool.core.text.csv;
 
-import java.util.List;
-
-import org.junit.Ignore;
-import org.junit.Test;
-
 import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.lang.Assert;
 import cn.hutool.core.util.CharsetUtil;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.List;
 
 public class CsvUtilTest {
 	
@@ -21,6 +20,14 @@ public class CsvUtilTest {
 			Assert.notEmpty(csvRow.getRawList());
 		}
 	}
+	
+	@Test
+	public void readTest2() {
+		CsvReader reader = CsvUtil.getReader();
+		reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)->{
+			Assert.notEmpty(csvRow.getRawList());
+		});
+	}
 	
 	@Test
 	@Ignore