From c459a0f61389c245fec82fc70f3b1c2a6395f847 Mon Sep 17 00:00:00 2001 From: Looly Date: Thu, 29 Aug 2024 09:50:07 +0800 Subject: [PATCH] =?UTF-8?q?FileUtil.getTotalLines()=E6=94=AF=E6=8C=81CR?= =?UTF-8?q?=E6=8D=A2=E8=A1=8C=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 3 ++- .../main/java/cn/hutool/core/io/FileUtil.java | 24 ++++++++++++++----- .../java/cn/hutool/core/io/FileUtilTest.java | 14 +++++++++++ .../src/test/resources/test_lines_cr.csv | 1 + .../src/test/resources/test_lines_crlf.csv | 7 ++++++ 5 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 hutool-core/src/test/resources/test_lines_cr.csv create mode 100644 hutool-core/src/test/resources/test_lines_crlf.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index ae3f2efe9..0165b911f 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,10 @@ # 🚀Changelog ------------------------------------------------------------------------------------------------------------- -# 5.8.32(2024-08-24) +# 5.8.32(2024-08-29) ### 🐣新特性 +* 【core 】 FileUtil.getTotalLines()支持CR换行符(issue#IAMZYR@Gitee) ### 🐞Bug修复 * 【http 】 修复getFileNameFromDisposition不符合规范问题(issue#IAKBPD@Gitee) diff --git a/hutool-core/src/main/java/cn/hutool/core/io/FileUtil.java b/hutool-core/src/main/java/cn/hutool/core/io/FileUtil.java index 2be92d5b3..b81ac8ad2 100755 --- a/hutool-core/src/main/java/cn/hutool/core/io/FileUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/io/FileUtil.java @@ -572,8 +572,8 @@ public class FileUtil extends PathUtil { bufferSize = 1024; } try (InputStream is = getInputStream(file)) { - byte[] c = new byte[bufferSize]; - int readChars = is.read(c); + byte[] chars = new byte[bufferSize]; + int readChars = is.read(chars); if (readChars == -1) { // 空文件,返回0 return 0; @@ -584,23 +584,35 @@ public class FileUtil extends PathUtil { // 如果多行,最后一行无换行符,最后一行需要单独计数 // 如果多行,最后一行有换行符,则空行算作一行 int count = 1; + byte pre; + byte c = 0; while (readChars == bufferSize) { for (int i = 0; i < bufferSize; i++) { - if (c[i] == CharUtil.LF) { + pre = c; + c = chars[i]; + // 换行符兼容MAC + if (c == CharUtil.LF || pre == CharUtil.CR) { ++count; } } - readChars = is.read(c); + readChars = is.read(chars); } // count remaining characters while (readChars != -1) { for (int i = 0; i < readChars; i++) { - if (c[i] == CharUtil.LF) { + pre = c; + c = chars[i]; + if (c == CharUtil.LF || pre == CharUtil.CR) { ++count; } } - readChars = is.read(c); + readChars = is.read(chars); + } + + // 最后一个字符为换行符,则单独计数行 + if(c == CharUtil.CR){ + ++count; } return count; diff --git a/hutool-core/src/test/java/cn/hutool/core/io/FileUtilTest.java b/hutool-core/src/test/java/cn/hutool/core/io/FileUtilTest.java index 0374517e7..5f1fb6b7e 100644 --- a/hutool-core/src/test/java/cn/hutool/core/io/FileUtilTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/io/FileUtilTest.java @@ -532,6 +532,20 @@ public class FileUtilTest { assertEquals(8, totalLines); } + @Test + public void getTotalLinesCrTest() { + // 此文件最后一行有换行符,则最后的空行算作一行 + final int totalLines = FileUtil.getTotalLines(FileUtil.file("test_lines_cr.csv")); + assertEquals(8, totalLines); + } + + @Test + public void getTotalLinesCrlfTest() { + // 此文件最后一行有换行符,则最后的空行算作一行 + final int totalLines = FileUtil.getTotalLines(FileUtil.file("test_lines_crlf.csv")); + assertEquals(8, totalLines); + } + @Test public void issue3591Test() { // 此文件最后一行末尾无换行符 diff --git a/hutool-core/src/test/resources/test_lines_cr.csv b/hutool-core/src/test/resources/test_lines_cr.csv new file mode 100644 index 000000000..4e9622be3 --- /dev/null +++ b/hutool-core/src/test/resources/test_lines_cr.csv @@ -0,0 +1 @@ +# 这是一行注释,读取时应忽略 a,b,c,d 1,2,3,4 # 这是一行注释,读取时应忽略 q,w,e,r,"我是一段 带换行的内容" a,s,d,f \ No newline at end of file diff --git a/hutool-core/src/test/resources/test_lines_crlf.csv b/hutool-core/src/test/resources/test_lines_crlf.csv new file mode 100644 index 000000000..32daf5460 --- /dev/null +++ b/hutool-core/src/test/resources/test_lines_crlf.csv @@ -0,0 +1,7 @@ +# 这是一行注释,读取时应忽略 +a,b,c,d +1,2,3,4 +# 这是一行注释,读取时应忽略 +q,w,e,r,"我是一段 +带换行的内容" +a,s,d,f