From d815b0f64ee7b735a794a0429e5c806672168ee5 Mon Sep 17 00:00:00 2001 From: Looly Date: Wed, 2 Jun 2021 08:40:50 +0800 Subject: [PATCH] fix io bug --- CHANGELOG.md | 3 +- .../cn/hutool/core/io/CharsetDetector.java | 73 +++++++++++-------- .../hutool/core/io/CharsetDetectorTest.java | 19 +++++ 3 files changed, 62 insertions(+), 33 deletions(-) create mode 100644 hutool-core/src/test/java/cn/hutool/core/io/CharsetDetectorTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a8a9001b..210afeaf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,14 @@ ------------------------------------------------------------------------------------------------------------- -# 5.6.7 (2021-05-29) +# 5.6.7 (2021-06-02) ### 🐣新特性 * 【core 】 CharSequenceUtil增加join重载(issue#I3TFJ5@Gitee) ### 🐞Bug修复 * 【core 】 修复FileUtil.normalize去掉末尾空格问题(issue#1603@Github) +* 【core 】 修复CharsetDetector流关闭问题(issue#1603@Github) ------------------------------------------------------------------------------------------------------------- diff --git a/hutool-core/src/main/java/cn/hutool/core/io/CharsetDetector.java b/hutool-core/src/main/java/cn/hutool/core/io/CharsetDetector.java index 0f5c31862..04a8da001 100644 --- a/hutool-core/src/main/java/cn/hutool/core/io/CharsetDetector.java +++ b/hutool-core/src/main/java/cn/hutool/core/io/CharsetDetector.java @@ -1,17 +1,15 @@ package cn.hutool.core.io; +import cn.hutool.core.convert.Convert; import cn.hutool.core.util.ArrayUtil; -import java.io.BufferedInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; -import java.nio.charset.UnsupportedCharsetException; -import java.util.ArrayList; -import java.util.List; /** * 编码探测器 @@ -28,29 +26,34 @@ public class CharsetDetector { static { String[] names = { - "US-ASCII", "UTF-8", "GBK", "GB2312", - "BIG5", "GB18030", "UTF-16BE", 
"UTF-16LE", "UTF-16", - "UNICODE"}; - final List list = new ArrayList<>(); - for (String name : names) { - try { - list.add(Charset.forName(name)); - } catch (UnsupportedCharsetException ignore) { - //ignore - } - } - DEFAULT_CHARSETS = list.toArray(new Charset[0]); + "BIG5", + "UNICODE", + "US-ASCII"}; + DEFAULT_CHARSETS = Convert.convert(Charset[].class, names); } /** - * 探测编码 + * 探测文件编码 + * + * @param file 文件 + * @param charsets 需要测试用的编码,null或空使用默认的编码数组 + * @return 编码 + * @since 5.6.7 + */ + public static Charset detect(File file, Charset... charsets) { + return detect(FileUtil.getInputStream(file), charsets); + } + + /** + * 探测编码
+ * 注意:此方法会读取流的一部分,然后关闭流,如重复使用流,请使用支持reset方法的流 * * @param in 流,使用后关闭此流 * @param charsets 需要测试用的编码,null或空使用默认的编码数组 @@ -60,34 +63,40 @@ public class CharsetDetector { if (ArrayUtil.isEmpty(charsets)) { charsets = DEFAULT_CHARSETS; } - for (Charset charset : charsets) { - charset = detectCharset(in, charset); - if (null != charset) { - return charset; + + final byte[] buffer = new byte[512]; + try { + while (in.read(buffer) > -1) { + for (Charset charset : charsets) { + final CharsetDecoder decoder = charset.newDecoder(); + if (identify(buffer, decoder)) { + return charset; + } + } } + } catch (IOException e) { + throw new IORuntimeException(e); + } finally { + IoUtil.close(in); } return null; } /** - * 判断编码 + * 判断编码,此方法不会关闭流 * * @param in 流 * @param charset 编码 * @return 编码 */ - private static Charset detectCharset(InputStream in, Charset charset) { - try (BufferedInputStream input = IoUtil.toBuffered(in)) { - CharsetDecoder decoder = charset.newDecoder(); + private static Charset detectCharset(InputStream in, Charset charset) throws IOException { + CharsetDecoder decoder = charset.newDecoder(); - byte[] buffer = new byte[512]; - while (input.read(buffer) > -1) { - if (identify(buffer, decoder)) { - return charset; - } + final byte[] buffer = new byte[512]; + while (in.read(buffer) > -1) { + if (identify(buffer, decoder)) { + return charset; } - } catch (IOException e) { - throw new IORuntimeException(e); } return null; } diff --git a/hutool-core/src/test/java/cn/hutool/core/io/CharsetDetectorTest.java b/hutool-core/src/test/java/cn/hutool/core/io/CharsetDetectorTest.java new file mode 100644 index 000000000..6fcb72d8e --- /dev/null +++ b/hutool-core/src/test/java/cn/hutool/core/io/CharsetDetectorTest.java @@ -0,0 +1,19 @@ +package cn.hutool.core.io; + +import cn.hutool.core.io.resource.ResourceUtil; +import cn.hutool.core.util.CharsetUtil; +import org.junit.Assert; +import org.junit.Test; + +import java.nio.charset.Charset; + +public class CharsetDetectorTest { + + 
@Test + public void detectTest(){ + // 测试多个Charset对同一个流的处理是否有问题 + final Charset detect = CharsetDetector.detect(ResourceUtil.getStream("test.xml"), + CharsetUtil.CHARSET_GBK, CharsetUtil.CHARSET_UTF_8); + Assert.assertEquals(CharsetUtil.CHARSET_UTF_8, detect); + } +}