diff --git a/hutool-core/src/main/java/cn/hutool/core/io/ByteOrderMark.java b/hutool-core/src/main/java/cn/hutool/core/io/ByteOrderMark.java
new file mode 100755
index 000000000..cb85df0b2
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/io/ByteOrderMark.java
@@ -0,0 +1,198 @@
+package cn.hutool.core.io;
+
+import cn.hutool.core.lang.Assert;
+import cn.hutool.core.util.ArrayUtil;
+import cn.hutool.core.util.CharsetUtil;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.Locale;
+import java.util.function.Predicate;
+
+/**
+ * Description of a Byte Order Mark (BOM) header.<br>
+ * BOM definition: <a href="http://www.unicode.org/unicode/faq/utf_bom.html">http://www.unicode.org/unicode/faq/utf_bom.html</a>
+ * <ul>
+ *     <li>EF BB BF = UTF-8</li>
+ *     <li>FE FF = UTF-16BE (big-endian)</li>
+ *     <li>FF FE = UTF-16LE (little-endian)</li>
+ *     <li>00 00 FE FF = UTF-32BE (big-endian)</li>
+ *     <li>FF FE 00 00 = UTF-32LE (little-endian)</li>
+ * </ul>
+ *
+ * <p>Adapted from Apache-commons-io.</p>
+ *
+ * @author Apache-commons-io
+ */
+public class ByteOrderMark implements Predicate<byte[]>, Comparable<ByteOrderMark>, Serializable {
+	private static final long serialVersionUID = 1L;
+
+	// region ----- BOMs
+	/**
+	 * UTF-8 BOM.
+	 */
+	public static final ByteOrderMark UTF_8 = new ByteOrderMark(CharsetUtil.NAME_UTF_8, 0xEF, 0xBB, 0xBF);
+
+	/**
+	 * UTF-16BE BOM (Big-Endian).
+	 */
+	public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);
+
+	/**
+	 * UTF-16LE BOM (Little-Endian).
+	 */
+	public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);
+
+	/**
+	 * UTF-32BE BOM (Big-Endian).
+	 */
+	public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);
+
+	/**
+	 * UTF-32LE BOM (Little-Endian).
+	 */
+	public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);
+
+	/**
+	 * All predefined BOMs, longest first, so the 4-byte UTF-32LE mark is tried before the UTF-16LE mark it starts with.
+	 */
+	public static final ByteOrderMark[] ALL = new ByteOrderMark[]{
+			UTF_32BE,
+			UTF_32LE,
+			UTF_8,
+			UTF_16BE,
+			UTF_16LE
+	};
+	// endregion
+
+	private final String charsetName;
+	private final int[] bytes;
+
+	/**
+	 * Creates a BOM description.
+	 *
+	 * @param charsetName name of the charset this BOM identifies
+	 * @param bytes       the BOM bytes, one array element per byte
+	 * @throws IllegalArgumentException if the charset name or the bytes are empty
+	 */
+	public ByteOrderMark(final String charsetName, final int... bytes) {
+		if (ArrayUtil.isEmpty(bytes)) {
+			throw new IllegalArgumentException("No bytes specified");
+		}
+		this.charsetName = Assert.notEmpty(charsetName, "No charsetName specified");
+		this.bytes = new int[bytes.length];
+		System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+	}
+
+	/**
+	 * Returns the name of the charset this BOM identifies.
+	 *
+	 * @return the charset name
+	 */
+	public String getCharsetName() {
+		return charsetName;
+	}
+
+	/**
+	 * Returns the number of bytes in this BOM.
+	 *
+	 * @return the BOM length in bytes
+	 */
+	public int length() {
+		return bytes.length;
+	}
+
+	/**
+	 * Returns the byte value at the given position.
+	 *
+	 * @param pos The position
+	 * @return The specified byte
+	 */
+	public int get(final int pos) {
+		return bytes[pos];
+	}
+
+	/**
+	 * Gets a copy of the BOM's bytes.
+	 *
+	 * @return a copy of the BOM's bytes
+	 */
+	public byte[] getBytes() {
+		final byte[] copy = new byte[bytes.length];
+		for (int i = 0; i < bytes.length; i++) {
+			copy[i] = (byte) bytes[i];
+		}
+		return copy;
+	}
+
+	/**
+	 * Tests whether the given bytes start with this BOM.<br>
+	 * Returns {@code false} when {@code headBytes} is {@code null} or shorter than this BOM.
+	 *
+	 * @param headBytes the leading bytes of the data
+	 * @return {@code true} if {@code headBytes} starts with this BOM
+	 */
+	@Override
+	public boolean test(final byte[] headBytes) {
+		if (null == headBytes || headBytes.length < bytes.length) {
+			return false;
+		}
+		for (int i = 0; i < bytes.length; i++) {
+			// bytes[i] is 0..255 but headBytes[i] is signed: narrow first, otherwise 0xEF (239) never equals (byte) 0xEF (-17)
+			if ((byte) bytes[i] != headBytes[i]) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	/**
+	 * Two BOMs are equal when their byte sequences are equal; the charset name is not compared.
+	 */
+	@Override
+	public boolean equals(final Object obj) {
+		if (!(obj instanceof ByteOrderMark)) {
+			return false;
+		}
+		final ByteOrderMark bom = (ByteOrderMark) obj;
+		return Arrays.equals(this.bytes, bom.bytes);
+	}
+
+	@Override
+	public int hashCode() {
+		int hashCode = getClass().hashCode();
+		for (final int b : bytes) {
+			hashCode += b;
+		}
+		return hashCode;
+	}
+
+	/**
+	 * Renders the simple class name, the charset name and each byte in upper-case hex, e.g. {@code ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]}.
+	 */
+	@Override
+	public String toString() {
+		final StringBuilder builder = new StringBuilder();
+		builder.append(getClass().getSimpleName());
+		builder.append('[');
+		builder.append(charsetName);
+		builder.append(": ");
+		for (int i = 0; i < bytes.length; i++) {
+			if (i > 0) {
+				builder.append(",");
+			}
+			builder.append("0x");
+			builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
+		}
+		builder.append(']');
+		return builder.toString();
+	}
+
+	/**
+	 * Orders BOMs by length, longest first. Not consistent with {@link #equals(Object)}: different BOMs of equal length compare as 0.
+	 */
+	@Override
+	public int compareTo(final ByteOrderMark o) {
+		return Integer.compare(o.length(), this.length());
+	}
+}
diff --git a/hutool-core/src/main/java/cn/hutool/core/io/file/FileMagicNumber.java b/hutool-core/src/main/java/cn/hutool/core/io/file/FileMagicNumber.java
index cb5c63d95..bbdde0d0f 100644
--- a/hutool-core/src/main/java/cn/hutool/core/io/file/FileMagicNumber.java
+++ b/hutool-core/src/main/java/cn/hutool/core/io/file/FileMagicNumber.java
@@ -13,6 +13,7 @@
 package cn.hutool.core.io.file;
 
 import cn.hutool.core.util.ArrayUtil;
+import cn.hutool.core.util.CharsetUtil;
 
 import java.math.BigInteger;
 import java.util.Arrays;
@@ -82,8 +83,7 @@ public enum FileMagicNumber {
 						try {
 							final int dataLength = new BigInteger(1, Arrays.copyOfRange(bytes, i, i + 4)).intValue();
 							i += 4;
-							final byte[] bytes1 = Arrays.copyOfRange(bytes, i, i + 4);
-							final String chunkType = new String(bytes1);
+							final String chunkType = new String(bytes, i, 4, CharsetUtil.ISO_8859_1);
 							i += 4;
 							if (Objects.equals(chunkType, "IDAT") || Objects.equals(chunkType, "IEND")) {
 								return false;
@@ -606,6 +606,7 @@ public enum FileMagicNumber {
 			//去除bom头并且跳过三个字节
 			if (bytes.length > 3 && Objects.equals(bytes[0], (byte) 0xEF)
 					&& Objects.equals(bytes[1], (byte) 0xBB) && Objects.equals(bytes[2], (byte) 0xBF)) {
+				// UTF-8 BOM
 				bytes = Arrays.copyOfRange(bytes, 3, bytes.length);
 			}
 			return bytes.length > 3
diff --git a/hutool-core/src/main/java/cn/hutool/core/io/stream/BOMInputStream.java b/hutool-core/src/main/java/cn/hutool/core/io/stream/BOMInputStream.java
index 1c2fe9c8a..fa0319372 100644
--- a/hutool-core/src/main/java/cn/hutool/core/io/stream/BOMInputStream.java
+++ b/hutool-core/src/main/java/cn/hutool/core/io/stream/BOMInputStream.java
@@ -12,6 +12,7 @@
 
 package cn.hutool.core.io.stream;
 
+import cn.hutool.core.io.ByteOrderMark;
 import cn.hutool.core.io.IORuntimeException;
 import cn.hutool.core.util.CharsetUtil;
 
@@ -121,25 +122,19 @@ public class BOMInputStream extends InputStream {
 
 		final byte[] bom = new byte[BOM_SIZE];
 		final int n;
-		final int unread;
+		// -1 means "no BOM matched"; 0 is a valid result when the stream contains nothing but the BOM
+		int unread = -1;
 		n = in.read(bom, 0, bom.length);
 
-		if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
-			charset = "UTF-32BE";
-			unread = n - 4;
-		} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
-			charset = "UTF-32LE";
-			unread = n - 4;
-		} else if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
-			charset = "UTF-8";
-			unread = n - 3;
-		} else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
-			charset = "UTF-16BE";
-			unread = n - 2;
-		} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
-			charset = "UTF-16LE";
-			unread = n - 2;
-		} else {
+		for (final ByteOrderMark byteOrderMark : ByteOrderMark.ALL) {
+			// only the first n bytes of the zero-filled buffer were read, so a BOM longer than n cannot be present
+			if (byteOrderMark.length() <= n && byteOrderMark.test(bom)) {
+				charset = byteOrderMark.getCharsetName();
+				unread = n - byteOrderMark.length();
+				break;
+			}
+		}
+		if (unread < 0) {
 			// Unicode BOM mark not found, unread all bytes
 			charset = defaultCharset;
 			unread = n;
diff --git a/hutool-core/src/main/java/cn/hutool/core/util/ObjUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/ObjUtil.java
index fc4e88ad1..aebd65190 100644
--- a/hutool-core/src/main/java/cn/hutool/core/util/ObjUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/core/util/ObjUtil.java
@@ -42,6 +42,7 @@ public class ObjUtil {
 	 * <p>比较两个对象是否相等,满足下述任意条件即返回{@code true}:
 	 * <ul>
 	 *     <li>若两对象皆为{@link BigDecimal},且满足{@code 0 == obj1.compareTo(obj2)};</li>
+	 *     <li>both objects are arrays and {@code ArrayUtil.equals(obj1, obj2)} returns {@code true};</li>
 	 *     <li>{@code obj1 == null && obj2 == null};</li>
 	 *     <li>{@code obj1.equals(obj2)};</li>
 	 * </ul>
@@ -54,6 +55,8 @@ public class ObjUtil {
 	public static boolean equals(final Object obj1, final Object obj2) {
 		if (obj1 instanceof BigDecimal && obj2 instanceof BigDecimal) {
 			return NumberUtil.equals((BigDecimal) obj1, (BigDecimal) obj2);
+		} else if (ArrayUtil.isArray(obj1) && ArrayUtil.isArray(obj2)) {
+			return ArrayUtil.equals(obj1, obj2);
 		}
 		return Objects.equals(obj1, obj2);
 	}
diff --git a/hutool-core/src/main/java/cn/hutool/core/util/PrimitiveArrayUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/PrimitiveArrayUtil.java
index 2767631f0..13cfe610a 100644
--- a/hutool-core/src/main/java/cn/hutool/core/util/PrimitiveArrayUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/core/util/PrimitiveArrayUtil.java
@@ -3387,4 +3387,16 @@ public class PrimitiveArrayUtil {
 		return true;
 	}
 	// endregion
+
+	// region rangeMatches
+	/**
+	 * Placeholder for a range-matching helper. NOTE(review): not implemented yet; finish or drop before release.
+	 *
+	 * @param bytes1 currently ignored
+	 * @return always {@code false}
+	 */
+	public static boolean rangeMatches(final byte[] bytes1) {
+		return false;
+	}
+	// endregion
 }
diff --git a/hutool-core/src/test/java/cn/hutool/core/util/ArrayUtilTest.java b/hutool-core/src/test/java/cn/hutool/core/util/ArrayUtilTest.java
index f085dbe88..04da83242 100755
--- a/hutool-core/src/test/java/cn/hutool/core/util/ArrayUtilTest.java
+++ b/hutool-core/src/test/java/cn/hutool/core/util/ArrayUtilTest.java
@@ -719,4 +719,19 @@ public class ArrayUtilTest {
 		b = ArrayUtil.startWith((int[])null, null);
 		Assert.assertTrue(b);
 	}
+
+	@Test
+	public void equalsTest() {
+		final boolean b = ObjUtil.equals(new int[]{1, 2, 3}, new int[]{1, 2, 3});
+		Assert.assertTrue(b);
+	}
+
+	@Test
+	public void copyOfRangeTest() {
+		final String a = "aIDAT";
+		final byte[] bytes1 = Arrays.copyOfRange(a.getBytes(CharsetUtil.UTF_8), 1, 1 + 4);
+
+		Assert.assertEquals(new String(bytes1, CharsetUtil.UTF_8),
+				new String(a.getBytes(CharsetUtil.UTF_8), 1, 4, CharsetUtil.UTF_8));
+	}
 }