mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-04-19 03:01:48 +08:00
add ByteOrderMark
This commit is contained in:
parent
41cb8a6db7
commit
27e1f5f61e
189
hutool-core/src/main/java/cn/hutool/core/io/ByteOrderMark.java
Executable file
189
hutool-core/src/main/java/cn/hutool/core/io/ByteOrderMark.java
Executable file
@ -0,0 +1,189 @@
|
||||
package cn.hutool.core.io;
|
||||
|
||||
import cn.hutool.core.lang.Assert;
|
||||
import cn.hutool.core.util.ArrayUtil;
|
||||
import cn.hutool.core.util.CharsetUtil;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
/**
|
||||
* Byte Order Mark (BOM) 头描述<br>
|
||||
* BOM定义:<a href="http://www.unicode.org/unicode/faq/utf_bom.html">http://www.unicode.org/unicode/faq/utf_bom.html</a>
|
||||
* <ul>
|
||||
* <li>EF BB BF = UTF-8</li>
|
||||
* <li>FE FF = UTF-16BE, big-endian</li>
|
||||
* <li>FF FE = UTF-16LE, little-endian</li>
|
||||
* <li>00 00 FE FF = UTF-32BE, big-endian</li>
|
||||
* <li>FF FE 00 00 = UTF-32LE, little-endian</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>来自:Apache-commons-io</p>
|
||||
*
|
||||
* @author Apache-commons-io
|
||||
*/
|
||||
public class ByteOrderMark implements Predicate<byte[]>, Comparable<ByteOrderMark>, Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
// region ----- BOMs
|
||||
/**
|
||||
* UTF-8 BOM.
|
||||
*/
|
||||
public static final ByteOrderMark UTF_8 = new ByteOrderMark(CharsetUtil.NAME_UTF_8, 0xEF, 0xBB, 0xBF);
|
||||
|
||||
/**
|
||||
* UTF-16BE BOM (Big-Endian).
|
||||
*/
|
||||
public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);
|
||||
|
||||
/**
|
||||
* UTF-16LE BOM (Little-Endian).
|
||||
*/
|
||||
public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);
|
||||
|
||||
/**
|
||||
* UTF-32BE BOM (Big-Endian).
|
||||
*/
|
||||
public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);
|
||||
|
||||
/**
|
||||
* UTF-32LE BOM (Little-Endian).
|
||||
*/
|
||||
public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);
|
||||
|
||||
/**
|
||||
* 预定义的所有BOM信息
|
||||
*/
|
||||
public static final ByteOrderMark[] ALL = new ByteOrderMark[]{
|
||||
UTF_32BE,
|
||||
UTF_32LE,
|
||||
UTF_8,
|
||||
UTF_16BE,
|
||||
UTF_16LE
|
||||
};
|
||||
// endregion
|
||||
|
||||
private final String charsetName;
|
||||
private final int[] bytes;
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*
|
||||
* @param charsetName BOM定义的编码名称
|
||||
* @param bytes BOM bytes
|
||||
* @throws IllegalArgumentException 编码名称为空或者bytes为空
|
||||
*/
|
||||
public ByteOrderMark(final String charsetName, final int... bytes) {
|
||||
if (ArrayUtil.isEmpty(bytes)) {
|
||||
throw new IllegalArgumentException("No bytes specified");
|
||||
}
|
||||
this.charsetName = Assert.notEmpty(charsetName, "No charsetName specified");
|
||||
this.bytes = new int[bytes.length];
|
||||
System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取BOM头定义的编码名称.
|
||||
*
|
||||
* @return 编码名称
|
||||
*/
|
||||
public String getCharsetName() {
|
||||
return charsetName;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取BOM头byte数
|
||||
*
|
||||
* @return BOM头byte数
|
||||
*/
|
||||
public int length() {
|
||||
return bytes.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取指定位置的byte值
|
||||
*
|
||||
* @param pos The position
|
||||
* @return The specified byte
|
||||
*/
|
||||
public int get(final int pos) {
|
||||
return bytes[pos];
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a copy of the BOM's bytes.
|
||||
*
|
||||
* @return a copy of the BOM's bytes
|
||||
*/
|
||||
public byte[] getBytes() {
|
||||
final byte[] copy = new byte[bytes.length];
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
copy[i] = (byte) bytes[i];
|
||||
}
|
||||
return copy;
|
||||
}
|
||||
|
||||
/**
|
||||
* 是否匹配头部BOM信息<br>
|
||||
* 当提供的长度小于BOM需要检查的长度时,返回{code false}
|
||||
*
|
||||
* @param headBytes 头部bytes
|
||||
* @return 是否匹配头部BOM信息
|
||||
*/
|
||||
@Override
|
||||
public boolean test(final byte[] headBytes) {
|
||||
if (headBytes.length < bytes.length) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
if (bytes[i] != headBytes[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (!(obj instanceof ByteOrderMark)) {
|
||||
return false;
|
||||
}
|
||||
final ByteOrderMark bom = (ByteOrderMark) obj;
|
||||
return Arrays.equals(this.bytes, bom.bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int hashCode = getClass().hashCode();
|
||||
for (final int b : bytes) {
|
||||
hashCode += b;
|
||||
}
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
builder.append(getClass().getSimpleName());
|
||||
builder.append('[');
|
||||
builder.append(charsetName);
|
||||
builder.append(": ");
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
if (i > 0) {
|
||||
builder.append(",");
|
||||
}
|
||||
builder.append("0x");
|
||||
builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
|
||||
}
|
||||
builder.append(']');
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final ByteOrderMark o) {
|
||||
// 按照长度倒序
|
||||
return Integer.compare(o.length(), this.length());
|
||||
}
|
||||
}
|
@ -13,6 +13,7 @@
|
||||
package cn.hutool.core.io.file;
|
||||
|
||||
import cn.hutool.core.util.ArrayUtil;
|
||||
import cn.hutool.core.util.CharsetUtil;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.Arrays;
|
||||
@ -82,8 +83,7 @@ public enum FileMagicNumber {
|
||||
try {
|
||||
final int dataLength = new BigInteger(1, Arrays.copyOfRange(bytes, i, i + 4)).intValue();
|
||||
i += 4;
|
||||
final byte[] bytes1 = Arrays.copyOfRange(bytes, i, i + 4);
|
||||
final String chunkType = new String(bytes1);
|
||||
final String chunkType = new String(bytes, i, 4, CharsetUtil.ISO_8859_1);
|
||||
i += 4;
|
||||
if (Objects.equals(chunkType, "IDAT") || Objects.equals(chunkType, "IEND")) {
|
||||
return false;
|
||||
@ -606,6 +606,7 @@ public enum FileMagicNumber {
|
||||
//去除bom头并且跳过三个字节
|
||||
if (bytes.length > 3 && Objects.equals(bytes[0], (byte) 0xEF)
|
||||
&& Objects.equals(bytes[1], (byte) 0xBB) && Objects.equals(bytes[2], (byte) 0xBF)) {
|
||||
// UTF8 Bom
|
||||
bytes = Arrays.copyOfRange(bytes, 3, bytes.length);
|
||||
}
|
||||
return bytes.length > 3
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
package cn.hutool.core.io.stream;
|
||||
|
||||
import cn.hutool.core.io.ByteOrderMark;
|
||||
import cn.hutool.core.io.IORuntimeException;
|
||||
import cn.hutool.core.util.CharsetUtil;
|
||||
|
||||
@ -121,25 +122,17 @@ public class BOMInputStream extends InputStream {
|
||||
|
||||
final byte[] bom = new byte[BOM_SIZE];
|
||||
final int n;
|
||||
final int unread;
|
||||
int unread = 0;
|
||||
n = in.read(bom, 0, bom.length);
|
||||
|
||||
if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
|
||||
charset = "UTF-32BE";
|
||||
unread = n - 4;
|
||||
} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
|
||||
charset = "UTF-32LE";
|
||||
unread = n - 4;
|
||||
} else if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
|
||||
charset = "UTF-8";
|
||||
unread = n - 3;
|
||||
} else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
|
||||
charset = "UTF-16BE";
|
||||
unread = n - 2;
|
||||
} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
|
||||
charset = "UTF-16LE";
|
||||
unread = n - 2;
|
||||
} else {
|
||||
for (final ByteOrderMark byteOrderMark : ByteOrderMark.ALL) {
|
||||
if(byteOrderMark.test(bom)){
|
||||
charset = byteOrderMark.getCharsetName();
|
||||
unread = n - byteOrderMark.length();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(0 == unread) {
|
||||
// Unicode BOM mark not found, unread all bytes
|
||||
charset = defaultCharset;
|
||||
unread = n;
|
||||
|
@ -42,6 +42,7 @@ public class ObjUtil {
|
||||
* <p>比较两个对象是否相等,满足下述任意条件即返回{@code true}:
|
||||
* <ul>
|
||||
* <li>若两对象皆为{@link BigDecimal},且满足{@code 0 == obj1.compareTo(obj2)}</li>
|
||||
* <li>若两对象都为数组,调用Arrays.equals完成判断</li>
|
||||
* <li>{@code obj1 == null && obj2 == null}</li>
|
||||
* <li>{@code obj1.equals(obj2)}</li>
|
||||
* </ul>
|
||||
@ -54,6 +55,8 @@ public class ObjUtil {
|
||||
public static boolean equals(final Object obj1, final Object obj2) {
|
||||
if (obj1 instanceof BigDecimal && obj2 instanceof BigDecimal) {
|
||||
return NumberUtil.equals((BigDecimal) obj1, (BigDecimal) obj2);
|
||||
} else if(ArrayUtil.isArray(obj1) && ArrayUtil.isArray(obj2)){
|
||||
return ArrayUtil.equals(obj1, obj2);
|
||||
}
|
||||
return Objects.equals(obj1, obj2);
|
||||
}
|
||||
|
@ -3387,4 +3387,9 @@ public class PrimitiveArrayUtil {
|
||||
return true;
|
||||
}
|
||||
// endregion
|
||||
|
||||
// region rangeMatches
|
||||
public static boolean rangeMatches(final byte[] bytes1){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -719,4 +719,19 @@ public class ArrayUtilTest {
|
||||
b = ArrayUtil.startWith((int[])null, null);
|
||||
Assert.assertTrue(b);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void equalsTest() {
|
||||
final boolean b = ObjUtil.equals(new int[]{1, 2, 3}, new int[]{1, 2, 3});
|
||||
Assert.assertTrue(b);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void copyOfRangeTest() {
|
||||
String a = "aIDAT";
|
||||
final byte[] bytes1 = Arrays.copyOfRange(a.getBytes(CharsetUtil.UTF_8), 1, 1 + 4);
|
||||
|
||||
Assert.assertEquals(new String(bytes1),
|
||||
new String(a.getBytes(CharsetUtil.UTF_8), 1, 4));
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user