mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-04-19 03:01:48 +08:00
fix code
This commit is contained in:
parent
f525f21196
commit
24503fd23d
@ -1,4 +1,4 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec;
|
||||
|
||||
/**
|
||||
* 128位数字表示,分高位和低位
|
@ -1,5 +1,6 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.codec.Number128;
|
||||
import cn.hutool.core.util.ByteUtil;
|
||||
|
||||
import java.util.Arrays;
|
||||
@ -15,18 +16,22 @@ import java.util.Arrays;
|
||||
* @author hexiufeng
|
||||
* @since 5.2.5
|
||||
*/
|
||||
public class CityHash {
|
||||
public class CityHash implements Hash32<byte[]>, Hash64<byte[]>, Hash128<byte[]>{
|
||||
/**
 * Shared default instance used by the static helper wrappers.
 * Declared {@code final} so that callers cannot reassign the shared reference.
 */
public static final CityHash INSTANCE = new CityHash();
|
||||
|
||||
// Some primes between 2^63 and 2^64 for various uses.
|
||||
private static final long k0 = 0xc3a5c85c97cb3127L;
|
||||
private static final long k1 = 0xb492b66fbe98f273L;
|
||||
private static final long k2 = 0x9ae16a3b2f90404fL;
|
||||
private static final long kMul = 0x9ddfea08eb382d69L;
|
||||
|
||||
// Magic numbers for 32-bit hashing. Copied from Murmur3.
|
||||
private static final int c1 = 0xcc9e2d51;
|
||||
private static final int c2 = 0x1b873593;
|
||||
|
||||
@Override
|
||||
public Number encode(final byte[] bytes) {
|
||||
return hash64(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算32位City Hash值
|
||||
@ -34,7 +39,8 @@ public class CityHash {
|
||||
* @param data 数据
|
||||
* @return hash值
|
||||
*/
|
||||
public static int hash32(final byte[] data) {
|
||||
@Override
|
||||
public int hash32(final byte[] data) {
|
||||
final int len = data.length;
|
||||
if (len <= 24) {
|
||||
return len <= 12 ?
|
||||
@ -117,7 +123,8 @@ public class CityHash {
|
||||
* @param data 数据
|
||||
* @return hash值
|
||||
*/
|
||||
public static long hash64(final byte[] data) {
|
||||
@Override
|
||||
public long hash64(final byte[] data) {
|
||||
int len = data.length;
|
||||
if (len <= 32) {
|
||||
if (len <= 16) {
|
||||
@ -168,7 +175,7 @@ public class CityHash {
|
||||
* @param seed1 种子2
|
||||
* @return hash值
|
||||
*/
|
||||
public static long hash64(final byte[] data, final long seed0, final long seed1) {
|
||||
public long hash64(final byte[] data, final long seed0, final long seed1) {
|
||||
return hashLen16(hash64(data) - seed0, seed1);
|
||||
}
|
||||
|
||||
@ -179,7 +186,7 @@ public class CityHash {
|
||||
* @param seed 种子2
|
||||
* @return hash值
|
||||
*/
|
||||
public static long hash64(final byte[] data, final long seed) {
|
||||
public long hash64(final byte[] data, final long seed) {
|
||||
return hash64(data, k2, seed);
|
||||
}
|
||||
|
||||
@ -189,7 +196,8 @@ public class CityHash {
|
||||
* @param data 数据
|
||||
* @return hash值
|
||||
*/
|
||||
public static Number128 hash128(final byte[] data) {
|
||||
@Override
|
||||
public Number128 hash128(final byte[] data) {
|
||||
final int len = data.length;
|
||||
return len >= 16 ?
|
||||
hash128(data, 16,
|
||||
@ -204,12 +212,12 @@ public class CityHash {
|
||||
* @param seed 种子
|
||||
* @return hash值
|
||||
*/
|
||||
public static Number128 hash128(final byte[] data, final Number128 seed) {
|
||||
public Number128 hash128(final byte[] data, final Number128 seed) {
|
||||
return hash128(data, 0, seed);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------------------------------- Private method start
|
||||
private static Number128 hash128(final byte[] byteArray, final int start, final Number128 seed) {
|
||||
private Number128 hash128(final byte[] byteArray, final int start, final Number128 seed) {
|
||||
int len = byteArray.length - start;
|
||||
|
||||
if (len < 128) {
|
||||
@ -283,7 +291,7 @@ public class CityHash {
|
||||
|
||||
}
|
||||
|
||||
private static int hash32Len0to4(final byte[] byteArray) {
|
||||
private int hash32Len0to4(final byte[] byteArray) {
|
||||
int b = 0;
|
||||
int c = 9;
|
||||
final int len = byteArray.length;
|
||||
@ -294,7 +302,7 @@ public class CityHash {
|
||||
return fmix(mur(b, mur(len, c)));
|
||||
}
|
||||
|
||||
private static int hash32Len5to12(final byte[] byteArray) {
|
||||
private int hash32Len5to12(final byte[] byteArray) {
|
||||
final int len = byteArray.length;
|
||||
int a = len;
|
||||
int b = len * 5;
|
||||
@ -306,7 +314,7 @@ public class CityHash {
|
||||
return fmix(mur(c, mur(b, mur(a, d))));
|
||||
}
|
||||
|
||||
private static int hash32Len13to24(final byte[] byteArray) {
|
||||
private int hash32Len13to24(final byte[] byteArray) {
|
||||
final int len = byteArray.length;
|
||||
final int a = fetch32(byteArray, (len >>> 1) - 4);
|
||||
final int b = fetch32(byteArray, 4);
|
||||
@ -319,7 +327,7 @@ public class CityHash {
|
||||
return fmix(mur(f, mur(e, mur(d, mur(c, mur(b, mur(a, h)))))));
|
||||
}
|
||||
|
||||
private static long hashLen0to16(final byte[] byteArray) {
|
||||
private long hashLen0to16(final byte[] byteArray) {
|
||||
final int len = byteArray.length;
|
||||
if (len >= 8) {
|
||||
final long mul = k2 + len * 2L;
|
||||
@ -346,7 +354,7 @@ public class CityHash {
|
||||
}
|
||||
|
||||
// This probably works well for 16-byte strings as well, but it may be overkill in that case.
|
||||
private static long hashLen17to32(final byte[] byteArray) {
|
||||
private long hashLen17to32(final byte[] byteArray) {
|
||||
final int len = byteArray.length;
|
||||
final long mul = k2 + len * 2L;
|
||||
final long a = fetch64(byteArray, 0) * k1;
|
||||
@ -357,7 +365,7 @@ public class CityHash {
|
||||
a + rotate64(b + k2, 18) + c, mul);
|
||||
}
|
||||
|
||||
private static long hashLen33to64(final byte[] byteArray) {
|
||||
private long hashLen33to64(final byte[] byteArray) {
|
||||
final int len = byteArray.length;
|
||||
final long mul = k2 + len * 2L;
|
||||
long a = fetch64(byteArray, 0) * k2;
|
||||
@ -407,12 +415,13 @@ public class CityHash {
|
||||
return b;
|
||||
}
|
||||
|
||||
private static long hashLen16(final long u, final long v) {
|
||||
private long hashLen16(final long u, final long v) {
|
||||
return hash128to64(new Number128(u, v));
|
||||
}
|
||||
|
||||
private static long hash128to64(final Number128 number128) {
|
||||
private long hash128to64(final Number128 number128) {
|
||||
// Murmur-inspired hashing.
|
||||
final long kMul = 0x9ddfea08eb382d69L;
|
||||
long a = (number128.getLowValue() ^ number128.getHighValue()) * kMul;
|
||||
a ^= (a >>> 47);
|
||||
long b = (number128.getHighValue() ^ a) * kMul;
|
||||
@ -434,7 +443,7 @@ public class CityHash {
|
||||
return h;
|
||||
}
|
||||
|
||||
private static int mur(int a, int h) {
|
||||
private int mur(int a, int h) {
|
||||
// Helper from Murmur3 for combining two 32-bit values.
|
||||
a *= c1;
|
||||
a = rotate32(a, 17);
|
||||
@ -466,7 +475,7 @@ public class CityHash {
|
||||
b);
|
||||
}
|
||||
|
||||
private static Number128 cityMurmur(final byte[] byteArray, final Number128 seed) {
|
||||
private Number128 cityMurmur(final byte[] byteArray, final Number128 seed) {
|
||||
final int len = byteArray.length;
|
||||
long a = seed.getLowValue();
|
||||
long b = seed.getHighValue();
|
@ -1,4 +1,4 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Collection;
|
@ -1,4 +1,7 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.codec.Encoder;
|
||||
import cn.hutool.core.codec.Number128;
|
||||
|
||||
/**
|
||||
* Hash计算接口
|
||||
@ -8,7 +11,7 @@ package cn.hutool.core.lang.hash;
|
||||
* @since 5.2.5
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface Hash128<T> extends Hash<T>{
|
||||
public interface Hash128<T> extends Encoder<T, Number> {
|
||||
|
||||
/**
|
||||
* 计算Hash值
|
||||
@ -19,7 +22,7 @@ public interface Hash128<T> extends Hash<T>{
|
||||
Number128 hash128(T t);
|
||||
|
||||
@Override
|
||||
default Number hash(final T t){
|
||||
default Number encode(final T t){
|
||||
return hash128(t);
|
||||
}
|
||||
}
|
8
hutool-core/src/main/java/cn/hutool/core/lang/hash/Hash32.java → hutool-core/src/main/java/cn/hutool/core/codec/hash/Hash32.java
Normal file → Executable file
8
hutool-core/src/main/java/cn/hutool/core/lang/hash/Hash32.java → hutool-core/src/main/java/cn/hutool/core/codec/hash/Hash32.java
Normal file → Executable file
@ -1,4 +1,6 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.codec.Encoder;
|
||||
|
||||
/**
|
||||
* Hash计算接口
|
||||
@ -8,7 +10,7 @@ package cn.hutool.core.lang.hash;
|
||||
* @since 5.2.5
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface Hash32<T> extends Hash<T>{
|
||||
public interface Hash32<T> extends Encoder<T, Number> {
|
||||
/**
|
||||
* 计算Hash值
|
||||
*
|
||||
@ -18,7 +20,7 @@ public interface Hash32<T> extends Hash<T>{
|
||||
int hash32(T t);
|
||||
|
||||
@Override
|
||||
default Number hash(final T t){
|
||||
default Number encode(final T t){
|
||||
return hash32(t);
|
||||
}
|
||||
}
|
8
hutool-core/src/main/java/cn/hutool/core/lang/hash/Hash64.java → hutool-core/src/main/java/cn/hutool/core/codec/hash/Hash64.java
Normal file → Executable file
8
hutool-core/src/main/java/cn/hutool/core/lang/hash/Hash64.java → hutool-core/src/main/java/cn/hutool/core/codec/hash/Hash64.java
Normal file → Executable file
@ -1,4 +1,6 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.codec.Encoder;
|
||||
|
||||
/**
|
||||
* Hash计算接口
|
||||
@ -8,7 +10,7 @@ package cn.hutool.core.lang.hash;
|
||||
* @since 5.2.5
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface Hash64<T> extends Hash<T>{
|
||||
public interface Hash64<T> extends Encoder<T, Number> {
|
||||
/**
|
||||
* 计算Hash值
|
||||
*
|
||||
@ -18,7 +20,7 @@ public interface Hash64<T> extends Hash<T>{
|
||||
long hash64(T t);
|
||||
|
||||
@Override
|
||||
default Number hash(final T t){
|
||||
default Number encode(final T t){
|
||||
return hash64(t);
|
||||
}
|
||||
}
|
@ -1,4 +1,6 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.codec.Number128;
|
||||
|
||||
/**
|
||||
* Hash算法大全<br>
|
||||
@ -449,7 +451,7 @@ public class HashUtil {
|
||||
* @since 4.3.3
|
||||
*/
|
||||
public static int murmur32(final byte[] data) {
|
||||
return MurmurHash.hash32(data);
|
||||
return MurmurHash.INSTANCE.hash32(data);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -460,7 +462,7 @@ public class HashUtil {
|
||||
* @since 4.3.3
|
||||
*/
|
||||
public static long murmur64(final byte[] data) {
|
||||
return MurmurHash.hash64(data);
|
||||
return MurmurHash.INSTANCE.hash64(data);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -471,7 +473,7 @@ public class HashUtil {
|
||||
* @since 4.3.3
|
||||
*/
|
||||
public static Number128 murmur128(final byte[] data) {
|
||||
return MurmurHash.hash128(data);
|
||||
return MurmurHash.INSTANCE.hash128(data);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -482,7 +484,7 @@ public class HashUtil {
|
||||
* @since 5.2.5
|
||||
*/
|
||||
public static int cityHash32(final byte[] data) {
|
||||
return CityHash.hash32(data);
|
||||
return CityHash.INSTANCE.hash32(data);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -494,7 +496,7 @@ public class HashUtil {
|
||||
* @since 5.2.5
|
||||
*/
|
||||
public static long cityHash64(final byte[] data, final long seed) {
|
||||
return CityHash.hash64(data, seed);
|
||||
return CityHash.INSTANCE.hash64(data, seed);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -507,7 +509,7 @@ public class HashUtil {
|
||||
* @since 5.2.5
|
||||
*/
|
||||
public static long cityHash64(final byte[] data, final long seed0, final long seed1) {
|
||||
return CityHash.hash64(data, seed0, seed1);
|
||||
return CityHash.INSTANCE.hash64(data, seed0, seed1);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -518,7 +520,7 @@ public class HashUtil {
|
||||
* @since 5.2.5
|
||||
*/
|
||||
public static long cityHash64(final byte[] data) {
|
||||
return CityHash.hash64(data);
|
||||
return CityHash.INSTANCE.hash64(data);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -529,7 +531,7 @@ public class HashUtil {
|
||||
* @since 5.2.5
|
||||
*/
|
||||
public static long[] cityHash128(final byte[] data) {
|
||||
return CityHash.hash128(data).getLongArray();
|
||||
return CityHash.INSTANCE.hash128(data).getLongArray();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -541,7 +543,7 @@ public class HashUtil {
|
||||
* @since 5.2.5
|
||||
*/
|
||||
public static long[] cityHash128(final byte[] data, final Number128 seed) {
|
||||
return CityHash.hash128(data, seed).getLongArray();
|
||||
return CityHash.INSTANCE.hash128(data, seed).getLongArray();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -552,7 +554,7 @@ public class HashUtil {
|
||||
* @return hash值
|
||||
*/
|
||||
public static long metroHash64(final byte[] data, final long seed) {
|
||||
return MetroHash.hash64(data, seed);
|
||||
return MetroHash.INSTANCE.hash64(data, seed);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -562,7 +564,7 @@ public class HashUtil {
|
||||
* @return hash值
|
||||
*/
|
||||
public static long metroHash64(final byte[] data) {
|
||||
return MetroHash.hash64(data);
|
||||
return MetroHash.INSTANCE.hash64(data);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -573,7 +575,7 @@ public class HashUtil {
|
||||
* @return hash值,long[0]:低位,long[1]:高位
|
||||
*/
|
||||
public static long[] metroHash128(final byte[] data, final long seed) {
|
||||
return MetroHash.hash128(data, seed).getLongArray();
|
||||
return MetroHash.INSTANCE.hash128(data, seed).getLongArray();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -583,7 +585,7 @@ public class HashUtil {
|
||||
* @return hash值,long[0]:低位,long[1]:高位
|
||||
*/
|
||||
public static long[] metroHash128(final byte[] data) {
|
||||
return MetroHash.hash128(data).getLongArray();
|
||||
return MetroHash.INSTANCE.hash128(data).getLongArray();
|
||||
}
|
||||
|
||||
/**
|
@ -1,7 +1,6 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.exceptions.UtilException;
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
@ -12,10 +11,10 @@ import java.security.NoSuchAlgorithmException;
|
||||
* @author looly
|
||||
* @since 5.7.20
|
||||
*/
|
||||
public class KetamaHash implements Hash64<String>, Hash32<String> {
|
||||
public class KetamaHash implements Hash64<byte[]>, Hash32<byte[]> {
|
||||
|
||||
@Override
|
||||
public long hash64(final String key) {
|
||||
public long hash64(final byte[] key) {
|
||||
final byte[] bKey = md5(key);
|
||||
return ((long) (bKey[3] & 0xFF) << 24)
|
||||
| ((long) (bKey[2] & 0xFF) << 16)
|
||||
@ -24,12 +23,12 @@ public class KetamaHash implements Hash64<String>, Hash32<String> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hash32(final String key) {
|
||||
public int hash32(final byte[] key) {
|
||||
return (int) (hash64(key) & 0xffffffffL);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number hash(final String key) {
|
||||
public Number encode(final byte[] key) {
|
||||
return hash64(key);
|
||||
}
|
||||
|
||||
@ -39,13 +38,13 @@ public class KetamaHash implements Hash64<String>, Hash32<String> {
|
||||
* @param key 被计算的键
|
||||
* @return MD5值
|
||||
*/
|
||||
private static byte[] md5(final String key) {
|
||||
private static byte[] md5(final byte[] key) {
|
||||
final MessageDigest md5;
|
||||
try {
|
||||
md5 = MessageDigest.getInstance("MD5");
|
||||
} catch (final NoSuchAlgorithmException e) {
|
||||
throw new UtilException("MD5 algorithm not suooport!", e);
|
||||
}
|
||||
return md5.digest(StrUtil.utf8Bytes(key));
|
||||
return md5.digest(key);
|
||||
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.codec.Number128;
|
||||
import cn.hutool.core.util.ByteUtil;
|
||||
|
||||
import java.nio.ByteOrder;
|
||||
@ -13,35 +14,35 @@ import java.util.Arrays;
|
||||
* 官方实现:https://github.com/jandrewrogers/MetroHash
|
||||
* 官方文档:http://www.jandrewrogers.com/2015/05/27/metrohash/
|
||||
* Go语言实现:https://github.com/linvon/cuckoo-filter/blob/main/vendor/github.com/dgryski/go-metro/
|
||||
*
|
||||
* @author li
|
||||
*/
|
||||
public class MetroHash {
|
||||
public class MetroHash implements Hash64<byte[]>, Hash128<byte[]> {
|
||||
/**
 * Shared default instance used by the static helper wrappers.
 * Declared {@code final} so that callers cannot reassign the shared reference.
 */
public static final MetroHash INSTANCE = new MetroHash();
|
||||
|
||||
/**
|
||||
* hash64 种子加盐
|
||||
*/
|
||||
private final static long k0_64 = 0xD6D018F5;
|
||||
private final static long k1_64 = 0xA2AA033B;
|
||||
private final static long k2_64 = 0x62992FC1;
|
||||
private final static long k3_64 = 0x30BC5B29;
|
||||
@Override
|
||||
public Number encode(final byte[] bytes) {
|
||||
return hash64(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* hash128 种子加盐
|
||||
*/
|
||||
private final static long k0_128 = 0xC83A91E1;
|
||||
private final static long k1_128 = 0x8648DBDB;
|
||||
private final static long k2_128 = 0x7BDEC03B;
|
||||
private final static long k3_128 = 0x2F5870A5;
|
||||
|
||||
public static long hash64(final byte[] data) {
|
||||
@Override
|
||||
public long hash64(final byte[] data) {
|
||||
return hash64(data, 1337);
|
||||
}
|
||||
|
||||
public static Number128 hash128(final byte[] data) {
|
||||
return hash128(data, 1337);
|
||||
}
|
||||
/**
|
||||
* 计算64位Hash值
|
||||
*
|
||||
* @param data 数据
|
||||
* @param seed 种子
|
||||
* @return hash64
|
||||
*/
|
||||
public long hash64(final byte[] data, final long seed) {
|
||||
final long k0_64 = 0xD6D018F5;
|
||||
final long k1_64 = 0xA2AA033B;
|
||||
final long k2_64 = 0x62992FC1;
|
||||
final long k3_64 = 0x30BC5B29;
|
||||
|
||||
public static long hash64(final byte[] data, final long seed) {
|
||||
byte[] buffer = data;
|
||||
long hash = (seed + k2_64) * k0_64;
|
||||
|
||||
@ -113,7 +114,24 @@ public class MetroHash {
|
||||
return hash;
|
||||
}
|
||||
|
||||
public static Number128 hash128(final byte[] data, final long seed) {
|
||||
@Override
|
||||
public Number128 hash128(final byte[] data) {
|
||||
return hash128(data, 1337);
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算128位hash值
|
||||
*
|
||||
* @param data 数据
|
||||
* @param seed 种子
|
||||
* @return hash128
|
||||
*/
|
||||
public Number128 hash128(final byte[] data, final long seed) {
|
||||
final long k0_128 = 0xC83A91E1;
|
||||
final long k1_128 = 0x8648DBDB;
|
||||
final long k2_128 = 0x7BDEC03B;
|
||||
final long k3_128 = 0x2F5870A5;
|
||||
|
||||
byte[] buffer = data;
|
||||
|
||||
long v0, v1, v2, v3;
|
||||
@ -193,6 +211,7 @@ public class MetroHash {
|
||||
}
|
||||
|
||||
|
||||
// region =========== Private methods
|
||||
private static long littleEndian64(final byte[] b, final int start) {
|
||||
return ByteUtil.bytesToLong(b, start, ByteOrder.LITTLE_ENDIAN);
|
||||
}
|
||||
@ -214,4 +233,5 @@ public class MetroHash {
|
||||
private static long rotateRight(final long val, final int shift) {
|
||||
return (val >> shift) | (val << (64 - shift));
|
||||
}
|
||||
// endregion =========== Private methods
|
||||
}
|
@ -1,10 +1,10 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.codec.Number128;
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
import cn.hutool.core.util.ByteUtil;
|
||||
import cn.hutool.core.util.CharsetUtil;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
@ -20,8 +20,8 @@ import java.nio.charset.Charset;
|
||||
* @author looly, Simhash4J
|
||||
* @since 4.3.3
|
||||
*/
|
||||
public class MurmurHash implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
public class MurmurHash implements Hash32<byte[]>, Hash64<byte[]>, Hash128<byte[]>{
|
||||
public static final MurmurHash INSTANCE = new MurmurHash();
|
||||
|
||||
// Constants for 32 bit variant
|
||||
private static final int C1_32 = 0xcc9e2d51;
|
||||
@ -45,13 +45,18 @@ public class MurmurHash implements Serializable {
|
||||
private static final Charset DEFAULT_CHARSET = CharsetUtil.UTF_8;
|
||||
private static final ByteOrder DEFAULT_ORDER = ByteOrder.LITTLE_ENDIAN;
|
||||
|
||||
@Override
|
||||
public Number encode(final byte[] bytes) {
|
||||
return hash128(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Murmur3 32-bit Hash值计算
|
||||
*
|
||||
* @param data 数据
|
||||
* @return Hash值
|
||||
*/
|
||||
public static int hash32(final CharSequence data) {
|
||||
public int hash32(final CharSequence data) {
|
||||
return hash32(StrUtil.bytes(data, DEFAULT_CHARSET));
|
||||
}
|
||||
|
||||
@ -61,7 +66,8 @@ public class MurmurHash implements Serializable {
|
||||
* @param data 数据
|
||||
* @return Hash值
|
||||
*/
|
||||
public static int hash32(final byte[] data) {
|
||||
@Override
|
||||
public int hash32(final byte[] data) {
|
||||
return hash32(data, data.length, DEFAULT_SEED);
|
||||
}
|
||||
|
||||
@ -73,7 +79,7 @@ public class MurmurHash implements Serializable {
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值
|
||||
*/
|
||||
public static int hash32(final byte[] data, final int length, final int seed) {
|
||||
public int hash32(final byte[] data, final int length, final int seed) {
|
||||
return hash32(data, 0, length, seed);
|
||||
}
|
||||
|
||||
@ -86,7 +92,7 @@ public class MurmurHash implements Serializable {
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值
|
||||
*/
|
||||
public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
|
||||
public int hash32(final byte[] data, final int offset, final int length, final int seed) {
|
||||
int hash = seed;
|
||||
final int nblocks = length >> 2;
|
||||
|
||||
@ -127,7 +133,7 @@ public class MurmurHash implements Serializable {
|
||||
* @param data 数据
|
||||
* @return Hash值
|
||||
*/
|
||||
public static long hash64(final CharSequence data) {
|
||||
public long hash64(final CharSequence data) {
|
||||
return hash64(StrUtil.bytes(data, DEFAULT_CHARSET));
|
||||
}
|
||||
|
||||
@ -138,7 +144,8 @@ public class MurmurHash implements Serializable {
|
||||
* @param data 数据
|
||||
* @return Hash值
|
||||
*/
|
||||
public static long hash64(final byte[] data) {
|
||||
@Override
|
||||
public long hash64(final byte[] data) {
|
||||
return hash64(data, data.length, DEFAULT_SEED);
|
||||
}
|
||||
|
||||
@ -151,7 +158,7 @@ public class MurmurHash implements Serializable {
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值
|
||||
*/
|
||||
public static long hash64(final byte[] data, final int length, final int seed) {
|
||||
public long hash64(final byte[] data, final int length, final int seed) {
|
||||
long hash = seed;
|
||||
final int nblocks = length >> 3;
|
||||
|
||||
@ -205,7 +212,7 @@ public class MurmurHash implements Serializable {
|
||||
* @param data 数据
|
||||
* @return Hash值 (2 longs)
|
||||
*/
|
||||
public static Number128 hash128(final CharSequence data) {
|
||||
public Number128 hash128(final CharSequence data) {
|
||||
return hash128(StrUtil.bytes(data, DEFAULT_CHARSET));
|
||||
}
|
||||
|
||||
@ -215,7 +222,8 @@ public class MurmurHash implements Serializable {
|
||||
* @param data -数据
|
||||
* @return Hash值 (2 longs)
|
||||
*/
|
||||
public static Number128 hash128(final byte[] data) {
|
||||
@Override
|
||||
public Number128 hash128(final byte[] data) {
|
||||
return hash128(data, data.length, DEFAULT_SEED);
|
||||
}
|
||||
|
||||
@ -227,7 +235,7 @@ public class MurmurHash implements Serializable {
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值(2 longs)
|
||||
*/
|
||||
public static Number128 hash128(final byte[] data, final int length, final int seed) {
|
||||
public Number128 hash128(final byte[] data, final int length, final int seed) {
|
||||
return hash128(data, 0, length, seed);
|
||||
}
|
||||
|
||||
@ -240,7 +248,7 @@ public class MurmurHash implements Serializable {
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值(2 longs)
|
||||
*/
|
||||
public static Number128 hash128(final byte[] data, final int offset, final int length, int seed) {
|
||||
public Number128 hash128(final byte[] data, final int offset, final int length, int seed) {
|
||||
// 避免负数的种子
|
||||
seed &= 0xffffffffL;
|
||||
|
@ -1,6 +1,4 @@
|
||||
package cn.hutool.core.text;
|
||||
|
||||
import cn.hutool.core.lang.hash.MurmurHash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
@ -23,16 +21,22 @@ import java.util.concurrent.locks.StampedLock;
|
||||
* @author Looly, litaoxiao
|
||||
* @since 4.3.3
|
||||
*/
|
||||
public class Simhash {
|
||||
public class Simhash implements Hash64<Collection<? extends CharSequence>> {
|
||||
|
||||
private final int bitNum = 64;
|
||||
/** 存储段数,默认按照4段进行simhash存储 */
|
||||
/**
|
||||
* 存储段数,默认按照4段进行simhash存储
|
||||
*/
|
||||
private final int fracCount;
|
||||
private final int fracBitNum;
|
||||
/** 汉明距离的衡量标准,小于此距离标准表示相似 */
|
||||
/**
|
||||
* 汉明距离的衡量标准,小于此距离标准表示相似
|
||||
*/
|
||||
private final int hammingThresh;
|
||||
|
||||
/** 按照分段存储simhash,查找更快速 */
|
||||
/**
|
||||
* 按照分段存储simhash,查找更快速
|
||||
*/
|
||||
private final List<Map<String, List<Long>>> storage;
|
||||
private final StampedLock lock = new StampedLock();
|
||||
|
||||
@ -46,7 +50,7 @@ public class Simhash {
|
||||
/**
|
||||
* 构造
|
||||
*
|
||||
* @param fracCount 存储段数
|
||||
* @param fracCount 存储段数
|
||||
* @param hammingThresh 汉明距离的衡量标准
|
||||
*/
|
||||
public Simhash(final int fracCount, final int hammingThresh) {
|
||||
@ -65,13 +69,14 @@ public class Simhash {
|
||||
* @param segList 分词的词列表
|
||||
* @return Hash值
|
||||
*/
|
||||
public long hash(final Collection<? extends CharSequence> segList) {
|
||||
@Override
|
||||
public long hash64(final Collection<? extends CharSequence> segList) {
|
||||
final int bitNum = this.bitNum;
|
||||
// 按照词语的hash值,计算simHashWeight(低位对齐)
|
||||
final int[] weight = new int[bitNum];
|
||||
long wordHash;
|
||||
for (final CharSequence seg : segList) {
|
||||
wordHash = MurmurHash.hash64(seg);
|
||||
wordHash = MurmurHash.INSTANCE.hash64(seg);
|
||||
for (int i = 0; i < bitNum; i++) {
|
||||
if (((wordHash >> i) & 1) == 1)
|
||||
weight[i] += 1;
|
||||
@ -96,7 +101,7 @@ public class Simhash {
|
||||
* @return 是否重复
|
||||
*/
|
||||
public boolean equals(final Collection<? extends CharSequence> segList) {
|
||||
final long simhash = hash(segList);
|
||||
final long simhash = hash64(segList);
|
||||
final List<String> fracList = splitSimhash(simhash);
|
||||
final int hammingThresh = this.hammingThresh;
|
||||
|
||||
@ -153,6 +158,7 @@ public class Simhash {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------------------------------ Private method start
|
||||
|
||||
/**
|
||||
* 计算汉明距离
|
||||
*
|
@ -4,4 +4,4 @@
|
||||
* @author looly
|
||||
*
|
||||
*/
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
@ -9,7 +9,7 @@ import cn.hutool.core.comparator.PropertyComparator;
|
||||
import cn.hutool.core.convert.CompositeConverter;
|
||||
import cn.hutool.core.convert.Convert;
|
||||
import cn.hutool.core.exceptions.UtilException;
|
||||
import cn.hutool.core.lang.hash.Hash32;
|
||||
import cn.hutool.core.codec.hash.Hash32;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import cn.hutool.core.reflect.ClassUtil;
|
||||
import cn.hutool.core.reflect.ConstructorUtil;
|
||||
|
@ -1,19 +0,0 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
|
||||
/**
|
||||
* Hash计算接口
|
||||
*
|
||||
* @param <T> 被计算hash的对象类型
|
||||
* @author looly
|
||||
* @since 5.7.15
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface Hash<T> {
|
||||
/**
|
||||
* 计算Hash值
|
||||
*
|
||||
* @param t 对象
|
||||
* @return hash
|
||||
*/
|
||||
Number hash(T t);
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
import org.junit.Assert;
|
||||
@ -8,29 +8,31 @@ public class CityHashTest {
|
||||
|
||||
@Test
|
||||
public void hash32Test() {
|
||||
int hv = CityHash.hash32(StrUtil.utf8Bytes("你"));
|
||||
final CityHash cityHash = CityHash.INSTANCE;
|
||||
int hv = cityHash.hash32(StrUtil.utf8Bytes("你"));
|
||||
Assert.assertEquals(1290029860, hv);
|
||||
|
||||
hv = CityHash.hash32(StrUtil.utf8Bytes("你好"));
|
||||
hv = cityHash.hash32(StrUtil.utf8Bytes("你好"));
|
||||
Assert.assertEquals(1374181357, hv);
|
||||
|
||||
hv = CityHash.hash32(StrUtil.utf8Bytes("见到你很高兴"));
|
||||
hv = cityHash.hash32(StrUtil.utf8Bytes("见到你很高兴"));
|
||||
Assert.assertEquals(1475516842, hv);
|
||||
hv = CityHash.hash32(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
|
||||
hv = cityHash.hash32(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
|
||||
Assert.assertEquals(0x51020cae, hv);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void hash64Test() {
|
||||
long hv = CityHash.hash64(StrUtil.utf8Bytes("你"));
|
||||
final CityHash cityHash = CityHash.INSTANCE;
|
||||
long hv = cityHash.hash64(StrUtil.utf8Bytes("你"));
|
||||
Assert.assertEquals(-4296898700418225525L, hv);
|
||||
|
||||
hv = CityHash.hash64(StrUtil.utf8Bytes("你好"));
|
||||
hv = cityHash.hash64(StrUtil.utf8Bytes("你好"));
|
||||
Assert.assertEquals(-4294276205456761303L, hv);
|
||||
|
||||
hv = CityHash.hash64(StrUtil.utf8Bytes("见到你很高兴"));
|
||||
hv = cityHash.hash64(StrUtil.utf8Bytes("见到你很高兴"));
|
||||
Assert.assertEquals(272351505337503793L, hv);
|
||||
hv = CityHash.hash64(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
|
||||
hv = cityHash.hash64(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
|
||||
Assert.assertEquals(-8234735310919228703L, hv);
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
|
||||
import cn.hutool.core.util.CharsetUtil;
|
||||
@ -16,20 +16,20 @@ public class MetroHashTest {
|
||||
|
||||
@Test
|
||||
public void testEmpty() {
|
||||
Assert.assertEquals("31290877cceaea29", HexUtil.toHex(MetroHash.hash64(StrUtil.utf8Bytes(""), 0)));
|
||||
Assert.assertEquals("31290877cceaea29", HexUtil.toHex(MetroHash.INSTANCE.hash64(StrUtil.utf8Bytes(""), 0)));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void metroHash64Test() {
|
||||
final byte[] str = "我是一段测试123".getBytes(CharsetUtil.UTF_8);
|
||||
final long hash64 = MetroHash.hash64(str);
|
||||
final long hash64 = MetroHash.INSTANCE.hash64(str);
|
||||
Assert.assertEquals(62920234463891865L, hash64);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void metroHash128Test() {
|
||||
final byte[] str = "我是一段测试123".getBytes(CharsetUtil.UTF_8);
|
||||
final long[] hash128 = MetroHash.hash128(str).getLongArray();
|
||||
final long[] hash128 = MetroHash.INSTANCE.hash128(str).getLongArray();
|
||||
Assert.assertEquals(4956592424592439349L, hash128[0]);
|
||||
Assert.assertEquals(6301214698325086246L, hash128[1]);
|
||||
}
|
||||
@ -43,13 +43,13 @@ public class MetroHashTest {
|
||||
final String[] strArray = getRandomStringArray();
|
||||
final long startCity = System.currentTimeMillis();
|
||||
for (final String s : strArray) {
|
||||
CityHash.hash64(s.getBytes());
|
||||
CityHash.INSTANCE.hash64(s.getBytes());
|
||||
}
|
||||
final long endCity = System.currentTimeMillis();
|
||||
|
||||
final long startMetro = System.currentTimeMillis();
|
||||
for (final String s : strArray) {
|
||||
MetroHash.hash64(StrUtil.utf8Bytes(s));
|
||||
MetroHash.INSTANCE.hash64(StrUtil.utf8Bytes(s));
|
||||
}
|
||||
final long endMetro = System.currentTimeMillis();
|
||||
|
||||
@ -67,13 +67,13 @@ public class MetroHashTest {
|
||||
final String[] strArray = getRandomStringArray();
|
||||
final long startCity = System.currentTimeMillis();
|
||||
for (final String s : strArray) {
|
||||
CityHash.hash128(s.getBytes());
|
||||
CityHash.INSTANCE.hash128(s.getBytes());
|
||||
}
|
||||
final long endCity = System.currentTimeMillis();
|
||||
|
||||
final long startMetro = System.currentTimeMillis();
|
||||
for (final String s : strArray) {
|
||||
MetroHash.hash128(StrUtil.utf8Bytes(s));
|
||||
MetroHash.INSTANCE.hash128(StrUtil.utf8Bytes(s));
|
||||
}
|
||||
final long endMetro = System.currentTimeMillis();
|
||||
|
36
hutool-core/src/test/java/cn/hutool/core/codec/hash/MurmurHashTest.java
Executable file
36
hutool-core/src/test/java/cn/hutool/core/codec/hash/MurmurHashTest.java
Executable file
@ -0,0 +1,36 @@
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class MurmurHashTest {
|
||||
|
||||
@Test
|
||||
public void hash32Test() {
|
||||
int hv = MurmurHash.INSTANCE.hash32(StrUtil.utf8Bytes("你"));
|
||||
Assert.assertEquals(-1898877446, hv);
|
||||
|
||||
hv = MurmurHash.INSTANCE.hash32(StrUtil.utf8Bytes("你好"));
|
||||
Assert.assertEquals(337357348, hv);
|
||||
|
||||
hv = MurmurHash.INSTANCE.hash32(StrUtil.utf8Bytes("见到你很高兴"));
|
||||
Assert.assertEquals(1101306141, hv);
|
||||
hv = MurmurHash.INSTANCE.hash32(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
|
||||
Assert.assertEquals(-785444229, hv);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void hash64Test() {
|
||||
long hv = MurmurHash.INSTANCE.hash64(StrUtil.utf8Bytes("你"));
|
||||
Assert.assertEquals(-1349759534971957051L, hv);
|
||||
|
||||
hv = MurmurHash.INSTANCE.hash64(StrUtil.utf8Bytes("你好"));
|
||||
Assert.assertEquals(-7563732748897304996L, hv);
|
||||
|
||||
hv = MurmurHash.INSTANCE.hash64(StrUtil.utf8Bytes("见到你很高兴"));
|
||||
Assert.assertEquals(-766658210119995316L, hv);
|
||||
hv = MurmurHash.INSTANCE.hash64(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
|
||||
Assert.assertEquals(-7469283059271653317L, hv);
|
||||
}
|
||||
}
|
@ -1,11 +1,9 @@
|
||||
package cn.hutool.core.lang;
|
||||
package cn.hutool.core.codec.hash;
|
||||
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import cn.hutool.core.text.Simhash;
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
|
||||
public class SimhashTest {
|
||||
|
||||
@Test
|
||||
@ -14,7 +12,7 @@ public class SimhashTest {
|
||||
final String text2 = "我是 一个 普通 字符串";
|
||||
|
||||
final Simhash simhash = new Simhash();
|
||||
final long hash = simhash.hash(StrUtil.split(text1, ' '));
|
||||
final long hash = simhash.hash64(StrUtil.split(text1, ' '));
|
||||
Assert.assertTrue(hash != 0);
|
||||
|
||||
simhash.store(hash);
|
@ -1,36 +0,0 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class MurmurHashTest {
|
||||
|
||||
@Test
|
||||
public void hash32Test() {
|
||||
int hv = MurmurHash.hash32(StrUtil.utf8Bytes("你"));
|
||||
Assert.assertEquals(-1898877446, hv);
|
||||
|
||||
hv = MurmurHash.hash32(StrUtil.utf8Bytes("你好"));
|
||||
Assert.assertEquals(337357348, hv);
|
||||
|
||||
hv = MurmurHash.hash32(StrUtil.utf8Bytes("见到你很高兴"));
|
||||
Assert.assertEquals(1101306141, hv);
|
||||
hv = MurmurHash.hash32(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
|
||||
Assert.assertEquals(-785444229, hv);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void hash64Test() {
|
||||
long hv = MurmurHash.hash64(StrUtil.utf8Bytes("你"));
|
||||
Assert.assertEquals(-1349759534971957051L, hv);
|
||||
|
||||
hv = MurmurHash.hash64(StrUtil.utf8Bytes("你好"));
|
||||
Assert.assertEquals(-7563732748897304996L, hv);
|
||||
|
||||
hv = MurmurHash.hash64(StrUtil.utf8Bytes("见到你很高兴"));
|
||||
Assert.assertEquals(-766658210119995316L, hv);
|
||||
hv = MurmurHash.hash64(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
|
||||
Assert.assertEquals(-7469283059271653317L, hv);
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
package cn.hutool.core.text.bloom;
|
||||
|
||||
import cn.hutool.core.lang.hash.HashUtil;
|
||||
import cn.hutool.core.codec.hash.HashUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
package cn.hutool.core.util;
|
||||
|
||||
import cn.hutool.core.lang.hash.HashUtil;
|
||||
import cn.hutool.core.codec.hash.HashUtil;
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
Loading…
x
Reference in New Issue
Block a user