diff --git a/CHANGELOG.md b/CHANGELOG.md index 648563835..de42089e3 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ ### 🐞Bug修复 * 【core 】 修复FileNameUtil.cleanInvalid无法去除换行符问题(issue#I5RMZV@Gitee) +* 【core 】 修复murmur3_32实现错误(pr#2616@Github) ------------------------------------------------------------------------------------------------------------- diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/hash/MurmurHash.java b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MurmurHash.java index 2369c30e8..0038f8163 100644 --- a/hutool-core/src/main/java/cn/hutool/core/lang/hash/MurmurHash.java +++ b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MurmurHash.java @@ -17,10 +17,10 @@ import java.nio.charset.Charset; * 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255 *

* - * @author looly,Simhash4J + * @author looly, Simhash4J * @since 4.3.3 */ -public class MurmurHash implements Serializable{ +public class MurmurHash implements Serializable { private static final long serialVersionUID = 1L; // Constants for 32 bit variant @@ -68,55 +68,57 @@ public class MurmurHash implements Serializable{ /** * Murmur3 32-bit Hash值计算 * - * @param data 数据 + * @param data 数据 * @param length 长度 - * @param seed 种子,默认0 + * @param seed 种子,默认0 * @return Hash值 */ public static int hash32(byte[] data, int length, int seed) { + return hash32(data, 0, length, seed); + } + + /** + * Murmur3 32-bit Hash值计算 + * + * @param data 数据 + * @param offset 数据开始位置 + * @param length 长度 + * @param seed 种子,默认0 + * @return Hash值 + */ + public static int hash32(byte[] data, int offset, int length, int seed) { int hash = seed; final int nblocks = length >> 2; // body for (int i = 0; i < nblocks; i++) { - int i4 = i << 2; - int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER); - + final int i4 = offset + (i << 2); + final int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER); // mix functions - k *= C1_32; - k = Integer.rotateLeft(k, R1_32); - k *= C2_32; - hash ^= k; - hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; + hash = mix32(k, hash); } // tail - int idx = nblocks << 2; + final int idx = offset + (nblocks << 2); int k1 = 0; - switch (length - idx) { - case 3: - k1 ^= (data[idx + 2] & 0xff) << 16; - case 2: - k1 ^= (data[idx + 1] & 0xff) << 8; - case 1: - k1 ^= (data[idx] & 0xff); + switch (offset + length - idx) { + case 3: + k1 ^= (data[idx + 2] & 0xff) << 16; + case 2: + k1 ^= (data[idx + 1] & 0xff) << 8; + case 1: + k1 ^= (data[idx] & 0xff); - // mix functions - k1 *= C1_32; - k1 = Integer.rotateLeft(k1, R1_32); - k1 *= C2_32; - hash ^= k1; + // mix functions + k1 *= C1_32; + k1 = Integer.rotateLeft(k1, R1_32); + k1 *= C2_32; + hash ^= k1; } // finalization hash ^= length; - hash ^= (hash >>> 16); - hash *= 0x85ebca6b; - hash ^= (hash >>> 13); - hash *= 0xc2b2ae35; - hash ^= (hash >>> 16); - - return hash; + return fmix32(hash); } /** @@ -133,7 +135,6 @@ public class MurmurHash implements Serializable{ * Murmur3 64-bit 算法
* This is essentially MSB 8 bytes of Murmur3 128-bit variant. * - * * @param data 数据 * @return Hash值 */ @@ -142,12 +143,12 @@ public class MurmurHash implements Serializable{ } /** - * Murmur3 64-bit 算法
+ * 类Murmur3 64-bit 算法
* This is essentially MSB 8 bytes of Murmur3 128-bit variant. * - * @param data 数据 + * @param data 数据 * @param length 长度 - * @param seed 种子,默认0 + * @param seed 种子,默认0 * @return Hash值 */ public static long hash64(byte[] data, int length, int seed) { @@ -171,24 +172,24 @@ public class MurmurHash implements Serializable{ long k1 = 0; int tailStart = nblocks << 3; switch (length - tailStart) { - case 7: - k1 ^= ((long) data[tailStart + 6] & 0xff) << 48; - case 6: - k1 ^= ((long) data[tailStart + 5] & 0xff) << 40; - case 5: - k1 ^= ((long) data[tailStart + 4] & 0xff) << 32; - case 4: - k1 ^= ((long) data[tailStart + 3] & 0xff) << 24; - case 3: - k1 ^= ((long) data[tailStart + 2] & 0xff) << 16; - case 2: - k1 ^= ((long) data[tailStart + 1] & 0xff) << 8; - case 1: - k1 ^= ((long) data[tailStart] & 0xff); - k1 *= C1; - k1 = Long.rotateLeft(k1, R1); - k1 *= C2; - hash ^= k1; + case 7: + k1 ^= ((long) data[tailStart + 6] & 0xff) << 48; + case 6: + k1 ^= ((long) data[tailStart + 5] & 0xff) << 40; + case 5: + k1 ^= ((long) data[tailStart + 4] & 0xff) << 32; + case 4: + k1 ^= ((long) data[tailStart + 3] & 0xff) << 24; + case 3: + k1 ^= ((long) data[tailStart + 2] & 0xff) << 16; + case 2: + k1 ^= ((long) data[tailStart + 1] & 0xff) << 8; + case 1: + k1 ^= ((long) data[tailStart] & 0xff); + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + hash ^= k1; } // finalization @@ -221,19 +222,35 @@ public class MurmurHash implements Serializable{ /** * Murmur3 128-bit variant. * - * @param data 数据 + * @param data 数据 * @param length 长度 - * @param seed 种子,默认0 + * @param seed 种子,默认0 * @return Hash值(2 longs) */ public static long[] hash128(byte[] data, int length, int seed) { + return hash128(data, 0, length, seed); + } + + /** + * Murmur3 128-bit variant. + * + * @param data 数据 + * @param offset 数据开始位置 + * @param length 长度 + * @param seed 种子,默认0 + * @return Hash值(2 longs) + */ + public static long[] hash128(byte[] data, int offset, int length, int seed) { + // 避免负数的种子 + seed &= 0xffffffffL; + long h1 = seed; long h2 = seed; final int nblocks = length >> 4; // body for (int i = 0; i < nblocks; i++) { - final int i16 = i << 4; + final int i16 = offset + (i << 4); long k1 = ByteUtil.bytesToLong(data, i16, DEFAULT_ORDER); long k2 = ByteUtil.bytesToLong(data, i16 + 8, DEFAULT_ORDER); @@ -259,47 +276,47 @@ public class MurmurHash implements Serializable{ // tail long k1 = 0; long k2 = 0; - int tailStart = nblocks << 4; - switch (length - tailStart) { - case 15: - k2 ^= (long) (data[tailStart + 14] & 0xff) << 48; - case 14: - k2 ^= (long) (data[tailStart + 13] & 0xff) << 40; - case 13: - k2 ^= (long) (data[tailStart + 12] & 0xff) << 32; - case 12: - k2 ^= (long) (data[tailStart + 11] & 0xff) << 24; - case 11: - k2 ^= (long) (data[tailStart + 10] & 0xff) << 16; - case 10: - k2 ^= (long) (data[tailStart + 9] & 0xff) << 8; - case 9: - k2 ^= data[tailStart + 8] & 0xff; - k2 *= C2; - k2 = Long.rotateLeft(k2, R3); - k2 *= C1; - h2 ^= k2; + final int tailStart = offset + (nblocks << 4); + switch (offset + length - tailStart) { + case 15: + k2 ^= (long) (data[tailStart + 14] & 0xff) << 48; + case 14: + k2 ^= (long) (data[tailStart + 13] & 0xff) << 40; + case 13: + k2 ^= (long) (data[tailStart + 12] & 0xff) << 32; + case 12: + k2 ^= (long) (data[tailStart + 11] & 0xff) << 24; + case 11: + k2 ^= (long) (data[tailStart + 10] & 0xff) << 16; + case 10: + k2 ^= (long) (data[tailStart + 9] & 0xff) << 8; + case 9: + k2 ^= data[tailStart + 8] & 0xff; + k2 *= C2; + k2 = Long.rotateLeft(k2, R3); + k2 *= C1; + h2 ^= k2; - case 8: - k1 ^= (long) (data[tailStart + 7] & 0xff) << 56; - case 7: - k1 ^= (long) (data[tailStart + 6] & 0xff) << 48; - case 6: - k1 ^= (long) (data[tailStart + 5] & 0xff) << 40; - case 5: - k1 ^= (long) (data[tailStart + 4] & 0xff) << 32; - case 4: - k1 ^= (long) (data[tailStart + 3] & 0xff) << 24; - case 3: - k1 ^= (long) (data[tailStart + 2] & 0xff) << 16; - case 2: - k1 ^= (long) (data[tailStart + 1] & 0xff) << 8; - case 1: - k1 ^= data[tailStart] & 0xff; - k1 *= C1; - k1 = Long.rotateLeft(k1, R1); - k1 *= C2; - h1 ^= k1; + case 8: + k1 ^= (long) (data[tailStart + 7] & 0xff) << 56; + case 7: + k1 ^= (long) (data[tailStart + 6] & 0xff) << 48; + case 6: + k1 ^= (long) (data[tailStart + 5] & 0xff) << 40; + case 5: + k1 ^= (long) (data[tailStart + 4] & 0xff) << 32; + case 4: + k1 ^= (long) (data[tailStart + 3] & 0xff) << 24; + case 3: + k1 ^= (long) (data[tailStart + 2] & 0xff) << 16; + case 2: + k1 ^= (long) (data[tailStart + 1] & 0xff) << 8; + case 1: + k1 ^= data[tailStart] & 0xff; + k1 *= C1; + k1 = Long.rotateLeft(k1, R1); + k1 *= C2; + h1 ^= k1; } // finalization @@ -315,7 +332,24 @@ public class MurmurHash implements Serializable{ h1 += h2; h2 += h1; - return new long[] { h1, h2 }; + return new long[]{h1, h2}; + } + + private static int mix32(int k, int hash) { + k *= C1_32; + k = Integer.rotateLeft(k, R1_32); + k *= C2_32; + hash ^= k; + return Integer.rotateLeft(hash, R2_32) * M_32 + N_32; + } + + private static int fmix32(int hash) { + hash ^= (hash >>> 16); + hash *= 0x85ebca6b; + hash ^= (hash >>> 13); + hash *= 0xc2b2ae35; + hash ^= (hash >>> 16); + return hash; } private static long fmix64(long h) { diff --git a/hutool-core/src/test/java/cn/hutool/core/lang/hash/MurMurHashTest.java b/hutool-core/src/test/java/cn/hutool/core/lang/hash/MurmurHashTest.java similarity index 97% rename from hutool-core/src/test/java/cn/hutool/core/lang/hash/MurMurHashTest.java rename to hutool-core/src/test/java/cn/hutool/core/lang/hash/MurmurHashTest.java index 4a4628145..ee3eadbac 100755 --- a/hutool-core/src/test/java/cn/hutool/core/lang/hash/MurMurHashTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/lang/hash/MurmurHashTest.java @@ -4,7 +4,7 @@ import cn.hutool.core.util.StrUtil; import org.junit.Assert; import org.junit.Test; -public class MurMurHashTest { +public class MurmurHashTest { @Test public void hash32Test() {