This commit is contained in:
Looly 2022-09-20 17:50:43 +08:00
parent 68cfc3db5d
commit f525f21196
8 changed files with 232 additions and 128 deletions

View File

@ -3277,6 +3277,8 @@ public class FileUtil extends PathUtil {
contentType = "application/x-rar-compressed";
} else if (StrUtil.endWithIgnoreCase(filePath, ".7z")) {
contentType = "application/x-7z-compressed";
} else if (StrUtil.endWithIgnoreCase(filePath, ".wgt")) {
contentType = "application/widget";
}
}

View File

@ -470,7 +470,7 @@ public class HashUtil {
* @return hash值
* @since 4.3.3
*/
public static long[] murmur128(final byte[] data) {
public static Number128 murmur128(final byte[] data) {
return MurmurHash.hash128(data);
}

View File

@ -1,8 +1,8 @@
package cn.hutool.core.lang.hash;
import cn.hutool.core.text.StrUtil;
import cn.hutool.core.util.ByteUtil;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.text.StrUtil;
import java.io.Serializable;
import java.nio.ByteOrder;
@ -10,17 +10,17 @@ import java.nio.charset.Charset;
/**
* Murmur3 32bit、64bit、128bit 哈希算法实现<br>
* 此算法来自于https://github.com/xlturing/Simhash4J/blob/master/src/main/java/bee/simhash/main/Murmur3.java
* 此算法来自于<a href="https://github.com/xlturing/Simhash4J/blob/master/src/main/java/bee/simhash/main/Murmur3.java">...</a>
*
* <p>
* 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94 <br>
* 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255
* </p>
*
* @author looly,Simhash4J
* @author looly, Simhash4J
* @since 4.3.3
*/
public class MurmurHash implements Serializable{
public class MurmurHash implements Serializable {
private static final long serialVersionUID = 1L;
// Constants for 32 bit variant
@ -68,55 +68,57 @@ public class MurmurHash implements Serializable{
/**
 * Computes the Murmur3 32-bit hash of a byte array, starting at index 0.
 *
 * @param data   bytes to hash
 * @param length number of bytes to hash
 * @param seed   seed for the hash
 * @return the 32-bit hash value
 */
public static int hash32(final byte[] data, final int length, final int seed) {
	// Hashing always starts at the beginning of the array for this overload.
	final int startOffset = 0;
	return hash32(data, startOffset, length, seed);
}
/**
* Murmur3 32-bit Hash值计算
*
* @param data 数据
* @param offset 数据开始位置
* @param length 长度
* @param seed 种子默认0
* @return Hash值
*/
public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
int hash = seed;
final int nblocks = length >> 2;
// body
for (int i = 0; i < nblocks; i++) {
final int i4 = i << 2;
int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER);
final int i4 = offset + (i << 2);
final int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER);
// mix functions
k *= C1_32;
k = Integer.rotateLeft(k, R1_32);
k *= C2_32;
hash ^= k;
hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
hash = mix32(k, hash);
}
// tail
final int idx = nblocks << 2;
final int idx = offset + (nblocks << 2);
int k1 = 0;
switch (length - idx) {
case 3:
k1 ^= data[idx + 2] << 16;
case 2:
k1 ^= data[idx + 1] << 8;
case 1:
k1 ^= data[idx];
switch (offset + length - idx) {
case 3:
k1 ^= (data[idx + 2] & 0xff) << 16;
case 2:
k1 ^= (data[idx + 1] & 0xff) << 8;
case 1:
k1 ^= (data[idx] & 0xff);
// mix functions
k1 *= C1_32;
k1 = Integer.rotateLeft(k1, R1_32);
k1 *= C2_32;
hash ^= k1;
// mix functions
k1 *= C1_32;
k1 = Integer.rotateLeft(k1, R1_32);
k1 *= C2_32;
hash ^= k1;
}
// finalization
hash ^= length;
hash ^= (hash >>> 16);
hash *= 0x85ebca6b;
hash ^= (hash >>> 13);
hash *= 0xc2b2ae35;
hash ^= (hash >>> 16);
return hash;
return fmix32(hash);
}
/**
@ -133,7 +135,6 @@ public class MurmurHash implements Serializable{
* Murmur3 64-bit 算法<br>
* This is essentially MSB 8 bytes of Murmur3 128-bit variant.
*
*
* @param data 数据
* @return Hash值
*/
@ -142,12 +143,12 @@ public class MurmurHash implements Serializable{
}
/**
* Murmur3 64-bit 算法 <br>
* Murmur3 64-bit 算法 <br>
* This is essentially MSB 8 bytes of Murmur3 128-bit variant.
*
* @param data 数据
* @param data 数据
* @param length 长度
* @param seed 种子默认0
* @param seed 种子默认0
* @return Hash值
*/
public static long hash64(final byte[] data, final int length, final int seed) {
@ -171,24 +172,24 @@ public class MurmurHash implements Serializable{
long k1 = 0;
final int tailStart = nblocks << 3;
switch (length - tailStart) {
case 7:
k1 ^= ((long) data[tailStart + 6] & 0xff) << 48;
case 6:
k1 ^= ((long) data[tailStart + 5] & 0xff) << 40;
case 5:
k1 ^= ((long) data[tailStart + 4] & 0xff) << 32;
case 4:
k1 ^= ((long) data[tailStart + 3] & 0xff) << 24;
case 3:
k1 ^= ((long) data[tailStart + 2] & 0xff) << 16;
case 2:
k1 ^= ((long) data[tailStart + 1] & 0xff) << 8;
case 1:
k1 ^= ((long) data[tailStart] & 0xff);
k1 *= C1;
k1 = Long.rotateLeft(k1, R1);
k1 *= C2;
hash ^= k1;
case 7:
k1 ^= ((long) data[tailStart + 6] & 0xff) << 48;
case 6:
k1 ^= ((long) data[tailStart + 5] & 0xff) << 40;
case 5:
k1 ^= ((long) data[tailStart + 4] & 0xff) << 32;
case 4:
k1 ^= ((long) data[tailStart + 3] & 0xff) << 24;
case 3:
k1 ^= ((long) data[tailStart + 2] & 0xff) << 16;
case 2:
k1 ^= ((long) data[tailStart + 1] & 0xff) << 8;
case 1:
k1 ^= ((long) data[tailStart] & 0xff);
k1 *= C1;
k1 = Long.rotateLeft(k1, R1);
k1 *= C2;
hash ^= k1;
}
// finalization
@ -204,7 +205,7 @@ public class MurmurHash implements Serializable{
* @param data 数据
* @return Hash值 (2 longs)
*/
public static long[] hash128(final CharSequence data) {
public static Number128 hash128(final CharSequence data) {
return hash128(StrUtil.bytes(data, DEFAULT_CHARSET));
}
@ -214,26 +215,42 @@ public class MurmurHash implements Serializable{
* @param data -数据
* @return Hash值 (2 longs)
*/
public static long[] hash128(final byte[] data) {
public static Number128 hash128(final byte[] data) {
return hash128(data, data.length, DEFAULT_SEED);
}
/**
* Murmur3 128-bit variant.
*
* @param data 数据
* @param data 数据
* @param length 长度
* @param seed 种子默认0
* @param seed 种子默认0
* @return Hash值(2 longs)
*/
public static long[] hash128(final byte[] data, final int length, final int seed) {
public static Number128 hash128(final byte[] data, final int length, final int seed) {
return hash128(data, 0, length, seed);
}
/**
* Murmur3 128-bit variant.
*
* @param data 数据
* @param offset 数据开始位置
* @param length 长度
* @param seed 种子默认0
* @return Hash值(2 longs)
*/
public static Number128 hash128(final byte[] data, final int offset, final int length, int seed) {
// 避免负数的种子
seed &= 0xffffffffL;
long h1 = seed;
long h2 = seed;
final int nblocks = length >> 4;
// body
for (int i = 0; i < nblocks; i++) {
final int i16 = i << 4;
final int i16 = offset + (i << 4);
long k1 = ByteUtil.bytesToLong(data, i16, DEFAULT_ORDER);
long k2 = ByteUtil.bytesToLong(data, i16 + 8, DEFAULT_ORDER);
@ -259,47 +276,47 @@ public class MurmurHash implements Serializable{
// tail
long k1 = 0;
long k2 = 0;
final int tailStart = nblocks << 4;
switch (length - tailStart) {
case 15:
k2 ^= (long) (data[tailStart + 14] & 0xff) << 48;
case 14:
k2 ^= (long) (data[tailStart + 13] & 0xff) << 40;
case 13:
k2 ^= (long) (data[tailStart + 12] & 0xff) << 32;
case 12:
k2 ^= (long) (data[tailStart + 11] & 0xff) << 24;
case 11:
k2 ^= (long) (data[tailStart + 10] & 0xff) << 16;
case 10:
k2 ^= (long) (data[tailStart + 9] & 0xff) << 8;
case 9:
k2 ^= data[tailStart + 8] & 0xff;
k2 *= C2;
k2 = Long.rotateLeft(k2, R3);
k2 *= C1;
h2 ^= k2;
final int tailStart = offset + (nblocks << 4);
switch (offset + length - tailStart) {
case 15:
k2 ^= (long) (data[tailStart + 14] & 0xff) << 48;
case 14:
k2 ^= (long) (data[tailStart + 13] & 0xff) << 40;
case 13:
k2 ^= (long) (data[tailStart + 12] & 0xff) << 32;
case 12:
k2 ^= (long) (data[tailStart + 11] & 0xff) << 24;
case 11:
k2 ^= (long) (data[tailStart + 10] & 0xff) << 16;
case 10:
k2 ^= (long) (data[tailStart + 9] & 0xff) << 8;
case 9:
k2 ^= data[tailStart + 8] & 0xff;
k2 *= C2;
k2 = Long.rotateLeft(k2, R3);
k2 *= C1;
h2 ^= k2;
case 8:
k1 ^= (long) (data[tailStart + 7] & 0xff) << 56;
case 7:
k1 ^= (long) (data[tailStart + 6] & 0xff) << 48;
case 6:
k1 ^= (long) (data[tailStart + 5] & 0xff) << 40;
case 5:
k1 ^= (long) (data[tailStart + 4] & 0xff) << 32;
case 4:
k1 ^= (long) (data[tailStart + 3] & 0xff) << 24;
case 3:
k1 ^= (long) (data[tailStart + 2] & 0xff) << 16;
case 2:
k1 ^= (long) (data[tailStart + 1] & 0xff) << 8;
case 1:
k1 ^= data[tailStart] & 0xff;
k1 *= C1;
k1 = Long.rotateLeft(k1, R1);
k1 *= C2;
h1 ^= k1;
case 8:
k1 ^= (long) (data[tailStart + 7] & 0xff) << 56;
case 7:
k1 ^= (long) (data[tailStart + 6] & 0xff) << 48;
case 6:
k1 ^= (long) (data[tailStart + 5] & 0xff) << 40;
case 5:
k1 ^= (long) (data[tailStart + 4] & 0xff) << 32;
case 4:
k1 ^= (long) (data[tailStart + 3] & 0xff) << 24;
case 3:
k1 ^= (long) (data[tailStart + 2] & 0xff) << 16;
case 2:
k1 ^= (long) (data[tailStart + 1] & 0xff) << 8;
case 1:
k1 ^= data[tailStart] & 0xff;
k1 *= C1;
k1 = Long.rotateLeft(k1, R1);
k1 *= C2;
h1 ^= k1;
}
// finalization
@ -315,7 +332,24 @@ public class MurmurHash implements Serializable{
h1 += h2;
h2 += h1;
return new long[] { h1, h2 };
return new Number128(h1, h2);
}
/**
 * Folds one 4-byte block into the running 32-bit hash.
 *
 * @param k    the block value to mix in
 * @param hash the running hash before this block
 * @return the running hash after this block has been mixed in
 */
private static int mix32(int k, int hash) {
	// Scramble the block: multiply, rotate, multiply.
	int block = k;
	block *= C1_32;
	block = Integer.rotateLeft(block, R1_32);
	block *= C2_32;

	// Combine with the running hash, then rotate and apply the linear step.
	final int combined = hash ^ block;
	return Integer.rotateLeft(combined, R2_32) * M_32 + N_32;
}
/**
 * Finalization mix for the 32-bit hash: three xor-with-unsigned-shift steps
 * (16, 13, 16 bits) separated by two multiplications.
 *
 * @param hash the hash value before finalization
 * @return the finalized hash value
 */
private static int fmix32(int hash) {
	int h = hash;
	h = (h ^ (h >>> 16)) * 0x85ebca6b;
	h = (h ^ (h >>> 13)) * 0xc2b2ae35;
	return h ^ (h >>> 16);
}
private static long fmix64(long h) {

View File

@ -1,6 +1,5 @@
package cn.hutool.core.stream;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.IORuntimeException;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.stream.spliterators.DropWhileSpliterator;
@ -13,6 +12,8 @@ import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Iterator;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Function;
@ -58,9 +59,33 @@ public class StreamUtil {
*/
public static <T> Stream<T> of(final Iterable<T> iterable, final boolean parallel) {
Assert.notNull(iterable, "Iterable must be not null!");
return StreamSupport.stream(
Spliterators.spliterator(CollUtil.toCollection(iterable), 0),
parallel);
return iterable instanceof Collection ?
parallel ? ((Collection<T>) iterable).parallelStream() : ((Collection<T>) iterable).stream() :
StreamSupport.stream(iterable.spliterator(), parallel);
}
/**
 * Converts an {@link Iterator} into a sequential {@link Stream}.
 *
 * @param iterator the iterator to wrap
 * @param <T>      element type
 * @return a {@link Stream} over the iterator's elements
 * @throws IllegalArgumentException if {@code iterator} is null
 */
public static <T> Stream<T> ofIter(final Iterator<T> iterator) {
	// This overload always requests a non-parallel stream.
	final boolean parallel = false;
	return ofIter(iterator, parallel);
}
/**
 * Converts an {@link Iterator} into a {@link Stream}.
 *
 * @param iterator the iterator to wrap
 * @param parallel whether the resulting stream should be parallel
 * @param <T>      element type
 * @return a {@link Stream} over the iterator's elements
 * @throws IllegalArgumentException if {@code iterator} is null
 */
public static <T> Stream<T> ofIter(final Iterator<T> iterator, final boolean parallel) {
	Assert.notNull(iterator, "iterator must not be null!");
	// The iterator's size is not known up front and no characteristics are declared (0).
	final Spliterator<T> spliterator = Spliterators.spliteratorUnknownSize(iterator, 0);
	return StreamSupport.stream(spliterator, parallel);
}
/**
@ -160,7 +185,7 @@ public class StreamUtil {
* @param next 用上一个元素作为参数执行并返回一个新的元素
* @return 无限有序流
*/
public static <T> Stream<T> iterate(T seed, Predicate<? super T> hasNext, UnaryOperator<T> next) {
public static <T> Stream<T> iterate(final T seed, final Predicate<? super T> hasNext, final UnaryOperator<T> next) {
requireNonNull(next);
requireNonNull(hasNext);
return StreamSupport.stream(IterateSpliterator.create(seed, hasNext, next), false);
@ -177,7 +202,7 @@ public class StreamUtil {
* @param predicate 断言
* @return 与指定断言匹配的元素组成的流
*/
public static <T> Stream<T> takeWhile(Stream<T> source, Predicate<? super T> predicate) {
public static <T> Stream<T> takeWhile(final Stream<T> source, final Predicate<? super T> predicate) {
requireNonNull(source);
requireNonNull(predicate);
return createStatefulNewStream(source, TakeWhileSpliterator.create(source.spliterator(), predicate));
@ -194,7 +219,7 @@ public class StreamUtil {
* @param predicate 断言
* @return 剩余元素组成的流
*/
public static <T> Stream<T> dropWhile(Stream<T> source, Predicate<? super T> predicate) {
public static <T> Stream<T> dropWhile(final Stream<T> source, final Predicate<? super T> predicate) {
requireNonNull(source);
requireNonNull(predicate);
return createStatefulNewStream(source, DropWhileSpliterator.create(source.spliterator(), predicate));
@ -214,7 +239,7 @@ public class StreamUtil {
* @param <R> 新流的元素类型
* @return 新流
*/
private static <T, R> Stream<R> createStatefulNewStream(Stream<T> source, Spliterator<R> newSpliterator) {
private static <T, R> Stream<R> createStatefulNewStream(final Stream<T> source, final Spliterator<R> newSpliterator) {
// 创建新流
Stream<R> newStream = StreamSupport.stream(newSpliterator, source.isParallel());
// 如果旧流是并行流, 新流主动调用一个有状态的操作, 虽然没有意义, 但是可以让后续的无状态节点正常并发

View File

@ -14,8 +14,8 @@ public class IterChainTest {
@Test
public void testAddChain() {
Iterator<Integer> iter1 = Arrays.asList(1, 2).iterator();
Iterator<Integer> iter2 = Arrays.asList(3, 4).iterator();
final Iterator<Integer> iter1 = Arrays.asList(1, 2).iterator();
final Iterator<Integer> iter2 = Arrays.asList(3, 4).iterator();
IterChain<Integer> iterChain = new IterChain<>();
Assert.assertSame(iterChain, iterChain.addChain(iter1));
Assert.assertSame(iterChain, iterChain.addChain(iter2));
@ -27,7 +27,7 @@ public class IterChainTest {
@Test
public void testHasNext() {
IterChain<Integer> iterChain = new IterChain<>();
final IterChain<Integer> iterChain = new IterChain<>();
Assert.assertFalse(iterChain.hasNext());
Assert.assertFalse(iterChain.addChain(Collections.emptyIterator()).hasNext());
Assert.assertTrue(iterChain.addChain(Arrays.asList(3, 4).iterator()).hasNext());
@ -35,9 +35,9 @@ public class IterChainTest {
@Test
public void testNext() {
Iterator<Integer> iter1 = Arrays.asList(1, 2).iterator();
Iterator<Integer> iter2 = Arrays.asList(3, 4).iterator();
IterChain<Integer> iterChain = new IterChain<>();
final Iterator<Integer> iter1 = Arrays.asList(1, 2).iterator();
final Iterator<Integer> iter2 = Arrays.asList(3, 4).iterator();
final IterChain<Integer> iterChain = new IterChain<>();
Assert.assertSame(iterChain, iterChain.addChain(iter1));
Assert.assertSame(iterChain, iterChain.addChain(iter2));
Assert.assertEquals((Integer)1, iterChain.next());
@ -48,20 +48,20 @@ public class IterChainTest {
@Test
public void testRemove() {
IterChain<Integer> iterChain = new IterChain<>();
final IterChain<Integer> iterChain = new IterChain<>();
iterChain.addChain(Arrays.asList(1, 2).iterator());
Assert.assertThrows(IllegalStateException.class, iterChain::remove);
}
@Test
public void testIterator() {
Iterator<Integer> iter1 = Arrays.asList(1, 2).iterator();
Iterator<Integer> iter2 = Arrays.asList(3, 4).iterator();
IterChain<Integer> iterChain = new IterChain<>();
final Iterator<Integer> iter1 = Arrays.asList(1, 2).iterator();
final Iterator<Integer> iter2 = Arrays.asList(3, 4).iterator();
final IterChain<Integer> iterChain = new IterChain<>();
Assert.assertSame(iterChain, iterChain.addChain(iter1));
Assert.assertSame(iterChain, iterChain.addChain(iter2));
Iterator<Iterator<Integer>> iterators = iterChain.iterator();
final Iterator<Iterator<Integer>> iterators = iterChain.iterator();
Assert.assertSame(iter1, iterators.next());
Assert.assertSame(iter2, iterators.next());
}

View File

@ -446,6 +446,10 @@ public class FileUtilTest {
Assert.assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", mimeType);
mimeType = FileUtil.getMimeType("test.pptx");
Assert.assertEquals("application/vnd.openxmlformats-officedocument.presentationml.presentation", mimeType);
// pr#2617@Github
mimeType = FileUtil.getMimeType("test.wgt");
Assert.assertEquals("application/widget", mimeType);
}
@Test

View File

@ -4,20 +4,20 @@ import cn.hutool.core.text.StrUtil;
import org.junit.Assert;
import org.junit.Test;
public class MurMurHashTest {
public class MurmurHashTest {
@Test
public void hash32Test() {
int hv = MurmurHash.hash32(StrUtil.utf8Bytes(""));
Assert.assertEquals(222142701, hv);
Assert.assertEquals(-1898877446, hv);
hv = MurmurHash.hash32(StrUtil.utf8Bytes("你好"));
Assert.assertEquals(1188098267, hv);
Assert.assertEquals(337357348, hv);
hv = MurmurHash.hash32(StrUtil.utf8Bytes("见到你很高兴"));
Assert.assertEquals(-1898490321, hv);
Assert.assertEquals(1101306141, hv);
hv = MurmurHash.hash32(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"));
Assert.assertEquals(-1713131054, hv);
Assert.assertEquals(-785444229, hv);
}
@Test

View File

@ -1,8 +1,15 @@
package cn.hutool.core.stream;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.collection.SetUtil;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class StreamUtilTest {
@ -13,4 +20,36 @@ public class StreamUtilTest {
final String result = stream.collect(CollectorUtil.joining(","));
Assert.assertEquals("2,4,8,16", result);
}
// === iterator ===
@Test
public void streamTestNullIterator() {
	// A null iterator is expected to be rejected with IllegalArgumentException.
	final Iterator<Object> absent = null;
	Assert.assertThrows(IllegalArgumentException.class, () -> StreamUtil.ofIter(absent));
}
@SuppressWarnings({"RedundantOperationOnEmptyContainer", "RedundantCollectionOperation"})
@Test
public void streamTestEmptyListToIterator() {
	// Iterator obtained from an empty list should yield an empty stream.
	final Iterator<Object> fromEmptyList = new ArrayList<>().iterator();
	assertStreamIsEmpty(StreamUtil.ofIter(fromEmptyList));
}
@Test
public void streamTestEmptyIterator() {
	// The shared empty iterator should yield an empty stream.
	final Iterator<Object> empty = Collections.emptyIterator();
	assertStreamIsEmpty(StreamUtil.ofIter(empty));
}
@Test
public void streamTestOrdinaryIterator() {
	// List-backed iterator: the streamed elements are compared in order.
	final ArrayList<Integer> sourceList = ListUtil.of(1, 2, 3);
	final Object[] streamed = StreamUtil.ofIter(sourceList.iterator()).toArray();
	Assert.assertArrayEquals(new Integer[]{1, 2, 3}, streamed);

	// Set-backed iterator: the collected set is compared with the source set.
	final HashSet<Integer> sourceSet = SetUtil.of(1, 2, 3);
	final Iterator<Integer> setIterator = sourceSet.iterator();
	Assert.assertEquals(sourceSet, StreamUtil.ofIter(setIterator).collect(Collectors.toSet()));
}
/**
 * Asserts that the given stream is non-null and contains no elements.
 * Consumes the stream.
 *
 * @param stream the stream to check
 */
void assertStreamIsEmpty(final Stream<?> stream) {
	Assert.assertNotNull(stream);
	final Object[] elements = stream.toArray();
	Assert.assertEquals(0, elements.length);
}
// ================ stream test end ================
}