diff --git a/hutool-core/src/main/java/cn/hutool/core/compress/InflaterInputStream.java b/hutool-core/src/main/java/cn/hutool/core/compress/InflaterInputStream.java
new file mode 100755
index 000000000..beaa58534
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/compress/InflaterInputStream.java
@@ -0,0 +1,136 @@
+package cn.hutool.core.compress;
+
+import cn.hutool.core.io.IORuntimeException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+import java.util.zip.Inflater;
+
+/**
+ * Decompresses "deflate"-encoded content by delegating to {@link java.util.zip.InflaterInputStream}.
+ * The first two bytes of the wrapped stream are inspected to choose between zlib-wrapped and raw deflate data.
+ * Reference: org.apache.hc.client5.http.entity.DeflateInputStream
+ *
+ * @author looly
+ */
+public class InflaterInputStream extends InputStream {
+
+	/** Inflater created by this class; released in {@link #close()} */
+	private final Inflater inflater;
+	/** JDK stream that performs the decompression */
+	private final java.util.zip.InflaterInputStream in;
+
+	/**
+	 * Constructor using an inflate buffer of 512 bytes.
+	 *
+	 * @param wrapped the stream to wrap
+	 */
+	public InflaterInputStream(final InputStream wrapped) {
+		this(wrapped, 512);
+	}
+
+	/**
+	 * Constructor.
+	 *
+	 * @param wrapped the stream to wrap
+	 * @param size    buffer size
+	 * @throws IORuntimeException       if the stream ends before two bytes are read, or reading fails
+	 * @throws IllegalArgumentException if {@code size} is not positive
+	 */
+	public InflaterInputStream(final InputStream wrapped, final int size) {
+		final PushbackInputStream pushback = new PushbackInputStream(wrapped, 2);
+		final int i1, i2;
+		try {
+			i1 = pushback.read();
+			i2 = pushback.read();
+			if (i1 == -1 || i2 == -1) {
+				throw new IORuntimeException("Unexpected end of stream");
+			}
+
+			// Push back in reverse order so the bytes are read again in their original order.
+			pushback.unread(i2);
+			pushback.unread(i1);
+		} catch (final IOException e) {
+			throw new IORuntimeException(e);
+		}
+
+		// zlib header (RFC 1950): CM == 8, CINFO <= 7 and the two bytes, read as a
+		// big-endian number, divisible by 31. Otherwise assume raw deflate (nowrap).
+		boolean nowrap = true;
+		final int b1 = i1 & 0xFF;
+		final int compressionMethod = b1 & 0xF;
+		final int compressionInfo = b1 >> 4 & 0xF;
+		final int b2 = i2 & 0xFF;
+		if (compressionMethod == 8 && compressionInfo <= 7 && ((b1 << 8) | b2) % 31 == 0) {
+			nowrap = false;
+		}
+
+		this.inflater = new Inflater(nowrap);
+		try {
+			this.in = new java.util.zip.InflaterInputStream(pushback, this.inflater, size);
+		} catch (final RuntimeException e) {
+			// The JDK constructor rejects invalid arguments; free the inflater before propagating.
+			this.inflater.end();
+			throw e;
+		}
+	}
+
+	@Override
+	public int read() throws IOException {
+		return this.in.read();
+	}
+
+	@SuppressWarnings("NullableProblems")
+	@Override
+	public int read(final byte[] b) throws IOException {
+		return in.read(b);
+	}
+
+	@SuppressWarnings("NullableProblems")
+	@Override
+	public int read(final byte[] b, final int off, final int len) throws IOException {
+		return in.read(b, off, len);
+	}
+
+	@Override
+	public long skip(final long n) throws IOException {
+		return in.skip(n);
+	}
+
+	@Override
+	public int available() throws IOException {
+		return in.available();
+	}
+
+	@Override
+	public void mark(final int readLimit) {
+		in.mark(readLimit);
+	}
+
+	@Override
+	public void reset() throws IOException {
+		in.reset();
+	}
+
+	@Override
+	public boolean markSupported() {
+		return in.markSupported();
+	}
+
+	/**
+	 * Closes the underlying stream and ends the inflater.
+	 * The JDK stream is given an explicit {@link Inflater}, which its own {@code close()} leaves open,
+	 * so it is ended here to release its resources promptly.
+	 *
+	 * @throws IOException if closing the underlying stream fails
+	 */
+	@Override
+	public void close() throws IOException {
+		try {
+			in.close();
+		} finally {
+			inflater.end();
+		}
+	}
+}
diff --git a/hutool-http/pom.xml b/hutool-http/pom.xml
index d858816a9..5c7d4eba2 100755
--- a/hutool-http/pom.xml
+++ b/hutool-http/pom.xml
@@ -22,6 +22,12 @@
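 			<artifactId>hutool-core</artifactId>
 			<version>${project.parent.version}</version>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.httpcomponents.client5</groupId>
+			<artifactId>httpclient5</artifactId>
+			<version>5.1.3</version>
+			<scope>provided</scope>
+		</dependency>
 		<dependency>
 			<groupId>javax.xml.soap</groupId>
 			<artifactId>javax.xml.soap-api</artifactId>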
diff --git a/hutool-http/src/main/java/cn/hutool/http/GlobalCompressStreamRegister.java b/hutool-http/src/main/java/cn/hutool/http/GlobalCompressStreamRegister.java
index a0d13a9f3..037ac5d6e 100755
--- a/hutool-http/src/main/java/cn/hutool/http/GlobalCompressStreamRegister.java
+++ b/hutool-http/src/main/java/cn/hutool/http/GlobalCompressStreamRegister.java
@@ -1,9 +1,11 @@
package cn.hutool.http;
+import cn.hutool.core.compress.InflaterInputStream;
import cn.hutool.core.map.CaseInsensitiveMap;
import java.io.InputStream;
import java.util.Map;
+import java.util.zip.GZIPInputStream;
/**
* 全局响应内容压缩解压器注册中心
@@ -23,7 +25,12 @@ public enum GlobalCompressStreamRegister {
*/
private final Map> compressMap = new CaseInsensitiveMap<>();
+ /**
+ * Constructor; registers the stream classes used by default for the "gzip" and "deflate" encodings
+ */
GlobalCompressStreamRegister() {
+ compressMap.put("gzip", GZIPInputStream.class);
+ compressMap.put("deflate", InflaterInputStream.class);
}
/**
diff --git a/hutool-http/src/main/java/cn/hutool/http/HttpInputStream.java b/hutool-http/src/main/java/cn/hutool/http/HttpInputStream.java
index a26dcae34..384c64bd0 100644
--- a/hutool-http/src/main/java/cn/hutool/http/HttpInputStream.java
+++ b/hutool-http/src/main/java/cn/hutool/http/HttpInputStream.java
@@ -7,19 +7,17 @@ import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.Inflater;
-import java.util.zip.InflaterInputStream;
/**
* HTTP输入流,此流用于包装Http请求响应内容的流,用于解析各种压缩、分段的响应流内容
*
* @author Looly
- *
*/
public class HttpInputStream extends InputStream {
- /** 原始流 */
+ /**
+ * 原始流
+ */
private InputStream in;
/**
@@ -89,30 +87,17 @@ public class HttpInputStream extends InputStream {
// 在一些情况下,返回的流为null,此时提供状态码说明
if (null == this.in) {
- this.in = new ByteArrayInputStream(StrUtil.format("Error request, response status: {}", response.status).getBytes());
+ this.in = new ByteArrayInputStream(StrUtil.format("Error request, null response with status: {}", response.status).getBytes());
return;
}
final String contentEncoding = response.contentEncoding();
- if (StrUtil.equalsIgnoreCase("gzip", contentEncoding) && false == (response.in instanceof GZIPInputStream)) {
- // Accept-Encoding: gzip
+ final Class extends InputStream> streamClass = GlobalCompressStreamRegister.INSTANCE.get(contentEncoding);
+ if (null != streamClass) {
try {
- this.in = new GZIPInputStream(this.in);
- } catch (final IOException ignore) {
- // 在类似于Head等方法中无body返回,此时GZIPInputStream构造会出现错误,在此忽略此错误读取普通数据
- // ignore
- }
- } else if (StrUtil.equalsIgnoreCase("deflate", contentEncoding) && false == (this.in instanceof InflaterInputStream)) {
- // Accept-Encoding: defalte
- this.in = new InflaterInputStream(this.in, new Inflater(true));
- } else{
- final Class extends InputStream> streamClass = GlobalCompressStreamRegister.INSTANCE.get(contentEncoding);
- if(null != streamClass){
- try {
- this.in = ConstructorUtil.newInstance(streamClass, this.in);
- } catch (final Exception ignore) {
- // 对于构造错误的压缩算法,跳过之
- }
+ this.in = ConstructorUtil.newInstance(streamClass, this.in);
+ } catch (final Exception ignore) {
+ // 对于构造错误的压缩算法,跳过之
}
}
}
diff --git a/hutool-http/src/main/java/cn/hutool/http/HTMLFilter.java b/hutool-http/src/main/java/cn/hutool/http/html/HTMLFilter.java
similarity index 99%
rename from hutool-http/src/main/java/cn/hutool/http/HTMLFilter.java
rename to hutool-http/src/main/java/cn/hutool/http/html/HTMLFilter.java
index dc1e337c8..2df015628 100644
--- a/hutool-http/src/main/java/cn/hutool/http/HTMLFilter.java
+++ b/hutool-http/src/main/java/cn/hutool/http/html/HTMLFilter.java
@@ -1,4 +1,4 @@
-package cn.hutool.http;
+package cn.hutool.http.html;
import cn.hutool.core.lang.Console;
import cn.hutool.core.map.SafeConcurrentHashMap;
diff --git a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java b/hutool-http/src/main/java/cn/hutool/http/html/HtmlUtil.java
similarity index 85%
rename from hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java
rename to hutool-http/src/main/java/cn/hutool/http/html/HtmlUtil.java
index 2ebbe8cd5..f9fe4c5a9 100755
--- a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java
+++ b/hutool-http/src/main/java/cn/hutool/http/html/HtmlUtil.java
@@ -1,10 +1,12 @@
-package cn.hutool.http;
+package cn.hutool.http.html;
import cn.hutool.core.regex.ReUtil;
import cn.hutool.core.text.StrUtil;
import cn.hutool.core.text.escape.EscapeUtil;
import cn.hutool.core.util.XmlUtil;
+import java.util.regex.Pattern;
+
/**
* HTML工具类
*
@@ -13,18 +15,23 @@ import cn.hutool.core.util.XmlUtil;
* 比如去掉指定标签(例如广告栏等)、去除JS、去掉样式等等,这些操作都可以使用此工具类完成。
*
* @author xiaoleilu
- *
*/
public class HtmlUtil {
- public static final String RE_HTML_MARK = "(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)";
- public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>";
+ /**
+ * Regex matching HTML tags, including closing and self-closing forms
+ */
+ public static final Pattern RE_HTML_MARK = Pattern.compile("(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)", Pattern.CASE_INSENSITIVE);
+ /**
+ * Regex matching a script element together with its content; case-insensitive, and DOTALL so that scripts spanning several lines are matched too
+ */
+ public static final Pattern RE_SCRIPT = Pattern.compile("<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
private static final char[][] TEXT = new char[64][];
static {
for (int i = 0; i < 64; i++) {
- TEXT[i] = new char[] { (char) i };
+ TEXT[i] = new char[]{(char) i};
}
// special HTML characters
@@ -75,14 +82,24 @@ public class HtmlUtil {
* @return 清除标签后的文本
*/
public static String cleanHtmlTag(final String content) {
- return content.replaceAll(RE_HTML_MARK, "");
+ return ReUtil.replaceAll(content, RE_HTML_MARK, "");
+ }
+
+ /**
+ * Removes all script tags together with their content, by replacing every match of {@link #RE_SCRIPT} with an empty string
+ *
+ * @param content the text to clean
+ * @return the text with script tags removed
+ */
+ public static String removeScriptTag(final String content) {
+ return ReUtil.replaceAll(content, RE_SCRIPT, "");
+ }
/**
* 清除指定HTML标签和被标签包围的内容
* 不区分大小写
*
- * @param content 文本
+ * @param content 文本
* @param tagNames 要清除的标签
* @return 去除标签后的文本
*/
@@ -94,7 +111,7 @@ public class HtmlUtil {
* 清除指定HTML标签,不包括内容
* 不区分大小写
*
- * @param content 文本
+ * @param content 文本
* @param tagNames 要清除的标签
* @return 去除标签后的文本
*/
@@ -106,9 +123,9 @@ public class HtmlUtil {
* 清除指定HTML标签
* 不区分大小写
*
- * @param content 文本
+ * @param content 文本
* @param withTagContent 是否去掉被包含在标签中的内容
- * @param tagNames 要清除的标签
+ * @param tagNames 要清除的标签
* @return 去除标签后的文本
*/
public static String removeHtmlTag(String content, final boolean withTagContent, final String... tagNames) {
@@ -136,7 +153,7 @@ public class HtmlUtil {
* 去除HTML标签中的属性,如果多个标签有相同属性,都去除
*
* @param content 文本
- * @param attrs 属性名(不区分大小写)
+ * @param attrs 属性名(不区分大小写)
* @return 处理后的文本
*/
public static String removeHtmlAttr(String content, final String... attrs) {
@@ -156,7 +173,7 @@ public class HtmlUtil {
/**
* 去除指定标签的所有属性
*
- * @param content 内容
+ * @param content 内容
* @param tagNames 指定标签
* @return 处理后的文本
*/
diff --git a/hutool-http/src/main/java/cn/hutool/http/html/package-info.java b/hutool-http/src/main/java/cn/hutool/http/html/package-info.java
new file mode 100755
index 000000000..a57808dd6
--- /dev/null
+++ b/hutool-http/src/main/java/cn/hutool/http/html/package-info.java
@@ -0,0 +1,6 @@
+/**
+ * HTML相关工具封装
+ *
+ * @author looly
+ */
+package cn.hutool.http.html;
diff --git a/hutool-http/src/main/java/cn/hutool/http/server/SimpleServer.java b/hutool-http/src/main/java/cn/hutool/http/server/SimpleServer.java
index a3b0049f3..e137ecc1e 100644
--- a/hutool-http/src/main/java/cn/hutool/http/server/SimpleServer.java
+++ b/hutool-http/src/main/java/cn/hutool/http/server/SimpleServer.java
@@ -9,6 +9,7 @@ import cn.hutool.http.server.action.RootAction;
import cn.hutool.http.server.filter.HttpFilter;
import cn.hutool.http.server.filter.SimpleFilter;
import cn.hutool.http.server.handler.ActionHandler;
+
import com.sun.net.httpserver.Filter;
import com.sun.net.httpserver.HttpContext;
import com.sun.net.httpserver.HttpExchange;
diff --git a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java
index de2f49d74..e13eb8af3 100644
--- a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java
+++ b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java
@@ -1,5 +1,6 @@
package cn.hutool.http;
+import cn.hutool.http.html.HtmlUtil;
import org.junit.Assert;
import org.junit.Test;
diff --git a/hutool-http/src/test/java/cn/hutool/http/HttpRequestTest.java b/hutool-http/src/test/java/cn/hutool/http/HttpRequestTest.java
index de2eaf98d..8553a6173 100644
--- a/hutool-http/src/test/java/cn/hutool/http/HttpRequestTest.java
+++ b/hutool-http/src/test/java/cn/hutool/http/HttpRequestTest.java
@@ -115,9 +115,11 @@ public class HttpRequestTest {
@Test
@Ignore
public void getDeflateTest() {
- final String res = HttpRequest.get("https://comment.bilibili.com/67573272.xml")
- .execute().body();
- Console.log(res);
+ // Send Accept-Encoding: deflate, then print the Content-Encoding header of the response followed by its body
+ final HttpResponse res = HttpRequest.get("https://comment.bilibili.com/67573272.xml")
+ .header(Header.ACCEPT_ENCODING, "deflate")
+ .execute();
+ Console.log(res.header(Header.CONTENT_ENCODING));
+ Console.log(res.body());
}
@Test