diff --git a/hutool-core/src/main/java/cn/hutool/core/compress/InflaterInputStream.java b/hutool-core/src/main/java/cn/hutool/core/compress/InflaterInputStream.java
new file mode 100755
index 000000000..beaa58534
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/compress/InflaterInputStream.java
@@ -0,0 +1,137 @@
+package cn.hutool.core.compress;
+
+import cn.hutool.core.io.IORuntimeException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+import java.util.zip.Inflater;
+
+/**
+ * Wrapper around {@link java.util.zip.InflaterInputStream} that decompresses "deflate" content.
+ * The first two bytes of the wrapped stream are inspected to choose between zlib-wrapped and raw deflate decoding.
+ * Reference: org.apache.hc.client5.http.entity.DeflateInputStream
+ *
+ * @author looly
+ */
+public class InflaterInputStream extends InputStream {
+
+	private final java.util.zip.InflaterInputStream in;
+	/**
+	 * Inflater owned by this stream. java.util.zip.InflaterInputStream only ends the Inflater it
+	 * creates itself, so this one must be ended explicitly in {@link #close()}.
+	 */
+	private final Inflater inflater;
+	/**
+	 * Whether {@link #close()} has already run; makes close idempotent.
+	 */
+	private boolean closed;
+
+	/**
+	 * Constructor using a 512-byte buffer.
+	 *
+	 * @param wrapped the stream to wrap
+	 */
+	public InflaterInputStream(final InputStream wrapped) {
+		this(wrapped, 512);
+	}
+
+	/**
+	 * Constructor.
+	 *
+	 * @param wrapped the stream to wrap
+	 * @param size    buffer size
+	 * @throws IORuntimeException if the stream ends before two bytes are available or reading the header fails
+	 */
+	public InflaterInputStream(final InputStream wrapped, final int size) {
+		// Peek at the first two bytes, then push them back so the inflater sees the full stream.
+		final PushbackInputStream pushback = new PushbackInputStream(wrapped, 2);
+		final int i1, i2;
+		try {
+			i1 = pushback.read();
+			i2 = pushback.read();
+			if (i1 == -1 || i2 == -1) {
+				throw new IORuntimeException("Unexpected end of stream");
+			}
+
+			pushback.unread(i2);
+			pushback.unread(i1);
+		} catch (final IOException e) {
+			throw new IORuntimeException(e);
+		}
+
+		// Treat the data as zlib-wrapped only when the header looks valid: method 8,
+		// window info <= 7 and the 16-bit header value divisible by 31; otherwise read it as raw deflate.
+		boolean nowrap = true;
+		final int b1 = i1 & 0xFF;
+		final int compressionMethod = b1 & 0xF;
+		final int compressionInfo = b1 >> 4 & 0xF;
+		final int b2 = i2 & 0xFF;
+		if (compressionMethod == 8 && compressionInfo <= 7 && ((b1 << 8) | b2) % 31 == 0) {
+			nowrap = false;
+		}
+		inflater = new Inflater(nowrap);
+		in = new java.util.zip.InflaterInputStream(pushback, inflater, size);
+	}
+
+	@Override
+	public int read() throws IOException {
+		return this.in.read();
+	}
+
+	@SuppressWarnings("NullableProblems")
+	@Override
+	public int read(final byte[] b) throws IOException {
+		return in.read(b);
+	}
+
+	@SuppressWarnings("NullableProblems")
+	@Override
+	public int read(final byte[] b, final int off, final int len) throws IOException {
+		return in.read(b, off, len);
+	}
+
+	@Override
+	public long skip(final long n) throws IOException {
+		return in.skip(n);
+	}
+
+	@Override
+	public int available() throws IOException {
+		return in.available();
+	}
+
+	@Override
+	public void mark(final int readLimit) {
+		in.mark(readLimit);
+	}
+
+	@Override
+	public void reset() throws IOException {
+		in.reset();
+	}
+
+	@Override
+	public boolean markSupported() {
+		return in.markSupported();
+	}
+
+	/**
+	 * Closes the underlying stream and releases the native resources held by the Inflater.
+	 * Calling it more than once has no further effect.
+	 *
+	 * @throws IOException if closing the underlying stream fails
+	 */
+	@Override
+	public void close() throws IOException {
+		if (closed) {
+			return;
+		}
+		closed = true;
+		try {
+			in.close();
+		} finally {
+			inflater.end();
+		}
+	}
+}
diff --git a/hutool-http/pom.xml b/hutool-http/pom.xml index d858816a9..5c7d4eba2 100755 --- a/hutool-http/pom.xml +++ b/hutool-http/pom.xml @@ -22,6 +22,12 @@ hutool-core ${project.parent.version} + + org.apache.httpcomponents.client5 + httpclient5 + 5.1.3 + provided + javax.xml.soap javax.xml.soap-api diff --git a/hutool-http/src/main/java/cn/hutool/http/GlobalCompressStreamRegister.java b/hutool-http/src/main/java/cn/hutool/http/GlobalCompressStreamRegister.java index a0d13a9f3..037ac5d6e 100755 --- a/hutool-http/src/main/java/cn/hutool/http/GlobalCompressStreamRegister.java +++ b/hutool-http/src/main/java/cn/hutool/http/GlobalCompressStreamRegister.java @@ -1,9 +1,11 @@ package cn.hutool.http; +import cn.hutool.core.compress.InflaterInputStream; import cn.hutool.core.map.CaseInsensitiveMap; import java.io.InputStream; import java.util.Map; +import java.util.zip.GZIPInputStream; /** * 全局响应内容压缩解压器注册中心
@@ -23,7 +25,12 @@ public enum GlobalCompressStreamRegister { */ private final Map> compressMap = new CaseInsensitiveMap<>(); + /** + * 构造,初始化默认的压缩算法 + */ GlobalCompressStreamRegister() { + compressMap.put("gzip", GZIPInputStream.class); + compressMap.put("deflate", InflaterInputStream.class); } /** diff --git a/hutool-http/src/main/java/cn/hutool/http/HttpInputStream.java b/hutool-http/src/main/java/cn/hutool/http/HttpInputStream.java index a26dcae34..384c64bd0 100644 --- a/hutool-http/src/main/java/cn/hutool/http/HttpInputStream.java +++ b/hutool-http/src/main/java/cn/hutool/http/HttpInputStream.java @@ -7,19 +7,17 @@ import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.util.zip.GZIPInputStream; -import java.util.zip.Inflater; -import java.util.zip.InflaterInputStream; /** * HTTP输入流,此流用于包装Http请求响应内容的流,用于解析各种压缩、分段的响应流内容 * * @author Looly - * */ public class HttpInputStream extends InputStream { - /** 原始流 */ + /** + * 原始流 + */ private InputStream in; /** @@ -89,30 +87,17 @@ public class HttpInputStream extends InputStream { // 在一些情况下,返回的流为null,此时提供状态码说明 if (null == this.in) { - this.in = new ByteArrayInputStream(StrUtil.format("Error request, response status: {}", response.status).getBytes()); + this.in = new ByteArrayInputStream(StrUtil.format("Error request, null response with status: {}", response.status).getBytes()); return; } final String contentEncoding = response.contentEncoding(); - if (StrUtil.equalsIgnoreCase("gzip", contentEncoding) && false == (response.in instanceof GZIPInputStream)) { - // Accept-Encoding: gzip + final Class streamClass = GlobalCompressStreamRegister.INSTANCE.get(contentEncoding); + if (null != streamClass) { try { - this.in = new GZIPInputStream(this.in); - } catch (final IOException ignore) { - // 在类似于Head等方法中无body返回,此时GZIPInputStream构造会出现错误,在此忽略此错误读取普通数据 - // ignore - } - } else if (StrUtil.equalsIgnoreCase("deflate", contentEncoding) && 
false == (this.in instanceof InflaterInputStream)) { - // Accept-Encoding: defalte - this.in = new InflaterInputStream(this.in, new Inflater(true)); - } else{ - final Class streamClass = GlobalCompressStreamRegister.INSTANCE.get(contentEncoding); - if(null != streamClass){ - try { - this.in = ConstructorUtil.newInstance(streamClass, this.in); - } catch (final Exception ignore) { - // 对于构造错误的压缩算法,跳过之 - } + this.in = ConstructorUtil.newInstance(streamClass, this.in); + } catch (final Exception ignore) { + // 对于构造错误的压缩算法,跳过之 } } } diff --git a/hutool-http/src/main/java/cn/hutool/http/HTMLFilter.java b/hutool-http/src/main/java/cn/hutool/http/html/HTMLFilter.java similarity index 99% rename from hutool-http/src/main/java/cn/hutool/http/HTMLFilter.java rename to hutool-http/src/main/java/cn/hutool/http/html/HTMLFilter.java index dc1e337c8..2df015628 100644 --- a/hutool-http/src/main/java/cn/hutool/http/HTMLFilter.java +++ b/hutool-http/src/main/java/cn/hutool/http/html/HTMLFilter.java @@ -1,4 +1,4 @@ -package cn.hutool.http; +package cn.hutool.http.html; import cn.hutool.core.lang.Console; import cn.hutool.core.map.SafeConcurrentHashMap; diff --git a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java b/hutool-http/src/main/java/cn/hutool/http/html/HtmlUtil.java similarity index 85% rename from hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java rename to hutool-http/src/main/java/cn/hutool/http/html/HtmlUtil.java index 2ebbe8cd5..f9fe4c5a9 100755 --- a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java +++ b/hutool-http/src/main/java/cn/hutool/http/html/HtmlUtil.java @@ -1,10 +1,12 @@ -package cn.hutool.http; +package cn.hutool.http.html; import cn.hutool.core.regex.ReUtil; import cn.hutool.core.text.StrUtil; import cn.hutool.core.text.escape.EscapeUtil; import cn.hutool.core.util.XmlUtil; +import java.util.regex.Pattern; + /** * HTML工具类 * @@ -13,18 +15,23 @@ import cn.hutool.core.util.XmlUtil; * 比如去掉指定标签(例如广告栏等)、去除JS、去掉样式等等,这些操作都可以使用此工具类完成。 * * @author 
xiaoleilu
- *
  */
 public class HtmlUtil {
 
-	public static final String RE_HTML_MARK = "(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)";
-	public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>";
+	/**
+	 * Regex matching HTML tags
+	 */
+	public static final Pattern RE_HTML_MARK = Pattern.compile("(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)", Pattern.CASE_INSENSITIVE);
+	/**
+	 * Regex matching script tags together with their content; DOTALL lets the content span several lines
+	 */
+	public static final Pattern RE_SCRIPT = Pattern.compile("<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
 
 	private static final char[][] TEXT = new char[64][];
 
 	static {
 		for (int i = 0; i < 64; i++) {
-			TEXT[i] = new char[] { (char) i };
+			TEXT[i] = new char[]{(char) i};
 		}
 
 		// special HTML characters
@@ -75,14 +82,24 @@ public class HtmlUtil {
 	 * @return 清除标签后的文本
 	 */
 	public static String cleanHtmlTag(final String content) {
-		return content.replaceAll(RE_HTML_MARK, "");
+		return ReUtil.replaceAll(content, RE_HTML_MARK, "");
+	}
+
+	/**
+	 * Removes every script tag, including its content.
+	 *
+	 * @param content the text to clean
+	 * @return the text with script tags removed
+	 */
+	public static String removeScriptTag(final String content) {
+		return ReUtil.replaceAll(content, RE_SCRIPT, "");
 	}
 
 	/**
 	 * 清除指定HTML标签和被标签包围的内容
* 不区分大小写 * - * @param content 文本 + * @param content 文本 * @param tagNames 要清除的标签 * @return 去除标签后的文本 */ @@ -94,7 +111,7 @@ public class HtmlUtil { * 清除指定HTML标签,不包括内容
* 不区分大小写 * - * @param content 文本 + * @param content 文本 * @param tagNames 要清除的标签 * @return 去除标签后的文本 */ @@ -106,9 +123,9 @@ public class HtmlUtil { * 清除指定HTML标签
* 不区分大小写 * - * @param content 文本 + * @param content 文本 * @param withTagContent 是否去掉被包含在标签中的内容 - * @param tagNames 要清除的标签 + * @param tagNames 要清除的标签 * @return 去除标签后的文本 */ public static String removeHtmlTag(String content, final boolean withTagContent, final String... tagNames) { @@ -136,7 +153,7 @@ public class HtmlUtil { * 去除HTML标签中的属性,如果多个标签有相同属性,都去除 * * @param content 文本 - * @param attrs 属性名(不区分大小写) + * @param attrs 属性名(不区分大小写) * @return 处理后的文本 */ public static String removeHtmlAttr(String content, final String... attrs) { @@ -156,7 +173,7 @@ public class HtmlUtil { /** * 去除指定标签的所有属性 * - * @param content 内容 + * @param content 内容 * @param tagNames 指定标签 * @return 处理后的文本 */ diff --git a/hutool-http/src/main/java/cn/hutool/http/html/package-info.java b/hutool-http/src/main/java/cn/hutool/http/html/package-info.java new file mode 100755 index 000000000..a57808dd6 --- /dev/null +++ b/hutool-http/src/main/java/cn/hutool/http/html/package-info.java @@ -0,0 +1,6 @@ +/** + * HTML相关工具封装 + * + * @author looly + */ +package cn.hutool.http.html; diff --git a/hutool-http/src/main/java/cn/hutool/http/server/SimpleServer.java b/hutool-http/src/main/java/cn/hutool/http/server/SimpleServer.java index a3b0049f3..e137ecc1e 100644 --- a/hutool-http/src/main/java/cn/hutool/http/server/SimpleServer.java +++ b/hutool-http/src/main/java/cn/hutool/http/server/SimpleServer.java @@ -9,6 +9,7 @@ import cn.hutool.http.server.action.RootAction; import cn.hutool.http.server.filter.HttpFilter; import cn.hutool.http.server.filter.SimpleFilter; import cn.hutool.http.server.handler.ActionHandler; + import com.sun.net.httpserver.Filter; import com.sun.net.httpserver.HttpContext; import com.sun.net.httpserver.HttpExchange; diff --git a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java index de2f49d74..e13eb8af3 100644 --- a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java +++ 
b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java @@ -1,5 +1,6 @@ package cn.hutool.http; +import cn.hutool.http.html.HtmlUtil; import org.junit.Assert; import org.junit.Test; diff --git a/hutool-http/src/test/java/cn/hutool/http/HttpRequestTest.java b/hutool-http/src/test/java/cn/hutool/http/HttpRequestTest.java index de2eaf98d..8553a6173 100644 --- a/hutool-http/src/test/java/cn/hutool/http/HttpRequestTest.java +++ b/hutool-http/src/test/java/cn/hutool/http/HttpRequestTest.java @@ -115,9 +115,11 @@ public class HttpRequestTest { @Test @Ignore public void getDeflateTest() { - final String res = HttpRequest.get("https://comment.bilibili.com/67573272.xml") - .execute().body(); - Console.log(res); + final HttpResponse res = HttpRequest.get("https://comment.bilibili.com/67573272.xml") + .header(Header.ACCEPT_ENCODING, "deflate") + .execute(); + Console.log(res.header(Header.CONTENT_ENCODING)); + Console.log(res.body()); } @Test