From 31bcd02732dd14e5f0a3a30a9e993c9bd4d85717 Mon Sep 17 00:00:00 2001 From: LuisStruggle <18300767078@163.com> Date: Thu, 24 Nov 2022 10:53:32 +0800 Subject: [PATCH] =?UTF-8?q?html=E9=98=B2=E6=AD=A2=E6=B3=A8=E5=85=A5?= =?UTF-8?q?=E8=BD=AC=E8=AF=91=EF=BC=8C=E5=A2=9E=E5=8A=A0=E4=B8=8D=E6=96=AD?= =?UTF-8?q?=E5=BC=80=E7=A9=BA=E6=A0=BC=EF=BC=88nbsp=EF=BC=89=E8=BD=AC?= =?UTF-8?q?=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/main/java/cn/hutool/core/text/StrPool.java | 2 +- .../src/main/java/cn/hutool/core/util/XmlUtil.java | 2 +- hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java | 8 +++++--- .../src/test/java/cn/hutool/http/HtmlUtilTest.java | 10 ++++++++++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/hutool-core/src/main/java/cn/hutool/core/text/StrPool.java b/hutool-core/src/main/java/cn/hutool/core/text/StrPool.java index 743c4e474..ae627a6a4 100644 --- a/hutool-core/src/main/java/cn/hutool/core/text/StrPool.java +++ b/hutool-core/src/main/java/cn/hutool/core/text/StrPool.java @@ -176,7 +176,7 @@ public interface StrPool { /** - * 字符串常量:HTML 空格转义 {@code " " -> " "} + * 字符串常量:HTML 不间断空格转义 {@code " " -> " "} */ String HTML_NBSP = XmlUtil.NBSP; diff --git a/hutool-core/src/main/java/cn/hutool/core/util/XmlUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/XmlUtil.java index a6ccc3e06..9f46d7495 100755 --- a/hutool-core/src/main/java/cn/hutool/core/util/XmlUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/util/XmlUtil.java @@ -67,7 +67,7 @@ import java.util.Map; public class XmlUtil { /** - * 字符串常量:XML 空格转义 {@code " " -> " "} + * 字符串常量:XML 不间断空格转义 {@code " " -> " "} */ public static final String NBSP = " "; diff --git a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java b/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java index 09c20e04b..4a0fba28b 100755 --- a/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java +++ b/hutool-http/src/main/java/cn/hutool/http/HtmlUtil.java @@ -26,10 +26,11 @@ public class HtmlUtil { public static final String RE_HTML_MARK = "(<[^<]*?>)|(<[\\s]*?/[^<]*?>)|(<[^<]*?/[\\s]*?>)"; public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>"; - private static final char[][] TEXT = new char[64][]; + private static final char[][] TEXT = new char[256][]; static { - for (int i = 0; i < 64; i++) { + // ascii码值最大的是【0x7f=127】,扩展ascii码值最大的是【0xFF=255】,因为ASCII码使用指定的7位或8位二进制数组合来表示128或256种可能的字符,标准ASCII码也叫基础ASCII码。 + for (int i = 0; i < 256; i++) { TEXT[i] = new char[] { (char) i }; } @@ -39,6 +40,7 @@ public class HtmlUtil { TEXT['&'] = AMP.toCharArray(); // &符 TEXT['<'] = LT.toCharArray(); // 小于号 TEXT['>'] = GT.toCharArray(); // 大于号 + TEXT[' '] = NBSP.toCharArray(); // 不断开空格(non-breaking space,缩写nbsp。ASCII值是32:是用键盘输入的空格;ASCII值是160:不间断空格,即  ,所产生的空格,作用是在页面换行时不被打断) } /** @@ -190,7 +192,7 @@ public class HtmlUtil { char c; for (int i = 0; i < len; i++) { c = text.charAt(i); - if (c < 64) { + if (c < 256) { buffer.append(TEXT[c]); } else { buffer.append(c); diff --git a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java index 214620d5d..9cbaf4055 100644 --- a/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java +++ b/hutool-http/src/test/java/cn/hutool/http/HtmlUtilTest.java @@ -134,6 +134,16 @@ public class HtmlUtilTest { Assert.assertEquals("'", HtmlUtil.unescape("'")); } + @Test + public void escapeTest2() { + char c = ' '; // 不断开空格(non-breaking space,缩写nbsp。) + Assert.assertEquals(c, 160); + String html = " "; + String escape = HtmlUtil.escape(html); + Assert.assertEquals("<html><body> </body></html>", escape); + Assert.assertEquals(" ", HtmlUtil.unescape(" ")); + } + @Test public void filterTest() { String html = "";