This commit is contained in:
Looly 2022-11-28 13:52:11 +08:00
parent 738519d6db
commit 9e6ba78ae8
3 changed files with 18 additions and 5 deletions

View File

@ -68,7 +68,7 @@ import java.util.Map;
public class XmlUtil {
/**
* String constant: XML space escape {@code "&nbsp;" -> " "}
* String constant: XML non-breaking space escape {@code "&nbsp;" -> " "}
*/
public static final String NBSP = "&nbsp;"; // entity text for the non-breaking space (U+00A0), not a literal whitespace character

View File

@ -27,19 +27,22 @@ public class HtmlUtil {
*/
public static final Pattern RE_SCRIPT = Pattern.compile("<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>", Pattern.CASE_INSENSITIVE);
private static final char[][] TEXT = new char[64][];
private static final char[][] TEXT = new char[256][];
static {
for (int i = 0; i < 64; i++) {
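// The largest standard ASCII code is 0x7F (127); the largest extended ASCII code is 0xFF (255).
// ASCII uses a 7-bit or 8-bit binary pattern to represent 128 or 256 possible characters; the 7-bit set is called standard (or basic) ASCII.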
for (int i = 0; i < 256; i++) {
TEXT[i] = new char[]{(char) i};
}
// special HTML characters
TEXT['\''] = "&#039;".toCharArray(); // 单引号 ('&apos;' doesn't work - it is not by the w3 specs)
TEXT['"'] = XmlUtil.QUOTE.toCharArray(); // 引号
TEXT['"'] = XmlUtil.QUOTE.toCharArray(); // 引号
TEXT['&'] = XmlUtil.AMP.toCharArray(); // &
TEXT['<'] = XmlUtil.LT.toCharArray(); // 小于号
TEXT['>'] = XmlUtil.GT.toCharArray(); // 大于号
// Non-breaking space (nbsp, U+00A0, code 160), as opposed to the ordinary keyboard space (code 32);
// the space rendered for &nbsp; is not used as a line-break point when the page wraps.
// The key is written as a Unicode escape so it cannot be mistaken for, or normalized to, a plain space.
TEXT['\u00A0'] = XmlUtil.NBSP.toCharArray();
}
/**
@ -201,7 +204,7 @@ public class HtmlUtil {
char c;
for (int i = 0; i < len; i++) {
c = text.charAt(i);
if (c < 64) {
if (c < 256) {
buffer.append(TEXT[c]);
} else {
buffer.append(c);

View File

@ -135,6 +135,16 @@ public class HtmlUtilTest {
Assert.assertEquals("'", HtmlUtil.unescape("&apos;"));
}
/**
 * Checks that a non-breaking space (U+00A0, code 160) is escaped to {@code &nbsp;}
 * and that {@code &nbsp;} is unescaped back to that same character.
 */
@Test
public void escapeTest2() {
	// Spelled as a Unicode escape so the test does not depend on the source file's
	// encoding or on a tool silently turning the character into a plain space.
	final char nbsp = '\u00A0'; // non-breaking space (nbsp)
	Assert.assertEquals(160, nbsp);

	final String html = "<html><body>" + nbsp + "</body></html>";
	final String escape = HtmlUtil.escape(html);
	Assert.assertEquals("&lt;html&gt;&lt;body&gt;&nbsp;&lt;/body&gt;&lt;/html&gt;", escape);

	// NOTE(review): assumes unescape maps &nbsp; to U+00A0 (not to a plain space) - confirm against HtmlUtil.unescape.
	Assert.assertEquals(String.valueOf(nbsp), HtmlUtil.unescape("&nbsp;"));
}
@Test
public void filterTest() {
final String html = "<alert></alert>";