mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-05-09 23:51:34 +08:00
增加HtmlUtil.cleanEmptyTag方法(pr#3838@Github)
This commit is contained in:
parent
b8ba60df50
commit
60b1413aa6
@ -41,6 +41,10 @@ public class HtmlUtil {
|
||||
* HTML标签正则
|
||||
*/
|
||||
public static final Pattern RE_HTML_MARK = Pattern.compile("(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)", Pattern.CASE_INSENSITIVE);
|
||||
/**
|
||||
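* Regex: matches an empty tag pair, i.e. an opening tag (with optional attributes) followed only by optional whitespace and then its matching closing tag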
|
||||
*/
|
||||
public static final String RE_HTML_EMPTY_MARK = "<(\\w+)([^>]*)>\\s*</\\1>";
|
||||
/**
|
||||
* script标签正则
|
||||
*/
|
||||
@ -111,6 +115,17 @@ public class HtmlUtil {
|
||||
return ReUtil.replaceAll(content, RE_HTML_MARK, StrUtil.EMPTY);
|
||||
}
|
||||
|
||||
/**
|
||||
* 清除所有HTML空标签<br>
|
||||
* 例如:{@code <p></p>}
|
||||
*
|
||||
* @param content 文本
|
||||
* @return 清除空标签后的文本
|
||||
*/
|
||||
public static String cleanEmptyTag(final String content) {
|
||||
return content.replaceAll(RE_HTML_EMPTY_MARK, StrUtil.EMPTY);
|
||||
}
|
||||
|
||||
/**
|
||||
* 清除所有script标签,包括内容
|
||||
*
|
||||
|
@ -18,9 +18,10 @@ package org.dromara.hutool.http.html;
|
||||
|
||||
import org.dromara.hutool.core.regex.ReUtil;
|
||||
import org.dromara.hutool.http.meta.ContentTypeUtil;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
/**
|
||||
* Html单元测试
|
||||
*
|
||||
@ -34,32 +35,32 @@ public class HtmlUtilTest {
|
||||
//非闭合标签
|
||||
String str = "pre<img src=\"xxx/dfdsfds/test.jpg\">";
|
||||
String result = HtmlUtil.removeHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img>";
|
||||
result = HtmlUtil.removeHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img src=\"xxx/dfdsfds/test.jpg\" />";
|
||||
result = HtmlUtil.removeHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img />";
|
||||
result = HtmlUtil.removeHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//包含内容标签
|
||||
str = "pre<div class=\"test_div\">dfdsfdsfdsf</div>";
|
||||
result = HtmlUtil.removeHtmlTag(str, "div");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//带换行
|
||||
str = "pre<div class=\"test_div\">\r\n\t\tdfdsfdsfdsf\r\n</div>";
|
||||
result = HtmlUtil.removeHtmlTag(str, "div");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -67,32 +68,32 @@ public class HtmlUtilTest {
|
||||
//非闭合标签
|
||||
String str = "pre<img src=\"xxx/dfdsfds/test.jpg\">";
|
||||
String result = HtmlUtil.cleanHtmlTag(str);
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img>";
|
||||
result = HtmlUtil.cleanHtmlTag(str);
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img src=\"xxx/dfdsfds/test.jpg\" />";
|
||||
result = HtmlUtil.cleanHtmlTag(str);
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img />";
|
||||
result = HtmlUtil.cleanHtmlTag(str);
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//包含内容标签
|
||||
str = "pre<div class=\"test_div\">dfdsfdsfdsf</div>";
|
||||
result = HtmlUtil.cleanHtmlTag(str);
|
||||
Assertions.assertEquals("predfdsfdsfdsf", result);
|
||||
assertEquals("predfdsfdsfdsf", result);
|
||||
|
||||
//带换行
|
||||
str = "pre<div class=\"test_div\">\r\n\t\tdfdsfdsfdsf\r\n</div><div class=\"test_div\">BBBB</div>";
|
||||
result = HtmlUtil.cleanHtmlTag(str);
|
||||
Assertions.assertEquals("pre\r\n\t\tdfdsfdsfdsf\r\nBBBB", result);
|
||||
assertEquals("pre\r\n\t\tdfdsfdsfdsf\r\nBBBB", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -100,37 +101,37 @@ public class HtmlUtilTest {
|
||||
//非闭合标签
|
||||
String str = "pre<img src=\"xxx/dfdsfds/test.jpg\">";
|
||||
String result = HtmlUtil.unwrapHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img>";
|
||||
result = HtmlUtil.unwrapHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img src=\"xxx/dfdsfds/test.jpg\" />";
|
||||
result = HtmlUtil.unwrapHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img />";
|
||||
result = HtmlUtil.unwrapHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//闭合标签
|
||||
str = "pre<img/>";
|
||||
result = HtmlUtil.unwrapHtmlTag(str, "img");
|
||||
Assertions.assertEquals("pre", result);
|
||||
assertEquals("pre", result);
|
||||
|
||||
//包含内容标签
|
||||
str = "pre<div class=\"test_div\">abc</div>";
|
||||
result = HtmlUtil.unwrapHtmlTag(str, "div");
|
||||
Assertions.assertEquals("preabc", result);
|
||||
assertEquals("preabc", result);
|
||||
|
||||
//带换行
|
||||
str = "pre<div class=\"test_div\">\r\n\t\tabc\r\n</div>";
|
||||
result = HtmlUtil.unwrapHtmlTag(str, "div");
|
||||
Assertions.assertEquals("pre\r\n\t\tabc\r\n", result);
|
||||
assertEquals("pre\r\n\t\tabc\r\n", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -139,34 +140,34 @@ public class HtmlUtilTest {
|
||||
final String htmlString = "<html><img src='aaa'><i>测试文本</i></html>";
|
||||
final String tagString = "i,br";
|
||||
final String cleanTxt = HtmlUtil.removeHtmlTag(htmlString, false, tagString.split(","));
|
||||
Assertions.assertEquals("<html><img src='aaa'>测试文本</html>", cleanTxt);
|
||||
assertEquals("<html><img src='aaa'>测试文本</html>", cleanTxt);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void escapeTest() {
|
||||
final String html = "<html><body>123'123'</body></html>";
|
||||
final String escape = HtmlUtil.escape(html);
|
||||
Assertions.assertEquals("<html><body>123'123'</body></html>", escape);
|
||||
assertEquals("<html><body>123'123'</body></html>", escape);
|
||||
final String restoreEscaped = HtmlUtil.unescape(escape);
|
||||
Assertions.assertEquals(html, restoreEscaped);
|
||||
Assertions.assertEquals("'", HtmlUtil.unescape("'"));
|
||||
assertEquals(html, restoreEscaped);
|
||||
assertEquals("'", HtmlUtil.unescape("'"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void escapeTest2() {
|
||||
final char c = ' '; // 不断开空格(non-breaking space,缩写nbsp。)
|
||||
Assertions.assertEquals(c, 160);
|
||||
assertEquals(c, 160);
|
||||
final String html = "<html><body> </body></html>";
|
||||
final String escape = HtmlUtil.escape(html);
|
||||
Assertions.assertEquals("<html><body> </body></html>", escape);
|
||||
Assertions.assertEquals(" ", HtmlUtil.unescape(" "));
|
||||
assertEquals("<html><body> </body></html>", escape);
|
||||
assertEquals(" ", HtmlUtil.unescape(" "));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void filterTest() {
|
||||
final String html = "<alert></alert>";
|
||||
final String filter = HtmlUtil.filter(html);
|
||||
Assertions.assertEquals("", filter);
|
||||
assertEquals("", filter);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -175,54 +176,77 @@ public class HtmlUtilTest {
|
||||
// 去除的属性加双引号测试
|
||||
String html = "<div class=\"test_div\"></div><span class=\"test_div\"></span>";
|
||||
String result = HtmlUtil.removeHtmlAttr(html, "class");
|
||||
Assertions.assertEquals("<div></div><span></span>", result);
|
||||
assertEquals("<div></div><span></span>", result);
|
||||
|
||||
// 去除的属性后跟空格、加单引号、不加引号测试
|
||||
html = "<div class=test_div></div><span Class='test_div' ></span>";
|
||||
result = HtmlUtil.removeHtmlAttr(html, "class");
|
||||
Assertions.assertEquals("<div></div><span></span>", result);
|
||||
assertEquals("<div></div><span></span>", result);
|
||||
|
||||
// 去除的属性位于标签末尾、其它属性前测试
|
||||
html = "<div style=\"margin:100%\" class=test_div></div><span Class='test_div' width=100></span>";
|
||||
result = HtmlUtil.removeHtmlAttr(html, "class");
|
||||
Assertions.assertEquals("<div style=\"margin:100%\"></div><span width=100></span>", result);
|
||||
assertEquals("<div style=\"margin:100%\"></div><span width=100></span>", result);
|
||||
|
||||
// 去除的属性名和值之间存在空格
|
||||
html = "<div style = \"margin:100%\" class = test_div></div><span Class = 'test_div' width=100></span>";
|
||||
result = HtmlUtil.removeHtmlAttr(html, "class");
|
||||
Assertions.assertEquals("<div style = \"margin:100%\"></div><span width=100></span>", result);
|
||||
assertEquals("<div style = \"margin:100%\"></div><span width=100></span>", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void removeAllHtmlAttrTest() {
|
||||
final String html = "<div class=\"test_div\" width=\"120\"></div>";
|
||||
final String result = HtmlUtil.removeAllHtmlAttr(html, "div");
|
||||
Assertions.assertEquals("<div></div>", result);
|
||||
assertEquals("<div></div>", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getCharsetTest() {
|
||||
String charsetName = ReUtil.get(ContentTypeUtil.CHARSET_PATTERN, "Charset=UTF-8;fq=0.9", 1);
|
||||
Assertions.assertEquals("UTF-8", charsetName);
|
||||
assertEquals("UTF-8", charsetName);
|
||||
|
||||
charsetName = ReUtil.get(HtmlUtil.META_CHARSET_PATTERN, "<meta charset=utf-8", 1);
|
||||
Assertions.assertEquals("utf-8", charsetName);
|
||||
assertEquals("utf-8", charsetName);
|
||||
charsetName = ReUtil.get(HtmlUtil.META_CHARSET_PATTERN, "<meta charset='utf-8'", 1);
|
||||
Assertions.assertEquals("utf-8", charsetName);
|
||||
assertEquals("utf-8", charsetName);
|
||||
charsetName = ReUtil.get(HtmlUtil.META_CHARSET_PATTERN, "<meta charset=\"utf-8\"", 1);
|
||||
Assertions.assertEquals("utf-8", charsetName);
|
||||
assertEquals("utf-8", charsetName);
|
||||
charsetName = ReUtil.get(HtmlUtil.META_CHARSET_PATTERN, "<meta charset = \"utf-8\"", 1);
|
||||
Assertions.assertEquals("utf-8", charsetName);
|
||||
assertEquals("utf-8", charsetName);
|
||||
}
|
||||
|
||||
@Test
|
||||
void issueI6YNTFTest() {
|
||||
String html = "<html><body><div class=\"a1 a2\">hello world</div></body></html>";
|
||||
String cleanText = HtmlUtil.removeHtmlAttr(html,"class");
|
||||
Assertions.assertEquals("<html><body><div>hello world</div></body></html>", cleanText);
|
||||
assertEquals("<html><body><div>hello world</div></body></html>", cleanText);
|
||||
|
||||
html = "<html><body><div class=a1>hello world</div></body></html>";
|
||||
cleanText = HtmlUtil.removeHtmlAttr(html,"class");
|
||||
Assertions.assertEquals("<html><body><div>hello world</div></body></html>", cleanText);
|
||||
assertEquals("<html><body><div>hello world</div></body></html>", cleanText);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cleanEmptyTagTest() {
|
||||
String str = "<p></p><div></div>";
|
||||
String result = HtmlUtil.cleanEmptyTag(str);
|
||||
assertEquals("", result);
|
||||
|
||||
str = "<p>TEXT</p><div></div>";
|
||||
result = HtmlUtil.cleanEmptyTag(str);
|
||||
assertEquals("<p>TEXT</p>", result);
|
||||
|
||||
str = "<p></p><div>TEXT</div>";
|
||||
result = HtmlUtil.cleanEmptyTag(str);
|
||||
assertEquals("<div>TEXT</div>", result);
|
||||
|
||||
str = "<p>TEXT</p><div>TEXT</div>";
|
||||
result = HtmlUtil.cleanEmptyTag(str);
|
||||
assertEquals("<p>TEXT</p><div>TEXT</div>", result);
|
||||
|
||||
str = "TEXT<p></p><div></div>TEXT";
|
||||
result = HtmlUtil.cleanEmptyTag(str);
|
||||
assertEquals("TEXTTEXT", result);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user