diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5afbdfa5..263510186 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@
* 【core 】 修复ImgUtil.copyImage读取网络URL后宽高报错问题(issue#1821@Github)
* 【core 】 修复StrJoiner.append配置丢失问题(issue#I49K1L@Gitee)
* 【core 】 修复EscapeUtil特殊字符的hex长度不足导致的问题(issue#I49JU8@Gitee)
+* 【core 】 修复UrlBuilder对Fragment部分编码问题(issue#I49KAL@Gitee)
-------------------------------------------------------------------------------------------------------------
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java b/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java
new file mode 100644
index 000000000..94d462bd7
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java
@@ -0,0 +1,192 @@
+package cn.hutool.core.net;
+
+import cn.hutool.core.exceptions.UtilException;
+import cn.hutool.core.util.CharsetUtil;
+import cn.hutool.core.util.StrUtil;
+
+import java.nio.charset.Charset;
+
+/**
+ * URL编码工具
+ *
+ * @since 5.7.13
+ * @author looly
+ */
+public class URLEncodeUtil {
+ /**
+ * Encodes a URL using UTF-8.
+ * Content that needs converting (anything outside plain ASCII form) is written as hexadecimal, prefixed with %.
+ *
+ * @param url the URL to encode
+ * @return the encoded URL
+ * @throws UtilException UnsupportedEncodingException
+ */
+ public static String encodeAll(String url) {
+ return encodeAll(url, CharsetUtil.CHARSET_UTF_8);
+ }
+
+ /**
+ * Encodes a URL with the given charset by delegating to URLEncoder.ALL.
+ * Content that needs converting (anything outside plain ASCII form) is written as hexadecimal, prefixed with %.
+ *
+ * @param url the URL to encode
+ * @param charset the charset; per the original note, null means no encoding (handled by URLEncoder.encode - TODO confirm)
+ * @return the encoded URL
+ * @throws UtilException UnsupportedEncodingException
+ */
+ public static String encodeAll(String url, Charset charset) throws UtilException {
+ return URLEncoder.ALL.encode(url, charset);
+ }
+
+ /**
+ * Encodes a URL using UTF-8.
+ * Content that needs converting (anything outside plain ASCII form) is written as hexadecimal, prefixed with %.
+ * Meant for automatic URL encoding, like a browser encoding a typed address; characters such as "/" are left as-is.
+ *
+ * @param url the URL to encode
+ * @return the encoded URL
+ * @throws UtilException UnsupportedEncodingException
+ * @since 3.1.2
+ */
+ public static String encode(String url) throws UtilException {
+ return encode(url, CharsetUtil.CHARSET_UTF_8);
+ }
+
+ /**
+ * Encodes content with URLEncoder.DEFAULT, the encoder this change documents as the URI-path encoder.
+ * Content that needs converting (anything outside plain ASCII form) is written as hexadecimal, prefixed with %.
+ * Meant for automatic URL encoding, like a browser encoding a typed address; characters such as "/" are left as-is.
+ *
+ * @param url the content to encode
+ * @param charset the charset used for encoding
+ * @return the encoded string
+ * @since 4.4.1
+ */
+ public static String encode(String url, Charset charset) {
+ return URLEncoder.DEFAULT.encode(url, charset);
+ }
+
+ /**
+ * Encodes a URL query string using UTF-8.
+ * Content that needs converting (anything outside plain ASCII form) is written as hexadecimal, prefixed with %.
+ * Meant for automatically encoding the request body of a POST request; most special characters are escaped.
+ *
+ * @param url the content to encode
+ * @return the encoded string
+ * @throws UtilException UnsupportedEncodingException
+ * @since 3.1.2
+ */
+ public static String encodeQuery(String url) throws UtilException {
+ return encodeQuery(url, CharsetUtil.CHARSET_UTF_8);
+ }
+
+ /**
+ * Encodes content as a URL query string by delegating to URLEncoder.QUERY.
+ * Content that needs converting (anything outside plain ASCII form) is written as hexadecimal, prefixed with %.
+ * Meant for automatically encoding the request body of a POST request; most special characters are escaped.
+ *
+ * @param url the content to encode
+ * @param charset the charset used for encoding
+ * @return the encoded string
+ * @since 4.4.1
+ */
+ public static String encodeQuery(String url, Charset charset) {
+ return URLEncoder.QUERY.encode(url, charset);
+ }
+
+ /**
+ * 编码URL,默认使用UTF-8编码
+ * 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
+ * 此方法用于URL的Segment中自动编码,转义大部分特殊字符
+ *
+ *
+ * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + *+ * + * @param url URL + * @return 编码后的URL + * @throws UtilException UnsupportedEncodingException + * @since 5.6.5 + */ + public static String encodePathSegment(String url) throws UtilException { + return encodePathSegment(url, CharsetUtil.CHARSET_UTF_8); + } + + /** + * 编码字符为URL中查询语句
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + *+ * + * @param url 被编码内容 + * @param charset 编码 + * @return 编码后的字符 + * @since 5.6.5 + */ + public static String encodePathSegment(String url, Charset charset) { + if (StrUtil.isEmpty(url)) { + return url; + } + if (null == charset) { + charset = CharsetUtil.defaultCharset(); + } + return URLEncoder.PATH_SEGMENT.encode(url, charset); + } + + /** + * 编码URL,默认使用UTF-8编码
+ * fragment = *( pchar / "/" / "?" ) + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + *+ * + * 具体见:https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 + * + * @param url 被编码内容 + * @return 编码后的字符 + * @since 5.7.13 + */ + public static String encodeFragment(String url) throws UtilException { + return encodeFragment(url, CharsetUtil.CHARSET_UTF_8); + } + + /** + * URL的Fragment URLEncoder
+ * fragment = *( pchar / "/" / "?" ) + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + *+ * + * 具体见:https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 + * + * @param url 被编码内容 + * @param charset 编码 + * @return 编码后的字符 + * @since 5.7.13 + */ + public static String encodeFragment(String url, Charset charset) { + if (StrUtil.isEmpty(url)) { + return url; + } + if (null == charset) { + charset = CharsetUtil.defaultCharset(); + } + return URLEncoder.FRAGMENT.encode(url, charset); + } +} diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java b/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java index 04520fc5c..660ae4bf5 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java @@ -1,7 +1,9 @@ package cn.hutool.core.net; import cn.hutool.core.util.CharUtil; +import cn.hutool.core.util.CharsetUtil; import cn.hutool.core.util.HexUtil; +import cn.hutool.core.util.StrUtil; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -30,7 +32,8 @@ public class URLEncoder implements Serializable { * 默认的编码器针对URI路径编码,定义如下: * *
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@" / "/" + * default = pchar / "/" + * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@" * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" *@@ -42,13 +45,31 @@ public class URLEncoder implements Serializable { * 默认的编码器针对URI路径编码,定义如下: * *
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@" + * pchar = unreserved / pct-encoded / sub-delims / ":"(非空segment不包含:) / "@" * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" *+ * + * 定义见:https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3 */ public static final URLEncoder PATH_SEGMENT = createPathSegment(); + /** + * URL的Fragment URLEncoder
+ * fragment = *( pchar / "/" / "?" ) + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + *+ * + * 具体见:https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 + * @since 5.7.13 + */ + public static final URLEncoder FRAGMENT = createFragment(); + /** * 用于查询语句的URLEncoder
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@" / "/" + * default = pchar / "/" + * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@" * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" *@@ -92,21 +114,14 @@ public class URLEncoder implements Serializable { encoder.addSafeCharacter('.'); encoder.addSafeCharacter('_'); encoder.addSafeCharacter('~'); + // Add the sub-delims - encoder.addSafeCharacter('!'); - encoder.addSafeCharacter('$'); - encoder.addSafeCharacter('&'); - encoder.addSafeCharacter('\''); - encoder.addSafeCharacter('('); - encoder.addSafeCharacter(')'); - encoder.addSafeCharacter('*'); - encoder.addSafeCharacter('+'); - encoder.addSafeCharacter(','); - encoder.addSafeCharacter(';'); - encoder.addSafeCharacter('='); + addSubDelims(encoder); + // Add the remaining literals encoder.addSafeCharacter(':'); encoder.addSafeCharacter('@'); + // Add '/' so it isn't encoded when we encode a path encoder.addSafeCharacter('/'); @@ -118,37 +133,71 @@ public class URLEncoder implements Serializable { * 默认的编码器针对URI路径的每一段编码,定义如下: * *
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@" + * pchar = unreserved / pct-encoded / sub-delims / ":"(非空segment不包含:) / "@" * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" ** + * 定义见:https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3 + * * @return URLEncoder */ public static URLEncoder createPathSegment() { final URLEncoder encoder = new URLEncoder(); + + // unreserved encoder.addSafeCharacter('-'); encoder.addSafeCharacter('.'); encoder.addSafeCharacter('_'); encoder.addSafeCharacter('~'); + // Add the sub-delims - encoder.addSafeCharacter('!'); - encoder.addSafeCharacter('$'); - encoder.addSafeCharacter('&'); - encoder.addSafeCharacter('\''); - encoder.addSafeCharacter('('); - encoder.addSafeCharacter(')'); - encoder.addSafeCharacter('*'); - encoder.addSafeCharacter('+'); - encoder.addSafeCharacter(','); - encoder.addSafeCharacter(';'); - encoder.addSafeCharacter('='); + addSubDelims(encoder); + // Add the remaining literals + //non-zero-length segment without any colon ":" + //encoder.addSafeCharacter(':'); encoder.addSafeCharacter('@'); return encoder; } + /** + * URL的Fragment URLEncoder
+ * fragment = *( pchar / "/" / "?" ) + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + *+ * + * 具体见:https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 + * + * @return URLEncoder + * @since 5.7.13 + */ + public static URLEncoder createFragment() { + final URLEncoder encoder = new URLEncoder(); + encoder.addSafeCharacter('-'); + encoder.addSafeCharacter('.'); + encoder.addSafeCharacter('_'); + encoder.addSafeCharacter('~'); + + // Add the sub-delims + addSubDelims(encoder); + + // Add the remaining literals + encoder.addSafeCharacter(':'); + encoder.addSafeCharacter('@'); + + encoder.addSafeCharacter('/'); + encoder.addSafeCharacter('?'); + + return encoder; + } + /** * 创建用于查询语句的URLEncoder
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@" - * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" - *- * - * @param url URL - * @return 编码后的URL - * @throws UtilException UnsupportedEncodingException - * @since 5.6.5 - */ - public static String encodePathSegment(String url) throws UtilException { - return encodePathSegment(url, CharsetUtil.CHARSET_UTF_8); - } - - /** - * 编码字符为URL中查询语句
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@" - * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" - *- * - * @param url 被编码内容 - * @param charset 编码 - * @return 编码后的字符 - * @since 5.6.5 - */ - public static String encodePathSegment(String url, Charset charset) { - if (StrUtil.isEmpty(url)) { - return url; - } - if (null == charset) { - charset = CharsetUtil.defaultCharset(); - } - return URLEncoder.PATH_SEGMENT.encode(url, charset); - } - //-------------------------------------------------------------------------- decode /** diff --git a/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java b/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java index bf4f81efb..f53085fc6 100644 --- a/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java @@ -1,11 +1,9 @@ package cn.hutool.core.net; import cn.hutool.core.date.DateUtil; -import cn.hutool.core.lang.Console; import cn.hutool.core.net.url.UrlBuilder; import cn.hutool.core.util.CharsetUtil; import org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; import java.net.URI; @@ -263,10 +261,14 @@ public class UrlBuilderTest { } @Test - @Ignore public void fragmentEncodeTest(){ + // https://gitee.com/dromara/hutool/issues/I49KAL + // 见:https://stackoverflow.com/questions/26088849/url-fragment-allowed-characters String url = "https://hutool.cn/docs/#/?id=简介"; - final UrlBuilder urlBuilder = UrlBuilder.ofHttp(url); - Console.log(urlBuilder.toString()); + UrlBuilder urlBuilder = UrlBuilder.ofHttp(url); + Assert.assertEquals("https://hutool.cn/docs/#/?id=%E7%AE%80%E4%BB%8B", urlBuilder.toString()); + + urlBuilder = UrlBuilder.ofHttp(urlBuilder.toString()); + Assert.assertEquals(urlBuilder.toString(), urlBuilder.toString()); } }