diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5afbdfa5..263510186 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@
* 【core 】 修复ImgUtil.copyImage读取网络URL后宽高报错问题(issue#1821@Github)
* 【core 】 修复StrJoiner.append配置丢失问题(issue#I49K1L@Gitee)
* 【core 】 修复EscapeUtil特殊字符的hex长度不足导致的问题(issue#I49JU8@Gitee)
+* 【core 】 修复UrlBuilder对Fragment部分编码问题(issue#I49KAL@Gitee)
-------------------------------------------------------------------------------------------------------------
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java b/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java
new file mode 100644
index 000000000..94d462bd7
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java
@@ -0,0 +1,192 @@
+package cn.hutool.core.net;
+
+import cn.hutool.core.exceptions.UtilException;
+import cn.hutool.core.util.CharsetUtil;
+import cn.hutool.core.util.StrUtil;
+
+import java.nio.charset.Charset;
+
+/**
+ * URL encoding utilities; every method delegates to one of the predefined {@code URLEncoder} instances.
+ *
+ * @since 5.7.13
+ * @author looly
+ */
+public class URLEncodeUtil {
+ /**
+ * Encodes a URL using UTF-8.
+ * Content that needs converting (anything outside plain ASCII form) is written in hexadecimal, prefixed with '%'.
+ *
+ * @param url the URL to encode
+ * @return the encoded URL
+ * @throws UtilException UnsupportedEncodingException
+ */
+ public static String encodeAll(String url) {
+ return encodeAll(url, CharsetUtil.CHARSET_UTF_8);
+ }
+
+ /**
+ * Encodes a URL with {@code URLEncoder.ALL}.
+ * Content that needs converting (anything outside plain ASCII form) is written in hexadecimal, prefixed with '%'.
+ *
+ * @param url the URL to encode
+ * @param charset charset to encode with; {@code null} is documented to mean "do not encode" (handled inside URLEncoder#encode, not shown here)
+ * @return the encoded URL
+ * @throws UtilException UnsupportedEncodingException
+ */
+ public static String encodeAll(String url, Charset charset) throws UtilException {
+ return URLEncoder.ALL.encode(url, charset);
+ }
+
+ /**
+ * Encodes a URL using UTF-8.
+ * Content that needs converting (anything outside plain ASCII form) is written in hexadecimal, prefixed with '%'.
+ * Intended for automatic URL encoding, similar to what a browser does for a typed address; characters such as "/" are left unencoded.
+ *
+ * @param url the URL to encode
+ * @return the encoded URL
+ * @throws UtilException UnsupportedEncodingException
+ * @since 3.1.2
+ */
+ public static String encode(String url) throws UtilException {
+ return encode(url, CharsetUtil.CHARSET_UTF_8);
+ }
+
+ /**
+ * Encodes the given text with the default (URI path) encoder, {@code URLEncoder.DEFAULT}.
+ * Content that needs converting (anything outside plain ASCII form) is written in hexadecimal, prefixed with '%'.
+ * Intended for automatic URL encoding, similar to what a browser does for a typed address; characters such as "/" are left unencoded.
+ *
+ * @param url the text to encode
+ * @param charset the charset to encode with
+ * @return the encoded text
+ * @since 4.4.1
+ */
+ public static String encode(String url, Charset charset) {
+ return URLEncoder.DEFAULT.encode(url, charset);
+ }
+
+ /**
+ * Encodes a URL query string using UTF-8.
+ * Content that needs converting (anything outside plain ASCII form) is written in hexadecimal, prefixed with '%'.
+ * Intended for automatically encoding POST request bodies; most special characters are escaped.
+ *
+ * @param url the text to encode
+ * @return the encoded text
+ * @throws UtilException UnsupportedEncodingException
+ * @since 3.1.2
+ */
+ public static String encodeQuery(String url) throws UtilException {
+ return encodeQuery(url, CharsetUtil.CHARSET_UTF_8);
+ }
+
+ /**
+ * Encodes text for use in the query part of a URL, using {@code URLEncoder.QUERY}.
+ * Content that needs converting (anything outside plain ASCII form) is written in hexadecimal, prefixed with '%'.
+ * Intended for automatically encoding POST request bodies; most special characters are escaped.
+ *
+ * @param url the text to encode
+ * @param charset the charset to encode with
+ * @return the encoded text
+ * @since 4.4.1
+ */
+ public static String encodeQuery(String url, Charset charset) {
+ return URLEncoder.QUERY.encode(url, charset);
+ }
+
+ /**
+ * Encodes a single URL path segment using UTF-8.
+ * Content that needs converting (anything outside plain ASCII form) is written in hexadecimal, prefixed with '%'.
+ * Intended for automatically encoding one segment of a URL path; most special characters are escaped.
+ *
+ *
+ * pchar = unreserved (left as-is) / pct-encoded / sub-delims / "@"
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ *
+ *
+ * @param url the path segment to encode
+ * @return the encoded path segment
+ * @throws UtilException UnsupportedEncodingException
+ * @since 5.6.5
+ */
+ public static String encodePathSegment(String url) throws UtilException {
+ return encodePathSegment(url, CharsetUtil.CHARSET_UTF_8);
+ }
+
+ /**
+ * Encodes text for use as a URL path segment, using {@code URLEncoder.PATH_SEGMENT}.
+ * pchar = unreserved / pct-encoded / sub-delims / ":" (not registered as safe by PATH_SEGMENT) / "@"
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ *
+ *
+ * @param url the path segment to encode; returned unchanged when {@code StrUtil.isEmpty(url)} holds
+ * @param charset the charset to encode with; {@code null} falls back to {@code CharsetUtil.defaultCharset()}
+ * @return the encoded path segment
+ * @since 5.6.5
+ */
+ public static String encodePathSegment(String url, Charset charset) {
+ if (StrUtil.isEmpty(url)) {
+ return url;
+ }
+ if (null == charset) {
+ charset = CharsetUtil.defaultCharset();
+ }
+ return URLEncoder.PATH_SEGMENT.encode(url, charset);
+ }
+
+ /**
+ * Encodes a URL fragment using UTF-8.
+ * fragment = *( pchar / "/" / "?" )
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ *
+ *
+ * See: https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
+ *
+ * @param url the fragment text to encode
+ * @return the encoded fragment
+ * @since 5.7.13
+ */
+ public static String encodeFragment(String url) throws UtilException {
+ return encodeFragment(url, CharsetUtil.CHARSET_UTF_8);
+ }
+
+ /**
+ * Encodes text for use as a URL fragment, using {@code URLEncoder.FRAGMENT}.
+ * fragment = *( pchar / "/" / "?" )
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ *
+ *
+ * See: https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
+ *
+ * @param url the fragment text to encode; returned unchanged when {@code StrUtil.isEmpty(url)} holds
+ * @param charset the charset to encode with; {@code null} falls back to {@code CharsetUtil.defaultCharset()}
+ * @return the encoded fragment
+ * @since 5.7.13
+ */
+ public static String encodeFragment(String url, Charset charset) {
+ if (StrUtil.isEmpty(url)) {
+ return url;
+ }
+ if (null == charset) {
+ charset = CharsetUtil.defaultCharset();
+ }
+ return URLEncoder.FRAGMENT.encode(url, charset);
+ }
+}
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java b/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java
index 04520fc5c..660ae4bf5 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java
@@ -1,7 +1,9 @@
package cn.hutool.core.net;
import cn.hutool.core.util.CharUtil;
+import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.HexUtil;
+import cn.hutool.core.util.StrUtil;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -30,7 +32,8 @@ public class URLEncoder implements Serializable {
* 默认的编码器针对URI路径编码,定义如下:
*
*
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@" / "/"
+ * default = pchar / "/"
+ * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
*
@@ -42,13 +45,31 @@ public class URLEncoder implements Serializable {
* 默认的编码器针对URI路径编码,定义如下:
*
*
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@"
+ * pchar = unreserved / pct-encoded / sub-delims / ":" (deliberately not registered as safe for path segments) / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
*
+ *
+ * 定义见:https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
*/
public static final URLEncoder PATH_SEGMENT = createPathSegment();
+ /**
+ * URLEncoder for the fragment component of a URL, built by {@code createFragment()}.
+ * fragment = *( pchar / "/" / "?" )
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ *
+ *
+ * See: https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
+ * @since 5.7.13
+ */
+ public static final URLEncoder FRAGMENT = createFragment();
+
/**
* 用于查询语句的URLEncoder
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@" / "/"
+ * default = pchar / "/"
+ * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
*
@@ -92,21 +114,14 @@ public class URLEncoder implements Serializable {
encoder.addSafeCharacter('.');
encoder.addSafeCharacter('_');
encoder.addSafeCharacter('~');
+
// Add the sub-delims
- encoder.addSafeCharacter('!');
- encoder.addSafeCharacter('$');
- encoder.addSafeCharacter('&');
- encoder.addSafeCharacter('\'');
- encoder.addSafeCharacter('(');
- encoder.addSafeCharacter(')');
- encoder.addSafeCharacter('*');
- encoder.addSafeCharacter('+');
- encoder.addSafeCharacter(',');
- encoder.addSafeCharacter(';');
- encoder.addSafeCharacter('=');
+ addSubDelims(encoder);
+
// Add the remaining literals
encoder.addSafeCharacter(':');
encoder.addSafeCharacter('@');
+
// Add '/' so it isn't encoded when we encode a path
encoder.addSafeCharacter('/');
@@ -118,37 +133,71 @@ public class URLEncoder implements Serializable {
* 默认的编码器针对URI路径的每一段编码,定义如下:
*
*
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@"
+ * pchar = unreserved / pct-encoded / sub-delims / ":" (deliberately not registered as safe for path segments) / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
*
*
+ * 定义见:https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
+ *
* @return URLEncoder
*/
public static URLEncoder createPathSegment() {
final URLEncoder encoder = new URLEncoder();
+
+ // unreserved (the punctuation part: "-", ".", "_", "~")
encoder.addSafeCharacter('-');
encoder.addSafeCharacter('.');
encoder.addSafeCharacter('_');
encoder.addSafeCharacter('~');
+
// Add the sub-delims
- encoder.addSafeCharacter('!');
- encoder.addSafeCharacter('$');
- encoder.addSafeCharacter('&');
- encoder.addSafeCharacter('\'');
- encoder.addSafeCharacter('(');
- encoder.addSafeCharacter(')');
- encoder.addSafeCharacter('*');
- encoder.addSafeCharacter('+');
- encoder.addSafeCharacter(',');
- encoder.addSafeCharacter(';');
- encoder.addSafeCharacter('=');
+ addSubDelims(encoder);
+
// Add the remaining literals
+ // ":" is deliberately NOT registered as safe here (RFC 3986 segment-nz-nc:
+ // "non-zero-length segment without any colon"), so only "@" is added below.
encoder.addSafeCharacter('@');
return encoder;
}
+ /**
+ * Creates the URLEncoder used for the fragment component of a URL.
+ * fragment = *( pchar / "/" / "?" )
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ *
+ *
+ * See: https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
+ *
+ * @return a new URLEncoder with the fragment-safe characters registered
+ * @since 5.7.13
+ */
+ public static URLEncoder createFragment() {
+ final URLEncoder encoder = new URLEncoder();
+ encoder.addSafeCharacter('-');
+ encoder.addSafeCharacter('.');
+ encoder.addSafeCharacter('_');
+ encoder.addSafeCharacter('~');
+
+ // Add the sub-delims
+ addSubDelims(encoder);
+
+ // Add the remaining pchar literals
+ encoder.addSafeCharacter(':');
+ encoder.addSafeCharacter('@');
+ // Fragment-only additions on top of pchar: "/" and "?"
+ encoder.addSafeCharacter('/');
+ encoder.addSafeCharacter('?');
+
+ return encoder;
+ }
+
/**
* 创建用于查询语句的URLEncoder
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@"
- * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
- *
- *
- * @param url URL
- * @return 编码后的URL
- * @throws UtilException UnsupportedEncodingException
- * @since 5.6.5
- */
- public static String encodePathSegment(String url) throws UtilException {
- return encodePathSegment(url, CharsetUtil.CHARSET_UTF_8);
- }
-
- /**
- * 编码字符为URL中查询语句
- * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@"
- * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
- *
- *
- * @param url 被编码内容
- * @param charset 编码
- * @return 编码后的字符
- * @since 5.6.5
- */
- public static String encodePathSegment(String url, Charset charset) {
- if (StrUtil.isEmpty(url)) {
- return url;
- }
- if (null == charset) {
- charset = CharsetUtil.defaultCharset();
- }
- return URLEncoder.PATH_SEGMENT.encode(url, charset);
- }
-
//-------------------------------------------------------------------------- decode
/**
diff --git a/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java b/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java
index bf4f81efb..f53085fc6 100644
--- a/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java
+++ b/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java
@@ -1,11 +1,9 @@
package cn.hutool.core.net;
import cn.hutool.core.date.DateUtil;
-import cn.hutool.core.lang.Console;
import cn.hutool.core.net.url.UrlBuilder;
import cn.hutool.core.util.CharsetUtil;
import org.junit.Assert;
-import org.junit.Ignore;
import org.junit.Test;
import java.net.URI;
@@ -263,10 +261,14 @@ public class UrlBuilderTest {
}
@Test
- @Ignore
public void fragmentEncodeTest(){
+ // https://gitee.com/dromara/hutool/issues/I49KAL
+ // See: https://stackoverflow.com/questions/26088849/url-fragment-allowed-characters
String url = "https://hutool.cn/docs/#/?id=简介";
- final UrlBuilder urlBuilder = UrlBuilder.ofHttp(url);
- Console.log(urlBuilder.toString());
+ UrlBuilder urlBuilder = UrlBuilder.ofHttp(url);
+ Assert.assertEquals("https://hutool.cn/docs/#/?id=%E7%AE%80%E4%BB%8B", urlBuilder.toString());
+ // Round trip: re-parsing the already-encoded URL must yield the same string (no double-encoding of '%').
+ urlBuilder = UrlBuilder.ofHttp(urlBuilder.toString());
+ Assert.assertEquals("https://hutool.cn/docs/#/?id=%E7%AE%80%E4%BB%8B", urlBuilder.toString());
}
}