diff --git a/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java b/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java
index 91faa74f6..3cb1a51d5 100644
--- a/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java
+++ b/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java
@@ -62,8 +62,11 @@ public class PercentCodec implements Serializable {
* 存放安全编码
*/
private final BitSet safeCharacters;
+
/**
- * 是否编码空格为+
+ * 是否编码空格为+
+ * 如果为{@code true},则将空格编码为"+",此项只在"application/x-www-form-urlencoded"中使用
+ * 如果为{@code false},则空格编码为"%20",此项一般用于URL的Query部分(RFC3986规范)
*/
private boolean encodeSpaceAsPlus = false;
@@ -130,7 +133,9 @@ public class PercentCodec implements Serializable {
}
/**
- * 是否将空格编码为+
+ * 是否将空格编码为+
+ * 如果为{@code true},则将空格编码为"+",此项只在"application/x-www-form-urlencoded"中使用
+ * 如果为{@code false},则空格编码为"%20",此项一般用于URL的Query部分(RFC3986规范)
*
* @param encodeSpaceAsPlus 是否将空格编码为+
* @return this
diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/RegexPool.java b/hutool-core/src/main/java/cn/hutool/core/lang/RegexPool.java
index 330775887..7d1295e0c 100755
--- a/hutool-core/src/main/java/cn/hutool/core/lang/RegexPool.java
+++ b/hutool-core/src/main/java/cn/hutool/core/lang/RegexPool.java
@@ -100,6 +100,11 @@ public interface RegexPool {
* 生日
*/
String BIRTHDAY = "^(\\d{2,4})([/\\-.年]?)(\\d{1,2})([/\\-.月]?)(\\d{1,2})日?$";
+ /**
+ * URI
+ * Definition: see https://www.ietf.org/rfc/rfc3986.html#appendix-B
+ */
+ String URI = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
/**
* URL
*/
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/FormUrlencoded.java b/hutool-core/src/main/java/cn/hutool/core/net/FormUrlencoded.java
new file mode 100644
index 000000000..5de8513f2
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/net/FormUrlencoded.java
@@ -0,0 +1,25 @@
+package cn.hutool.core.net;
+
+import cn.hutool.core.codec.PercentCodec;
+
+/**
+ * Percent codecs for application/x-www-form-urlencoded content: a space is written as "+" and a literal "+" is percent-encoded.
+ * Specification: https://url.spec.whatwg.org/#urlencoded-serializing
+ *
+ * @since 5.7.16
+ */
+public class FormUrlencoded {
+
+ /**
+ * Codec for a value in the query.
+ * A value must not contain "{@code &}" but may contain "=".
+ */
+ public static final PercentCodec QUERY_PARAM_VALUE = PercentCodec.of(RFC3986.QUERY_PARAM_VALUE)
+ .setEncodeSpaceAsPlus(true).removeSafe('+');
+
+ /**
+ * Codec for a key in the query; built from a copy (space-as-plus re-applied) so removing "=" cannot alter QUERY_PARAM_VALUE.
+ * A key must contain neither "{@code &}" nor "=".
+ */
+ public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY_PARAM_VALUE).setEncodeSpaceAsPlus(true).removeSafe('=');
+}
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java b/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java
index 713b082c2..a40764932 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java
@@ -3,7 +3,8 @@ package cn.hutool.core.net;
import cn.hutool.core.codec.PercentCodec;
/**
- * rfc3986 : https://www.ietf.org/rfc/rfc3986.html 编码实现
+ * rfc3986 : https://www.ietf.org/rfc/rfc3986.html 编码实现
+ * 定义见:https://www.ietf.org/rfc/rfc3986.html#appendix-A
*
* @author looly
* @since 5.7.16
@@ -21,12 +22,14 @@ public class RFC3986 {
public static final PercentCodec SUB_DELIMS = PercentCodec.of("!$&'()*+,;=");
/**
- * reserved = gen-delims / sub-delims
+ * reserved = gen-delims / sub-delims
+ * see:https://www.ietf.org/rfc/rfc3986.html#section-2.2
*/
public static final PercentCodec RESERVED = GEN_DELIMS.orNew(SUB_DELIMS);
/**
- * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * see: https://www.ietf.org/rfc/rfc3986.html#section-2.3
*/
public static final PercentCodec UNRESERVED = PercentCodec.of(unreservedChars());
@@ -36,7 +39,8 @@ public class RFC3986 {
public static final PercentCodec PCHAR = UNRESERVED.orNew(SUB_DELIMS).or(PercentCodec.of(":@"));
/**
- * segment = pchar
+ * segment = pchar
+ * see: https://www.ietf.org/rfc/rfc3986.html#section-3.3
*/
public static final PercentCodec SEGMENT = PCHAR;
/**
@@ -60,15 +64,17 @@ public class RFC3986 {
public static final PercentCodec FRAGMENT = QUERY;
/**
- * query中的key
- */
- public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY).removeSafe('&').removeSafe('=');
-
- /**
- * query中的value
+ * query中的value
+ * value不能包含"{@code &}",可以包含 "="
*/
public static final PercentCodec QUERY_PARAM_VALUE = PercentCodec.of(QUERY).removeSafe('&');
+ /**
+ * Codec for a key in the query; built from a copy so that removing "=" leaves QUERY_PARAM_VALUE unchanged.
+ * A key must contain neither "{@code &}" nor "=".
+ */
+ public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY_PARAM_VALUE).removeSafe('=');
+
/**
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
*
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java b/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java
index 8ae9bc59f..bd0adc849 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java
@@ -41,9 +41,10 @@ public class URLDecoder implements Serializable {
}
/**
- * 解码
+ * 解码
+ * 规则见:https://url.spec.whatwg.org/#urlencoded-parsing
*
- * 1. 将+和%20转换为空格 ;
+ * 1. 将+和%20转换为空格(" ");
* 2. 将"%xy"转换为文本形式,xy是两位16进制的数值;
* 3. 跳过不符合规范的%形式,直接输出
*
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java b/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java
index 94d462bd7..cc01cd212 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java
@@ -7,7 +7,8 @@ import cn.hutool.core.util.StrUtil;
import java.nio.charset.Charset;
/**
- * URL编码工具
+ * URL编码工具
+ * TODO 在6.x中移除此工具(无法很好区分URL编码和www-form编码)
*
* @since 5.7.13
* @author looly
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java
index bc91edf9b..890c14b66 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java
@@ -127,6 +127,9 @@ public class UrlPath {
final StringBuilder builder = new StringBuilder();
for (String segment : segments) {
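+ // Per https://www.ietf.org/rfc/rfc3986.html#section-3.3, only the first segment of a relative path
+ // (segment-nz-nc) must not contain ":"; the remaining segments may contain it.
+ // This loop applies SEGMENT_NZ_NC to every segment after the host, i.e. the stricter no-colon rule throughout.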
builder.append(CharUtil.SLASH).append(RFC3986.SEGMENT_NZ_NC.encode(segment, charset));
}
if (withEngTag || StrUtil.isEmpty(builder)) {
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java
index cd7fff640..536b08c0d 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java
@@ -144,48 +144,7 @@ public class UrlQuery {
}
}
- final int len = queryStr.length();
- String name = null;
- int pos = 0; // 未处理字符开始位置
- int i; // 未处理字符结束位置
- char c; // 当前字符
- for (i = 0; i < len; i++) {
- c = queryStr.charAt(i);
- switch (c) {
- case '='://键和值的分界符
- if (null == name) {
- // name可以是""
- name = queryStr.substring(pos, i);
- // 开始位置从分节符后开始
- pos = i + 1;
- }
- // 当=不作为分界符时,按照普通字符对待
- break;
- case '&'://键值对之间的分界符
- addParam(name, queryStr.substring(pos, i), charset);
- name = null;
- if (i + 4 < len && "amp;".equals(queryStr.substring(i + 1, i + 5))) {
- // issue#850@Github,"&"转义为"&"
- i += 4;
- }
- // 开始位置从分节符后开始
- pos = i + 1;
- break;
- }
- }
-
- if (i - pos == len) {
- // 没有任何参数符号
- if (queryStr.startsWith("http") || queryStr.contains("/")) {
- // 可能为url路径,忽略之
- return this;
- }
- }
-
- // 处理结尾
- addParam(name, queryStr.substring(pos, i), charset);
-
- return this;
+ return doParse(queryStr, charset);
}
/**
@@ -250,6 +209,60 @@ public class UrlQuery {
return build(null);
}
+ /**
+ * Parses the query string of a URL into name/value parameters.
+ * Rules: see https://url.spec.whatwg.org/#urlencoded-parsing
+ *
+ * @param queryStr query string, e.g. {@code key1=v1&key2=&key3=v3}
+ * @param charset charset handed to addParam for decoding; documented as: null means no decoding
+ * @return this
+ * @since 5.5.8
+ */
+ private UrlQuery doParse(String queryStr, Charset charset) {
+ final int len = queryStr.length();
+ String name = null;
+ int pos = 0; // start index of the span not yet consumed
+ int i; // current scan index, i.e. end of the span not yet consumed
+ char c; // current character
+ for (i = 0; i < len; i++) {
+ c = queryStr.charAt(i);
+ switch (c) {
+ case '='://separator between name and value
+ if (null == name) {
+ // the name may be the empty string
+ name = queryStr.substring(pos, i);
+ // the next span starts right after the separator
+ pos = i + 1;
+ }
+ // once a name has been read, a further '=' is treated as an ordinary character
+ break;
+ case '&'://separator between name/value pairs
+ addParam(name, queryStr.substring(pos, i), charset);
+ name = null;
+ if (i + 4 < len && "amp;".equals(queryStr.substring(i + 1, i + 5))) {
+ // issue#850@Github: "&amp;" is the HTML-escaped form of "&", so skip the "amp;" suffix
+ i += 4;
+ }
+ // the next span starts right after the separator
+ pos = i + 1;
+ break;
+ }
+ }
+
+ if (i - pos == len) {
+ // pos never moved: neither '=' nor '&' was seen
+ if (queryStr.startsWith("http") || queryStr.contains("/")) {
+ // probably a URL or path rather than a query; ignore it
+ return this;
+ }
+ }
+
+ // handle the trailing span after the last separator
+ addParam(name, queryStr.substring(pos, i), charset);
+
+ return this;
+ }
+
/**
* 对象转换为字符串,用于URL的Query中
*
diff --git a/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java
index 045fc72b8..5745bce89 100644
--- a/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java
@@ -319,7 +319,8 @@ public class URLUtil extends URLEncodeUtil {
/**
* 解码application/x-www-form-urlencoded字符
- * 将%开头的16进制表示的内容解码。
+ * 将%开头的16进制表示的内容解码。
+ * 规则见:https://url.spec.whatwg.org/#urlencoded-parsing
*
* @param content 被解码内容
* @param charset 编码,null表示不解码
diff --git a/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java b/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java
index 85d11ec00..ca5f9581a 100644
--- a/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java
+++ b/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java
@@ -3,6 +3,7 @@ package cn.hutool.core.net;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.net.url.UrlBuilder;
import cn.hutool.core.net.url.UrlQuery;
+import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.URLUtil;
import org.junit.Assert;
import org.junit.Test;
@@ -99,4 +100,18 @@ public class UrlQueryTest {
query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
Assert.assertEquals("password==&username%3D=SSM", query);
}
+
+ @Test
+ public void plusTest(){
+ // Per RFC 3986, "+" is a safe character in a URL, so it is expected to stay unescaped
+ final String a = UrlQuery.of(MapUtil.of("a+b", "1+2")).build(CharsetUtil.CHARSET_UTF_8);
+ Assert.assertEquals("a+b=1+2", a);
+ }
+
+ @Test
+ public void spaceTest(){
+ // Per RFC 3986, a space in a URL is expected to be encoded as "%20"
+ final String a = UrlQuery.of(MapUtil.of("a ", " ")).build(CharsetUtil.CHARSET_UTF_8);
+ Assert.assertEquals("a%20=%20", a);
+ }
}
diff --git a/hutool-http/src/main/java/cn/hutool/http/HttpUtil.java b/hutool-http/src/main/java/cn/hutool/http/HttpUtil.java
index b7f485b0b..644300c47 100644
--- a/hutool-http/src/main/java/cn/hutool/http/HttpUtil.java
+++ b/hutool-http/src/main/java/cn/hutool/http/HttpUtil.java
@@ -6,6 +6,7 @@ import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.io.StreamProgress;
import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.net.RFC3986;
import cn.hutool.core.net.url.UrlQuery;
import cn.hutool.core.text.StrBuilder;
import cn.hutool.core.util.CharsetUtil;
@@ -557,9 +558,10 @@ public class HttpUtil {
if (null == name) {
// 对于像&a&这类无参数值的字符串,我们将name为a的值设为""
name = paramPart.substring(pos, i);
- builder.append(URLUtil.encodeQuery(name, charset)).append('=');
+ builder.append(RFC3986.QUERY_PARAM_NAME.encode(name, charset)).append('=');
} else {
- builder.append(URLUtil.encodeQuery(name, charset)).append('=').append(URLUtil.encodeQuery(paramPart.substring(pos, i), charset)).append('&');
+ builder.append(RFC3986.QUERY_PARAM_NAME.encode(name, charset)).append('=')
+ .append(RFC3986.QUERY_PARAM_VALUE.encode(paramPart.substring(pos, i), charset)).append('&');
}
name = null;
}