diff --git a/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java b/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java index 91faa74f6..3cb1a51d5 100644 --- a/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java +++ b/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java @@ -62,8 +62,11 @@ public class PercentCodec implements Serializable { * 存放安全编码 */ private final BitSet safeCharacters; + /** - * 是否编码空格为+ + * 是否编码空格为+
+ * 如果为{@code true},则将空格编码为"+",此项只在"application/x-www-form-urlencoded"中使用
+ * 如果为{@code false},则空格编码为"%20",此项一般用于URL的Query部分(RFC3986规范) */ private boolean encodeSpaceAsPlus = false; @@ -130,7 +133,9 @@ public class PercentCodec implements Serializable { } /** - * 是否将空格编码为+ + * 是否将空格编码为+
+ * 如果为{@code true},则将空格编码为"+",此项只在"application/x-www-form-urlencoded"中使用
+ * 如果为{@code false},则空格编码为"%20",此项一般用于URL的Query部分(RFC3986规范) * * @param encodeSpaceAsPlus 是否将空格编码为+ * @return this diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/RegexPool.java b/hutool-core/src/main/java/cn/hutool/core/lang/RegexPool.java index 330775887..7d1295e0c 100755 --- a/hutool-core/src/main/java/cn/hutool/core/lang/RegexPool.java +++ b/hutool-core/src/main/java/cn/hutool/core/lang/RegexPool.java @@ -100,6 +100,11 @@ public interface RegexPool { * 生日 */ String BIRTHDAY = "^(\\d{2,4})([/\\-.年]?)(\\d{1,2})([/\\-.月]?)(\\d{1,2})日?$"; + /** + * URI
+ * 定义见:https://www.ietf.org/rfc/rfc3986.html#appendix-B + */ + String URI = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; /** * URL */ diff --git a/hutool-core/src/main/java/cn/hutool/core/net/FormUrlencoded.java b/hutool-core/src/main/java/cn/hutool/core/net/FormUrlencoded.java new file mode 100644 index 000000000..5de8513f2 --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/net/FormUrlencoded.java @@ -0,0 +1,25 @@ +package cn.hutool.core.net; + +import cn.hutool.core.codec.PercentCodec; + +/** + * application/x-www-form-urlencoded,遵循W3C HTML Form content types规范,如空格须转+,+须被编码
+ * 规范见:https://url.spec.whatwg.org/#urlencoded-serializing + * + * @since 5.7.16 + */ +public class FormUrlencoded { + + /** + * query中的value
+ * value不能包含"{@code &}",可以包含 "=" + */ + public static final PercentCodec QUERY_PARAM_VALUE = PercentCodec.of(RFC3986.QUERY_PARAM_VALUE) + .setEncodeSpaceAsPlus(true).removeSafe('+'); + + /** + * query中的key
+ * key must not contain "{@code &}" or "="; built from a copy of QUERY_PARAM_VALUE so that dropping "=" here leaves the value codec untouched + */ + public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY_PARAM_VALUE).setEncodeSpaceAsPlus(true).removeSafe('='); +} diff --git a/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java b/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java index 713b082c2..a40764932 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java @@ -3,7 +3,8 @@ package cn.hutool.core.net; import cn.hutool.core.codec.PercentCodec; /** - * rfc3986 : https://www.ietf.org/rfc/rfc3986.html 编码实现 + * rfc3986 : https://www.ietf.org/rfc/rfc3986.html 编码实现
+ * 定义见:https://www.ietf.org/rfc/rfc3986.html#appendix-A * * @author looly * @since 5.7.16 @@ -21,12 +22,14 @@ public class RFC3986 { public static final PercentCodec SUB_DELIMS = PercentCodec.of("!$&'()*+,;="); /** - * reserved = gen-delims / sub-delims + * reserved = gen-delims / sub-delims
+ * see:https://www.ietf.org/rfc/rfc3986.html#section-2.2 */ public static final PercentCodec RESERVED = GEN_DELIMS.orNew(SUB_DELIMS); /** - * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * see: https://www.ietf.org/rfc/rfc3986.html#section-2.3 */ public static final PercentCodec UNRESERVED = PercentCodec.of(unreservedChars()); @@ -36,7 +39,8 @@ public class RFC3986 { public static final PercentCodec PCHAR = UNRESERVED.orNew(SUB_DELIMS).or(PercentCodec.of(":@")); /** - * segment = pchar + * segment = pchar
+ * see: https://www.ietf.org/rfc/rfc3986.html#section-3.3 */ public static final PercentCodec SEGMENT = PCHAR; /** @@ -60,15 +64,17 @@ public class RFC3986 { public static final PercentCodec FRAGMENT = QUERY; /** - * query中的key - */ - public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY).removeSafe('&').removeSafe('='); - - /** - * query中的value + * query中的value
+ * value不能包含"{@code &}",可以包含 "=" */ public static final PercentCodec QUERY_PARAM_VALUE = PercentCodec.of(QUERY).removeSafe('&'); + /** + * query中的key
+ * key must not contain "{@code &}" or "="; built from a copy of QUERY_PARAM_VALUE so that dropping "=" here leaves the value codec untouched + */ + public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY_PARAM_VALUE).removeSafe('='); + + /** * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" * diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java b/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java index 8ae9bc59f..bd0adc849 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java @@ -41,9 +41,10 @@ public class URLDecoder implements Serializable { } /** - * 解码 + * 解码
+ * 规则见:https://url.spec.whatwg.org/#urlencoded-parsing *
-	 *   1. 将+和%20转换为空格 ;
+	 *   1. 将+和%20转换为空格(" ");
 	 *   2. 将"%xy"转换为文本形式,xy是两位16进制的数值;
 	 *   3. 跳过不符合规范的%形式,直接输出
 	 * 
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java b/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java index 94d462bd7..cc01cd212 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/URLEncodeUtil.java @@ -7,7 +7,8 @@ import cn.hutool.core.util.StrUtil; import java.nio.charset.Charset; /** - * URL编码工具 + * URL编码工具
+ * TODO 在6.x中移除此工具(无法很好区分URL编码和www-form编码) * * @since 5.7.13 * @author looly diff --git a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java index bc91edf9b..890c14b66 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java @@ -127,6 +127,9 @@ public class UrlPath { final StringBuilder builder = new StringBuilder(); for (String segment : segments) { + // 根据https://www.ietf.org/rfc/rfc3986.html#section-3.3定义 + // path的第一部分允许有":",其余部分不允许 + // 在此处的Path部分特指host之后的部分,即不包含第一部分 builder.append(CharUtil.SLASH).append(RFC3986.SEGMENT_NZ_NC.encode(segment, charset)); } if (withEngTag || StrUtil.isEmpty(builder)) { diff --git a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java index cd7fff640..536b08c0d 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java @@ -144,48 +144,7 @@ public class UrlQuery { } } - final int len = queryStr.length(); - String name = null; - int pos = 0; // 未处理字符开始位置 - int i; // 未处理字符结束位置 - char c; // 当前字符 - for (i = 0; i < len; i++) { - c = queryStr.charAt(i); - switch (c) { - case '='://键和值的分界符 - if (null == name) { - // name可以是"" - name = queryStr.substring(pos, i); - // 开始位置从分节符后开始 - pos = i + 1; - } - // 当=不作为分界符时,按照普通字符对待 - break; - case '&'://键值对之间的分界符 - addParam(name, queryStr.substring(pos, i), charset); - name = null; - if (i + 4 < len && "amp;".equals(queryStr.substring(i + 1, i + 5))) { - // issue#850@Github,"&"转义为"&" - i += 4; - } - // 开始位置从分节符后开始 - pos = i + 1; - break; - } - } - - if (i - pos == len) { - // 没有任何参数符号 - if (queryStr.startsWith("http") || queryStr.contains("/")) { - // 可能为url路径,忽略之 - return this; - } - } - - // 处理结尾 - addParam(name, queryStr.substring(pos, i), charset); - - return this; + return 
doParse(queryStr, charset); } /** @@ -250,6 +209,60 @@ public class UrlQuery { return build(null); } + /** + * 解析URL中的查询字符串
+ * Rules: see https://url.spec.whatwg.org/#urlencoded-parsing + * + * @param queryStr query string, e.g. key1=v1&amp;key2=&amp;key3=v3 + * @param charset charset used for decoding; null means no decoding + * @return this + * @since 5.5.8 + */ + private UrlQuery doParse(String queryStr, Charset charset) { + final int len = queryStr.length(); + String name = null; + int pos = 0; // start of the not-yet-consumed text + int i; // end of the not-yet-consumed text + char c; // current character + for (i = 0; i < len; i++) { + c = queryStr.charAt(i); + switch (c) { + case '='://separator between key and value + if (null == name) { + // name may be "" + name = queryStr.substring(pos, i); + // the next token starts right after the separator + pos = i + 1; + } + // an '=' that is not the key/value separator is treated as an ordinary character + break; + case '&'://separator between key-value pairs + addParam(name, queryStr.substring(pos, i), charset); + name = null; + if (i + 4 < len && "amp;".equals(queryStr.substring(i + 1, i + 5))) { + // issue#850@Github: "&" may arrive escaped as "&amp;", so skip the "amp;" tail + i += 4; + } + // the next token starts right after the separator + pos = i + 1; + break; + } + } + + if (i - pos == len) { + // pos never moved: no '=' or '&' was seen + if (queryStr.startsWith("http") || queryStr.contains("/")) { + // probably a URL path rather than a query; ignore it + return this; + } + } + + // handle the trailing pair + addParam(name, queryStr.substring(pos, i), charset); + + return this; + } + /** * 对象转换为字符串,用于URL的Query中 * diff --git a/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java index 045fc72b8..5745bce89 100644 --- a/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java @@ -319,7 +319,8 @@ public class URLUtil extends URLEncodeUtil { /** * 解码application/x-www-form-urlencoded字符
- * 将%开头的16进制表示的内容解码。 + * 将%开头的16进制表示的内容解码。
+ * 规则见:https://url.spec.whatwg.org/#urlencoded-parsing * * @param content 被解码内容 * @param charset 编码,null表示不解码 diff --git a/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java b/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java index 85d11ec00..ca5f9581a 100644 --- a/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java @@ -3,6 +3,7 @@ package cn.hutool.core.net; import cn.hutool.core.map.MapUtil; import cn.hutool.core.net.url.UrlBuilder; import cn.hutool.core.net.url.UrlQuery; +import cn.hutool.core.util.CharsetUtil; import cn.hutool.core.util.URLUtil; import org.junit.Assert; import org.junit.Test; @@ -99,4 +100,18 @@ public class UrlQueryTest { query = URLUtil.buildQuery(map, StandardCharsets.UTF_8); Assert.assertEquals("password==&username%3D=SSM", query); } + + @Test + public void plusTest(){ + // 根据RFC3986,在URL中,+是安全字符,即此符号不转义 + final String a = UrlQuery.of(MapUtil.of("a+b", "1+2")).build(CharsetUtil.CHARSET_UTF_8); + Assert.assertEquals("a+b=1+2", a); + } + + @Test + public void spaceTest(){ + // 根据RFC3986,在URL中,空格编码为"%20" + final String a = UrlQuery.of(MapUtil.of("a ", " ")).build(CharsetUtil.CHARSET_UTF_8); + Assert.assertEquals("a%20=%20", a); + } } diff --git a/hutool-http/src/main/java/cn/hutool/http/HttpUtil.java b/hutool-http/src/main/java/cn/hutool/http/HttpUtil.java index b7f485b0b..644300c47 100644 --- a/hutool-http/src/main/java/cn/hutool/http/HttpUtil.java +++ b/hutool-http/src/main/java/cn/hutool/http/HttpUtil.java @@ -6,6 +6,7 @@ import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.IoUtil; import cn.hutool.core.io.StreamProgress; import cn.hutool.core.map.MapUtil; +import cn.hutool.core.net.RFC3986; import cn.hutool.core.net.url.UrlQuery; import cn.hutool.core.text.StrBuilder; import cn.hutool.core.util.CharsetUtil; @@ -557,9 +558,10 @@ public class HttpUtil { if (null == name) { // 对于像&a&这类无参数值的字符串,我们将name为a的值设为"" name = 
paramPart.substring(pos, i); - builder.append(URLUtil.encodeQuery(name, charset)).append('='); + builder.append(RFC3986.QUERY_PARAM_NAME.encode(name, charset)).append('='); } else { - builder.append(URLUtil.encodeQuery(name, charset)).append('=').append(URLUtil.encodeQuery(paramPart.substring(pos, i), charset)).append('&'); + builder.append(RFC3986.QUERY_PARAM_NAME.encode(name, charset)).append('=') + .append(RFC3986.QUERY_PARAM_VALUE.encode(paramPart.substring(pos, i), charset)).append('&'); } name = null; }