diff --git a/CHANGELOG.md b/CHANGELOG.md
index 41c510b61..9fab546fb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
 * 【core 】 MailAccount增加setEncodefilename()方法,可选是否编码附件的文件名(issue#I4F160@Gitee)
 * 【core 】 MailAccount中charset增加null时的默认规则
 * 【core 】 NumberUtil.compare修正注释说明(issue#I4FAJ1@Gitee)
+* 【core 】 增加RFC3986类
 
 ### 🐞Bug修复
 * 【core 】 修复UrlBuilder.addPath歧义问题(issue#1912@Github)
diff --git a/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java b/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java
new file mode 100644
index 000000000..91faa74f6
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java
@@ -0,0 +1,187 @@
+package cn.hutool.core.codec;
+
+import cn.hutool.core.util.CharUtil;
+import cn.hutool.core.util.HexUtil;
+import cn.hutool.core.util.StrUtil;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Serializable;
+import java.nio.charset.Charset;
+import java.util.BitSet;
+
+/**
+ * Percent-encoding, also known as URL encoding.
+ * It can be used to encode URIs and to prepare data for the
+ * "application/x-www-form-urlencoded" MIME type.
+ *
+ * Characters that are not registered as safe are converted to bytes in the requested charset, and
+ * each byte is written as "%" followed by two hexadecimal digits. For example, the space character
+ * (SP, binary "00100000" in ASCII) becomes "%20".
+ *
+ * Different scenarios follow different specifications: which characters stay unencoded is decided
+ * by the safe-character set of each instance, and {@link #setEncodeSpaceAsPlus(boolean)} enables
+ * writing a space as "+".
+ *
+ * @author looly
+ * @since 5.7.16
+ */
+public class PercentCodec implements Serializable {
+	private static final long serialVersionUID = 1L;
+
+	/**
+	 * Creates a PercentCodec from an existing one by copying its safe characters.
+	 * The encodeSpaceAsPlus setting is not copied.
+	 *
+	 * @param codec PercentCodec to copy the safe characters from
+	 * @return a new PercentCodec
+	 */
+	public static PercentCodec of(PercentCodec codec) {
+		return new PercentCodec((BitSet) codec.safeCharacters.clone());
+	}
+
+	/**
+	 * Creates a PercentCodec whose safe characters are the characters of the given sequence.
+	 *
+	 * @param chars the safe characters
+	 * @return a new PercentCodec
+	 */
+	public static PercentCodec of(CharSequence chars) {
+		final PercentCodec codec = new PercentCodec();
+		final int length = chars.length();
+		for (int i = 0; i < length; i++) {
+			codec.addSafe(chars.charAt(i));
+		}
+		return codec;
+	}
+
+	/**
+	 * Safe characters, indexed by char value; a set bit means the character is not encoded
+	 */
+	private final BitSet safeCharacters;
+	/**
+	 * Whether a space is encoded as "+"
+	 */
+	private boolean encodeSpaceAsPlus = false;
+
+	/**
+	 * Constructs a codec with an empty safe-character set: nothing is safe until characters are
+	 * added, for example with {@link #addSafe(char)}.
+	 */
+	public PercentCodec() {
+		this(new BitSet(256));
+	}
+
+	/**
+	 * Constructs a codec that uses the given safe-character set.
+	 *
+	 * @param safeCharacters safe characters, which are not encoded; the BitSet is stored as-is, not copied
+	 */
+	public PercentCodec(BitSet safeCharacters) {
+		this.safeCharacters = safeCharacters;
+	}
+
+	/**
+	 * Adds a safe character; safe characters are not encoded.
+	 *
+	 * @param c the character
+	 * @return this
+	 */
+	public PercentCodec addSafe(char c) {
+		safeCharacters.set(c);
+		return this;
+	}
+
+	/**
+	 * Removes a safe character, so that it is no longer exempt from encoding.
+	 *
+	 * @param c the character
+	 * @return this
+	 */
+	public PercentCodec removeSafe(char c) {
+		safeCharacters.clear(c);
+		return this;
+	}
+
+	/**
+	 * Adds the safe characters of the given codec to the current PercentCodec.
+	 *
+	 * @param codec PercentCodec whose safe characters are merged into this one
+	 * @return this
+	 */
+	public PercentCodec or(PercentCodec codec) {
+		this.safeCharacters.or(codec.safeCharacters);
+		return this;
+	}
+
+	/**
+	 * Combines this PercentCodec and the given one into a new PercentCodec whose safe characters
+	 * are the union of both. This instance is copied first, so it is not modified.
+	 *
+	 * @param codec PercentCodec to combine with
+	 * @return the new PercentCodec
+	 */
+	public PercentCodec orNew(PercentCodec codec) {
+		return of(this).or(codec);
+	}
+
+	/**
+	 * Sets whether a space is encoded as "+".
+	 *
+	 * @param encodeSpaceAsPlus whether a space is encoded as "+"
+	 * @return this
+	 */
+	public PercentCodec setEncodeSpaceAsPlus(boolean encodeSpaceAsPlus) {
+		this.encodeSpaceAsPlus = encodeSpaceAsPlus;
+		return this;
+	}
+
+	/**
+	 * Encodes a string into its percent-encoded ("%XX") form.
+	 *
+	 * @param path    the string to encode
+	 * @param charset the charset used to turn characters into bytes; {@code null} means no encoding
+	 *                and the original string is returned
+	 * @return the encoded string
+	 */
+	public String encode(CharSequence path, Charset charset) {
+		if (null == charset || StrUtil.isEmpty(path)) {
+			return StrUtil.str(path);
+		}
+
+		final StringBuilder rewrittenPath = new StringBuilder(path.length());
+		final ByteArrayOutputStream buf = new ByteArrayOutputStream();
+		final OutputStreamWriter writer = new OutputStreamWriter(buf, charset);
+
+		int c;
+		for (int i = 0; i < path.length(); i++) {
+			c = path.charAt(i);
+			if (safeCharacters.get(c)) {
+				rewrittenPath.append((char) c);
+			} else if (encodeSpaceAsPlus && c == CharUtil.SPACE) {
+				// a space is handled separately
+				rewrittenPath.append('+');
+			} else {
+				// convert to external encoding before hex conversion
+				try {
+					writer.write((char) c);
+					writer.flush();
+				} catch (IOException ignored) {
+					// best effort: drop a character that could not be written and keep going
+					buf.reset();
+					continue;
+				}
+
+				final byte[] ba = buf.toByteArray();
+				for (byte toEncode : ba) {
+					// Converting each byte in the buffer
+					rewrittenPath.append('%');
+					HexUtil.appendHex(rewrittenPath, toEncode, false);
+				}
+				buf.reset();
+			}
+		}
+		return rewrittenPath.toString();
+	}
+}
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java b/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java
new file mode 100644
index 000000000..b8ca1ad72
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java
@@ -0,0 +1,98 @@
+package cn.hutool.core.net;
+
+import cn.hutool.core.codec.PercentCodec;
+
+/**
+ * Percent-encoding character sets defined by rfc3986 : https://www.ietf.org/rfc/rfc3986.html
+ *
+ * @author looly
+ * @since 5.7.16
+ */
+public class RFC3986 {
+
+	/**
+	 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+	 */
+	public static final PercentCodec GEN_DELIMS = PercentCodec.of(":/?#[]@");
+
+	/**
+	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+	 */
+	public static final PercentCodec SUB_DELIMS = PercentCodec.of("!$&'()*+,;=");
+
+	/**
+	 * reserved = gen-delims / sub-delims
+	 */
+	public static final PercentCodec RESERVED = GEN_DELIMS.orNew(SUB_DELIMS);
+
+	/**
+	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+	 */
+	public static final PercentCodec UNRESERVED = PercentCodec.of(unreservedChars());
+
+	/**
+	 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+	 */
+	public static final PercentCodec PCHAR = UNRESERVED.orNew(SUB_DELIMS).or(PercentCodec.of(":@"));
+
+	/**
+	 * segment = pchar (the same instance as {@link #PCHAR})
+	 */
+	public static final PercentCodec SEGMENT = PCHAR;
+	/**
+	 * segment-nz-nc = SEGMENT ; non-zero-length segment without any colon ":"
+	 */
+	public static final PercentCodec SEGMENT_NZ_NC = PercentCodec.of(SEGMENT).removeSafe(':');
+
+	/**
+	 * path = segment / "/"
+	 */
+	public static final PercentCodec PATH = SEGMENT.orNew(PercentCodec.of("/"));
+
+	/**
+	 * query = pchar / "/" / "?"
+	 */
+	public static final PercentCodec QUERY = PCHAR.orNew(PercentCodec.of("/?"));
+
+	/**
+	 * fragment = pchar / "/" / "?" (the same instance as {@link #QUERY})
+	 */
+	public static final PercentCodec FRAGMENT = QUERY;
+
+	/**
+	 * Name (key) of a query parameter: the query characters without "&" and "="
+	 */
+	public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY).removeSafe('&').removeSafe('=');
+
+	/**
+	 * Value of a query parameter: the query characters without "&"
+	 */
+	public static final PercentCodec QUERY_PARAM_VALUE = PercentCodec.of(QUERY).removeSafe('&');
+
+	/**
+	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+	 *
+	 * @return the unreserved characters
+	 */
+	private static StringBuilder unreservedChars() {
+		StringBuilder sb = new StringBuilder();
+
+		// ALPHA
+		for (char c = 'A'; c <= 'Z'; c++) {
+			sb.append(c);
+		}
+		for (char c = 'a'; c <= 'z'; c++) {
+			sb.append(c);
+		}
+
+		// DIGIT
+		for (char c = '0'; c <= '9'; c++) {
+			sb.append(c);
+		}
+
+		// "-" / "." / "_" / "~"
+		sb.append("_.-~");
+
+		return sb;
+	}
+}
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlBuilder.java b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlBuilder.java
index f6a7c266d..708e9721a 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlBuilder.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlBuilder.java
@@ -1,6 +1,7 @@
 package cn.hutool.core.net.url;
 
 import cn.hutool.core.lang.Assert;
+import cn.hutool.core.net.RFC3986;
 import cn.hutool.core.util.CharsetUtil;
 import cn.hutool.core.util.StrUtil;
 import cn.hutool.core.util.URLUtil;
@@ -425,7 +426,7 @@ public final class UrlBuilder implements Serializable {
 	 * @return 标识符,例如#后边的部分
 	 */
 	public String getFragmentEncoded() {
-		return URLUtil.encodeFragment(this.fragment, this.charset);
+		return RFC3986.FRAGMENT.encode(this.fragment, this.charset);
 	}
 
 	/**
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java
index 78e2c393e..bc91edf9b 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java
@@ -2,10 +2,10 @@ package cn.hutool.core.net.url;
 
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.lang.Assert;
+import cn.hutool.core.net.RFC3986;
 import cn.hutool.core.net.URLDecoder;
 import cn.hutool.core.util.CharUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.core.util.URLUtil;
 
 import java.nio.charset.Charset;
 import java.util.LinkedList;
@@ -127,7 +127,7 @@ public class UrlPath {
 
 		final StringBuilder builder = new StringBuilder();
 		for (String segment : segments) {
-			builder.append(CharUtil.SLASH).append(URLUtil.encodePathSegment(segment, charset));
+			builder.append(CharUtil.SLASH).append(RFC3986.SEGMENT_NZ_NC.encode(segment, charset));
 		}
 		if (withEngTag || StrUtil.isEmpty(builder)) {
 			builder.append(CharUtil.SLASH);
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java
index 8f882d9f5..69d5ec641 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlQuery.java
@@ -5,6 +5,7 @@ import cn.hutool.core.collection.IterUtil;
 import cn.hutool.core.convert.Convert;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.map.TableMap;
+import cn.hutool.core.net.RFC3986;
 import cn.hutool.core.util.StrUtil;
 import cn.hutool.core.util.URLUtil;
 
@@ -220,10 +221,15 @@ public class UrlQuery {
 	}
 
 	/**
-	 * 构建URL查询字符串,即将key-value键值对转换为key1=v1&key2=&key3=v3形式
+	 * Builds the URL query string, i.e. turns the key-value pairs into the form
+	 * {@code key1=v1&key2=v2&key3=v3}.
+	 * {@code null} is handled as follows:
+	 * - an entry whose key is {@code null} is skipped
+	 * - for a {@code null} value only the key is written, giving a form like
+	 *   {@code key1&key2=v2}
 	 *
 	 * @param charset encode编码,null表示不做encode编码
-	 * @param isEncode 是否转义键和值
+	 * @param isEncode whether to escape keys and values; escaping follows rfc3986
 	 * @return URL查询字符串
 	 * @since 5.7.13
 	 */
@@ -233,21 +239,18 @@ public class UrlQuery {
 		}
 
 		final StringBuilder sb = new StringBuilder();
-		boolean isFirst = true;
-		CharSequence key;
+		CharSequence name;
 		CharSequence value;
 		for (Map.Entry<CharSequence, CharSequence> entry : this.query) {
-			if (isFirst) {
-				isFirst = false;
-			} else {
-				sb.append("&");
-			}
-			key = entry.getKey();
-			if (null != key) {
-				sb.append(toStr(key, charset, isEncode));
+			name = entry.getKey();
+			if (null != name) {
+				if (sb.length() > 0) {
+					sb.append("&");
+				}
+				sb.append(isEncode ? RFC3986.QUERY_PARAM_NAME.encode(name, charset) : name);
 				value = entry.getValue();
 				if (null != value) {
-					sb.append("=").append(toStr(value, charset, isEncode));
+					sb.append("=").append(isEncode ? RFC3986.QUERY_PARAM_VALUE.encode(value, charset) : value);
 				}
 			}
 		}
@@ -301,18 +304,19 @@ public class UrlQuery {
 	}
 
 	/**
-	 * 键值对的{@link CharSequence}转换为String,可选是否转义
+	 * Converts the name of a key-value pair to a String, optionally escaping it.
+	 * NOTE(review): build() no longer calls this helper in this patch - confirm it is still needed.
 	 *
 	 * @param str 原字符串
 	 * @param charset 编码,只用于encode中
-	 * @param isEncode 是否转义
+	 * @param isEncode whether to escape; escaping follows rfc3986
 	 * @return 转换后的String
 	 * @since 5.7.13
 	 */
-	private static String toStr(CharSequence str, Charset charset, boolean isEncode) {
+	private static String nameToStr(CharSequence str, Charset charset, boolean isEncode) {
 		String result = StrUtil.str(str);
 		if (isEncode) {
-			result = URLUtil.encodeFragment(result, charset);
+			result = RFC3986.QUERY_PARAM_NAME.encode(result, charset);
 		}
 		return result;
 	}
diff --git a/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java b/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java
index d4ab80682..85d11ec00 100644
--- a/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java
+++ b/hutool-core/src/test/java/cn/hutool/core/net/UrlQueryTest.java
@@ -63,4 +63,40 @@ public class UrlQueryTest {
 		query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
 		Assert.assertEquals("password=123456&username=SSM", query);
 	}
+
+	@Test
+	public void buildHasNullTest() {
+		Map<String, Object> map = new LinkedHashMap<>();
+		map.put(null, "SSM"); // an entry with a null key is expected to be dropped
+		map.put("password", "123456");
+		String query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
+		Assert.assertEquals("password=123456", query);
+
+		map = new TreeMap<>();
+		map.put("username", "SSM");
+		map.put("password", ""); // an empty value keeps the "="
+		query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
+		Assert.assertEquals("password=&username=SSM", query);
+
+		map = new TreeMap<>();
+		map.put("username", "SSM");
+		map.put("password", null); // a null value leaves only the name
+		query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
+		Assert.assertEquals("password&username=SSM", query);
+	}
+
+	@Test
+	public void buildSpecialTest() {
+		Map<String, Object> map = new LinkedHashMap<>();
+		map.put("key1&", "SSM"); // "&" must be escaped in names and values
+		map.put("key2", "123456&");
+		String query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
+		Assert.assertEquals("key1%26=SSM&key2=123456%26", query);
+
+		map = new TreeMap<>();
+		map.put("username=", "SSM"); // "=" is escaped in a name but kept in a value
+		map.put("password", "=");
+		query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
+		Assert.assertEquals("password==&username%3D=SSM", query);
+	}
 }
diff --git a/hutool-http/src/test/java/cn/hutool/http/HttpUtilTest.java b/hutool-http/src/test/java/cn/hutool/http/HttpUtilTest.java
index b89e7ea99..e829d7752 100644
--- a/hutool-http/src/test/java/cn/hutool/http/HttpUtilTest.java
+++ b/hutool-http/src/test/java/cn/hutool/http/HttpUtilTest.java
@@ -346,5 +346,4 @@ public class HttpUtilTest {
 				.execute().body();
 		Console.log(body);
 	}
-
 }