mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-04-19 03:01:48 +08:00
add RFC3986
This commit is contained in:
parent
87b88b395e
commit
7248b65f40
@ -11,6 +11,7 @@
|
||||
* 【core 】 MailAccount增加setEncodefilename()方法,可选是否编码附件的文件名(issue#I4F160@Gitee)
|
||||
* 【core 】 MailAccount中charset增加null时的默认规则
|
||||
* 【core 】 NumberUtil.compare修正注释说明(issue#I4FAJ1@Gitee)
|
||||
* 【core 】 增加RFC3986类
|
||||
|
||||
### 🐞Bug修复
|
||||
* 【core 】 修复UrlBuilder.addPath歧义问题(issue#1912@Github)
|
||||
|
188
hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java
Normal file
188
hutool-core/src/main/java/cn/hutool/core/codec/PercentCodec.java
Normal file
@ -0,0 +1,188 @@
|
||||
package cn.hutool.core.codec;
|
||||
|
||||
import cn.hutool.core.util.CharUtil;
|
||||
import cn.hutool.core.util.HexUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Serializable;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* 百分号编码(Percent-encoding), 也称作URL编码(URL encoding)。<br>
|
||||
* 百分号编码可用于URI的编码,也可以用于"application/x-www-form-urlencoded"的MIME准备数据。
|
||||
*
|
||||
* <p>
|
||||
* 百分号编码会对 URI 中不允许出现的字符或者其他特殊情况的允许的字符进行编码,对于被编码的字符,最终会转为以百分号"%“开头,后面跟着两位16进制数值的形式。
|
||||
* 举个例子,空格符(SP)是不允许的字符,在 ASCII 码对应的二进制值是"00100000”,最终转为"%20"。
|
||||
* </p>
|
||||
* <p>
|
||||
* 对于不同场景应遵循不同规范:
|
||||
*
|
||||
* <ul>
|
||||
* <li>URI:遵循RFC 3986保留字规范</li>
|
||||
* <li>application/x-www-form-urlencoded,遵循W3C HTML Form content types规范,如空格须转+</li>
|
||||
* </ul>
|
||||
*
|
||||
* @author looly
|
||||
* @since 5.7.16
|
||||
*/
|
||||
public class PercentCodec implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/**
|
||||
* 从已知PercentCodec创建PercentCodec,会复制给定PercentCodec的安全字符
|
||||
*
|
||||
* @param codec PercentCodec
|
||||
* @return PercentCodec
|
||||
*/
|
||||
public static PercentCodec of(PercentCodec codec) {
|
||||
return new PercentCodec((BitSet) codec.safeCharacters.clone());
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建PercentCodec,使用指定字符串中的字符作为安全字符
|
||||
*
|
||||
* @param chars 安全字符合集
|
||||
* @return PercentCodec
|
||||
*/
|
||||
public static PercentCodec of(CharSequence chars) {
|
||||
final PercentCodec codec = new PercentCodec();
|
||||
final int length = chars.length();
|
||||
for (int i = 0; i < length; i++) {
|
||||
codec.addSafe(chars.charAt(i));
|
||||
}
|
||||
return codec;
|
||||
}
|
||||
|
||||
/**
|
||||
* 存放安全编码
|
||||
*/
|
||||
private final BitSet safeCharacters;
|
||||
/**
|
||||
* 是否编码空格为+
|
||||
*/
|
||||
private boolean encodeSpaceAsPlus = false;
|
||||
|
||||
/**
|
||||
* 构造<br>
|
||||
* [a-zA-Z0-9]默认不被编码
|
||||
*/
|
||||
public PercentCodec() {
|
||||
this(new BitSet(256));
|
||||
}
|
||||
|
||||
/**
|
||||
* 构造
|
||||
*
|
||||
* @param safeCharacters 安全字符,安全字符不被编码
|
||||
*/
|
||||
public PercentCodec(BitSet safeCharacters) {
|
||||
this.safeCharacters = safeCharacters;
|
||||
}
|
||||
|
||||
/**
|
||||
* 增加安全字符<br>
|
||||
* 安全字符不被编码
|
||||
*
|
||||
* @param c 字符
|
||||
* @return this
|
||||
*/
|
||||
public PercentCodec addSafe(char c) {
|
||||
safeCharacters.set(c);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 移除安全字符<br>
|
||||
* 安全字符不被编码
|
||||
*
|
||||
* @param c 字符
|
||||
* @return this
|
||||
*/
|
||||
public PercentCodec removeSafe(char c) {
|
||||
safeCharacters.clear(c);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 增加安全字符到挡墙的PercentCodec
|
||||
*
|
||||
* @param codec PercentCodec
|
||||
* @return this
|
||||
*/
|
||||
public PercentCodec or(PercentCodec codec) {
|
||||
this.safeCharacters.or(codec.safeCharacters);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 组合当前PercentCodec和指定PercentCodec为一个新的PercentCodec,安全字符为并集
|
||||
*
|
||||
* @param codec PercentCodec
|
||||
* @return 新的PercentCodec
|
||||
*/
|
||||
public PercentCodec orNew(PercentCodec codec) {
|
||||
return of(this).or(codec);
|
||||
}
|
||||
|
||||
/**
|
||||
* 是否将空格编码为+
|
||||
*
|
||||
* @param encodeSpaceAsPlus 是否将空格编码为+
|
||||
* @return this
|
||||
*/
|
||||
public PercentCodec setEncodeSpaceAsPlus(boolean encodeSpaceAsPlus) {
|
||||
this.encodeSpaceAsPlus = encodeSpaceAsPlus;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* 将URL中的字符串编码为%形式
|
||||
*
|
||||
* @param path 需要编码的字符串
|
||||
* @param charset 编码, {@code null}返回原字符串,表示不编码
|
||||
* @return 编码后的字符串
|
||||
*/
|
||||
public String encode(CharSequence path, Charset charset) {
|
||||
if (null == charset || StrUtil.isEmpty(path)) {
|
||||
return StrUtil.str(path);
|
||||
}
|
||||
|
||||
final StringBuilder rewrittenPath = new StringBuilder(path.length());
|
||||
final ByteArrayOutputStream buf = new ByteArrayOutputStream();
|
||||
final OutputStreamWriter writer = new OutputStreamWriter(buf, charset);
|
||||
|
||||
int c;
|
||||
for (int i = 0; i < path.length(); i++) {
|
||||
c = path.charAt(i);
|
||||
if (safeCharacters.get(c)) {
|
||||
rewrittenPath.append((char) c);
|
||||
} else if (encodeSpaceAsPlus && c == CharUtil.SPACE) {
|
||||
// 对于空格单独处理
|
||||
rewrittenPath.append('+');
|
||||
} else {
|
||||
// convert to external encoding before hex conversion
|
||||
try {
|
||||
writer.write((char) c);
|
||||
writer.flush();
|
||||
} catch (IOException e) {
|
||||
buf.reset();
|
||||
continue;
|
||||
}
|
||||
|
||||
byte[] ba = buf.toByteArray();
|
||||
for (byte toEncode : ba) {
|
||||
// Converting each byte in the buffer
|
||||
rewrittenPath.append('%');
|
||||
HexUtil.appendHex(rewrittenPath, toEncode, false);
|
||||
}
|
||||
buf.reset();
|
||||
}
|
||||
}
|
||||
return rewrittenPath.toString();
|
||||
}
|
||||
}
|
98
hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java
Normal file
98
hutool-core/src/main/java/cn/hutool/core/net/RFC3986.java
Normal file
@ -0,0 +1,98 @@
|
||||
package cn.hutool.core.net;
|
||||
|
||||
import cn.hutool.core.codec.PercentCodec;
|
||||
|
||||
/**
|
||||
* rfc3986 : https://www.ietf.org/rfc/rfc3986.html 编码实现
|
||||
*
|
||||
* @author looly
|
||||
* @since 5.7.16
|
||||
*/
|
||||
public class RFC3986 {
|
||||
|
||||
/**
|
||||
* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
|
||||
*/
|
||||
public static final PercentCodec GEN_DELIMS = PercentCodec.of(":/?#[]&");
|
||||
|
||||
/**
|
||||
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
|
||||
*/
|
||||
public static final PercentCodec SUB_DELIMS = PercentCodec.of("!$&'()*+,;=");
|
||||
|
||||
/**
|
||||
* reserved = gen-delims / sub-delims
|
||||
*/
|
||||
public static final PercentCodec RESERVED = GEN_DELIMS.orNew(SUB_DELIMS);
|
||||
|
||||
/**
|
||||
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||
*/
|
||||
public static final PercentCodec UNRESERVED = PercentCodec.of(unreservedChars());
|
||||
|
||||
/**
|
||||
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
|
||||
*/
|
||||
public static final PercentCodec PCHAR = UNRESERVED.orNew(SUB_DELIMS).or(PercentCodec.of(":@"));
|
||||
|
||||
/**
|
||||
* segment = pchar
|
||||
*/
|
||||
public static final PercentCodec SEGMENT = PCHAR;
|
||||
/**
|
||||
* segment-nz-nc = SEGMENT ; non-zero-length segment without any colon ":"
|
||||
*/
|
||||
public static final PercentCodec SEGMENT_NZ_NC = PercentCodec.of(SEGMENT).removeSafe(':');
|
||||
|
||||
/**
|
||||
* path = segment / "/"
|
||||
*/
|
||||
public static final PercentCodec PATH = SEGMENT.orNew(PercentCodec.of("/"));
|
||||
|
||||
/**
|
||||
* query = pchar / "/" / "?"
|
||||
*/
|
||||
public static final PercentCodec QUERY = PCHAR.orNew(PercentCodec.of("/?"));
|
||||
|
||||
/**
|
||||
* fragment = pchar / "/" / "?"
|
||||
*/
|
||||
public static final PercentCodec FRAGMENT = QUERY;
|
||||
|
||||
/**
|
||||
* query中的key
|
||||
*/
|
||||
public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY).removeSafe('&').removeSafe('=');
|
||||
|
||||
/**
|
||||
* query中的value
|
||||
*/
|
||||
public static final PercentCodec QUERY_PARAM_VALUE = PercentCodec.of(QUERY).removeSafe('&');
|
||||
|
||||
/**
|
||||
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||
*
|
||||
* @return unreserved字符
|
||||
*/
|
||||
private static StringBuilder unreservedChars() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
// ALPHA
|
||||
for (char c = 'A'; c <= 'Z'; c++) {
|
||||
sb.append(c);
|
||||
}
|
||||
for (char c = 'a'; c <= 'z'; c++) {
|
||||
sb.append(c);
|
||||
}
|
||||
|
||||
// DIGIT
|
||||
for (char c = '0'; c <= '9'; c++) {
|
||||
sb.append(c);
|
||||
}
|
||||
|
||||
// "-" / "." / "_" / "~"
|
||||
sb.append("_.-~");
|
||||
|
||||
return sb;
|
||||
}
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
package cn.hutool.core.net.url;
|
||||
|
||||
import cn.hutool.core.lang.Assert;
|
||||
import cn.hutool.core.net.RFC3986;
|
||||
import cn.hutool.core.util.CharsetUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.core.util.URLUtil;
|
||||
@ -425,7 +426,7 @@ public final class UrlBuilder implements Serializable {
|
||||
* @return 标识符,例如#后边的部分
|
||||
*/
|
||||
public String getFragmentEncoded() {
|
||||
return URLUtil.encodeFragment(this.fragment, this.charset);
|
||||
return RFC3986.FRAGMENT.encode(this.fragment, this.charset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2,10 +2,10 @@ package cn.hutool.core.net.url;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.lang.Assert;
|
||||
import cn.hutool.core.net.RFC3986;
|
||||
import cn.hutool.core.net.URLDecoder;
|
||||
import cn.hutool.core.util.CharUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.core.util.URLUtil;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.LinkedList;
|
||||
@ -127,7 +127,7 @@ public class UrlPath {
|
||||
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
for (String segment : segments) {
|
||||
builder.append(CharUtil.SLASH).append(URLUtil.encodePathSegment(segment, charset));
|
||||
builder.append(CharUtil.SLASH).append(RFC3986.SEGMENT_NZ_NC.encode(segment, charset));
|
||||
}
|
||||
if (withEngTag || StrUtil.isEmpty(builder)) {
|
||||
builder.append(CharUtil.SLASH);
|
||||
|
@ -5,6 +5,7 @@ import cn.hutool.core.collection.IterUtil;
|
||||
import cn.hutool.core.convert.Convert;
|
||||
import cn.hutool.core.map.MapUtil;
|
||||
import cn.hutool.core.map.TableMap;
|
||||
import cn.hutool.core.net.RFC3986;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.core.util.URLUtil;
|
||||
|
||||
@ -220,10 +221,15 @@ public class UrlQuery {
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建URL查询字符串,即将key-value键值对转换为key1=v1&key2=&key3=v3形式
|
||||
* 构建URL查询字符串,即将key-value键值对转换为{@code key1=v1&key2=v2&key3=v3}形式。<br>
|
||||
* 对于{@code null}处理规则如下:
|
||||
* <ul>
|
||||
* <li>如果key为{@code null},则这个键值对忽略</li>
|
||||
* <li>如果value为{@code null},只保留key,如key1对应value为{@code null}生成类似于{@code key1&key2=v2}形式</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param charset encode编码,null表示不做encode编码
|
||||
* @param isEncode 是否转义键和值
|
||||
* @param isEncode 是否转义键和值,转义遵循rfc3986规范
|
||||
* @return URL查询字符串
|
||||
* @since 5.7.13
|
||||
*/
|
||||
@ -233,21 +239,18 @@ public class UrlQuery {
|
||||
}
|
||||
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
boolean isFirst = true;
|
||||
CharSequence key;
|
||||
CharSequence name;
|
||||
CharSequence value;
|
||||
for (Map.Entry<CharSequence, CharSequence> entry : this.query) {
|
||||
if (isFirst) {
|
||||
isFirst = false;
|
||||
} else {
|
||||
sb.append("&");
|
||||
}
|
||||
key = entry.getKey();
|
||||
if (null != key) {
|
||||
sb.append(toStr(key, charset, isEncode));
|
||||
name = entry.getKey();
|
||||
if (null != name) {
|
||||
if(sb.length() >0){
|
||||
sb.append("&");
|
||||
}
|
||||
sb.append(isEncode ? RFC3986.QUERY_PARAM_NAME.encode(name, charset) : name);
|
||||
value = entry.getValue();
|
||||
if (null != value) {
|
||||
sb.append("=").append(toStr(value, charset, isEncode));
|
||||
sb.append("=").append(isEncode ? RFC3986.QUERY_PARAM_VALUE.encode(value, charset) : value);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -301,18 +304,18 @@ public class UrlQuery {
|
||||
}
|
||||
|
||||
/**
|
||||
* 键值对的{@link CharSequence}转换为String,可选是否转义
|
||||
* 键值对的name转换为
|
||||
*
|
||||
* @param str 原字符串
|
||||
* @param charset 编码,只用于encode中
|
||||
* @param isEncode 是否转义
|
||||
* @param isEncode 是否转义,转义遵循rfc3986规范
|
||||
* @return 转换后的String
|
||||
* @since 5.7.13
|
||||
*/
|
||||
private static String toStr(CharSequence str, Charset charset, boolean isEncode) {
|
||||
private static String nameToStr(CharSequence str, Charset charset, boolean isEncode) {
|
||||
String result = StrUtil.str(str);
|
||||
if (isEncode) {
|
||||
result = URLUtil.encodeFragment(result, charset);
|
||||
result = RFC3986.QUERY_PARAM_NAME.encode(result, charset);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -63,4 +63,40 @@ public class UrlQueryTest {
|
||||
query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
|
||||
Assert.assertEquals("password=123456&username=SSM", query);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void buildHasNullTest() {
|
||||
Map<String, String> map = new LinkedHashMap<>();
|
||||
map.put(null, "SSM");
|
||||
map.put("password", "123456");
|
||||
String query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
|
||||
Assert.assertEquals("password=123456", query);
|
||||
|
||||
map = new TreeMap<>();
|
||||
map.put("username", "SSM");
|
||||
map.put("password", "");
|
||||
query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
|
||||
Assert.assertEquals("password=&username=SSM", query);
|
||||
|
||||
map = new TreeMap<>();
|
||||
map.put("username", "SSM");
|
||||
map.put("password", null);
|
||||
query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
|
||||
Assert.assertEquals("password&username=SSM", query);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void buildSpecialTest() {
|
||||
Map<String, String> map = new LinkedHashMap<>();
|
||||
map.put("key1&", "SSM");
|
||||
map.put("key2", "123456&");
|
||||
String query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
|
||||
Assert.assertEquals("key1%26=SSM&key2=123456%26", query);
|
||||
|
||||
map = new TreeMap<>();
|
||||
map.put("username=", "SSM");
|
||||
map.put("password", "=");
|
||||
query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
|
||||
Assert.assertEquals("password==&username%3D=SSM", query);
|
||||
}
|
||||
}
|
||||
|
@ -346,5 +346,4 @@ public class HttpUtilTest {
|
||||
.execute().body();
|
||||
Console.log(body);
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user