add RFC3986

This commit is contained in:
Looly 2021-10-26 04:24:05 +08:00
parent 87b88b395e
commit 7248b65f40
8 changed files with 347 additions and 21 deletions

View File

@ -11,6 +11,7 @@
* 【core 】 MailAccount增加setEncodefilename()方法可选是否编码附件的文件名issue#I4F160@Gitee
* 【core 】 MailAccount中charset增加null时的默认规则
* 【core 】 NumberUtil.compare修正注释说明issue#I4FAJ1@Gitee
* 【core 】 增加RFC3986类
### 🐞Bug修复
* 【core 】 修复UrlBuilder.addPath歧义问题issue#1912@Github

View File

@ -0,0 +1,188 @@
package cn.hutool.core.codec;
import cn.hutool.core.util.CharUtil;
import cn.hutool.core.util.HexUtil;
import cn.hutool.core.util.StrUtil;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.BitSet;
/**
 * Percent-encoding, also known as URL encoding.<br>
 * Percent-encoding can be used to encode URIs and also to prepare data for the
 * "application/x-www-form-urlencoded" MIME type.
 *
 * <p>
 * Characters that are not registered as safe are converted to bytes using the given charset and
 * every byte is emitted as a percent sign "%" followed by two hexadecimal digits.
 * For example the space character (binary "00100000") becomes "%20".
 * </p>
 * <p>
 * Different scenarios follow different specifications:
 *
 * <ul>
 *     <li>URIs follow the reserved-character rules of RFC 3986</li>
 *     <li>application/x-www-form-urlencoded follows the W3C HTML form content types rules,
 *     e.g. a space must be converted to "+"</li>
 * </ul>
 *
 * @author looly
 * @since 5.7.16
 */
public class PercentCodec implements Serializable {
    private static final long serialVersionUID = 1L;

    /**
     * Characters that are emitted as-is (never percent-encoded).
     */
    private final BitSet safeCharacters;

    /**
     * Whether a space that is not a safe character is written as "+" instead of being percent-encoded.
     */
    private boolean encodeSpaceAsPlus = false;

    /**
     * Creates a codec with an empty safe-character set; no character is safe
     * until it is added via {@link #addSafe(char)} or {@link #or(PercentCodec)}.
     */
    public PercentCodec() {
        this(new BitSet(256));
    }

    /**
     * Creates a codec backed by the given safe-character set.<br>
     * The given {@link BitSet} is used directly (it is not copied).
     *
     * @param safeCharacters safe characters; safe characters are not encoded
     */
    public PercentCodec(BitSet safeCharacters) {
        this.safeCharacters = safeCharacters;
    }

    /**
     * Creates a new codec from an existing one by cloning its safe-character set.<br>
     * Only the safe characters are copied.
     *
     * @param codec the codec whose safe characters are copied
     * @return a new PercentCodec
     */
    public static PercentCodec of(PercentCodec codec) {
        final BitSet copiedSafeChars = (BitSet) codec.safeCharacters.clone();
        return new PercentCodec(copiedSafeChars);
    }

    /**
     * Creates a codec whose safe characters are exactly the characters of the given sequence.
     *
     * @param chars the collection of safe characters
     * @return a new PercentCodec
     */
    public static PercentCodec of(CharSequence chars) {
        final BitSet safe = new BitSet(256);
        final int count = chars.length();
        int index = 0;
        while (index < count) {
            safe.set(chars.charAt(index));
            index++;
        }
        return new PercentCodec(safe);
    }

    /**
     * Marks a character as safe.<br>
     * Safe characters are not encoded.
     *
     * @param c the character
     * @return this
     */
    public PercentCodec addSafe(char c) {
        this.safeCharacters.set(c);
        return this;
    }

    /**
     * Removes a character from the safe set, so it will be encoded from now on.
     *
     * @param c the character
     * @return this
     */
    public PercentCodec removeSafe(char c) {
        this.safeCharacters.clear(c);
        return this;
    }

    /**
     * Adds all safe characters of the given codec to the current codec (in place).
     *
     * @param codec the codec whose safe characters are merged in
     * @return this
     */
    public PercentCodec or(PercentCodec codec) {
        final BitSet others = codec.safeCharacters;
        this.safeCharacters.or(others);
        return this;
    }

    /**
     * Combines this codec and the given codec into a new codec whose safe characters
     * are the union of both; neither input codec is modified.
     *
     * @param codec the codec to combine with
     * @return the new PercentCodec
     */
    public PercentCodec orNew(PercentCodec codec) {
        final PercentCodec union = of(this);
        union.or(codec);
        return union;
    }

    /**
     * Sets whether a space is encoded as "+".
     *
     * @param encodeSpaceAsPlus whether to encode a space as "+"
     * @return this
     */
    public PercentCodec setEncodeSpaceAsPlus(boolean encodeSpaceAsPlus) {
        this.encodeSpaceAsPlus = encodeSpaceAsPlus;
        return this;
    }

    /**
     * Encodes the given string into its percent-encoded form.
     *
     * @param path    the string to encode
     * @param charset the charset used to turn characters into bytes;
     *                {@code null} means no encoding, the original string is returned
     * @return the encoded string
     */
    public String encode(CharSequence path, Charset charset) {
        if (null == charset || StrUtil.isEmpty(path)) {
            return StrUtil.str(path);
        }

        final StringBuilder encoded = new StringBuilder(path.length());
        final ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream();
        final OutputStreamWriter charsetWriter = new OutputStreamWriter(byteBuffer, charset);

        for (int index = 0; index < path.length(); index++) {
            final char current = path.charAt(index);

            // Safe characters are copied through unchanged
            if (safeCharacters.get(current)) {
                encoded.append(current);
                continue;
            }

            // Spaces get special treatment when form-style encoding is requested
            if (encodeSpaceAsPlus && CharUtil.SPACE == current) {
                encoded.append('+');
                continue;
            }

            // Convert the character to the target charset's bytes before hex conversion
            try {
                charsetWriter.write(current);
                charsetWriter.flush();
            } catch (IOException e) {
                // The character is dropped; discard whatever was buffered for it
                byteBuffer.reset();
                continue;
            }

            // Emit every buffered byte as '%' followed by two hex digits
            for (byte encodedByte : byteBuffer.toByteArray()) {
                encoded.append('%');
                HexUtil.appendHex(encoded, encodedByte, false);
            }
            byteBuffer.reset();
        }

        return encoded.toString();
    }
}

View File

@ -0,0 +1,98 @@
package cn.hutool.core.net;
import cn.hutool.core.codec.PercentCodec;
/**
 * Character sets of RFC 3986 (https://www.ietf.org/rfc/rfc3986.html) expressed as
 * {@link PercentCodec} instances; the characters of each set are the codec's safe
 * (not percent-encoded) characters.
 *
 * <p>
 * Note: {@link #SEGMENT} is the same instance as {@link #PCHAR} and {@link #FRAGMENT} is the same
 * instance as {@link #QUERY}, so modifying one of them also modifies the other.
 * </p>
 *
 * @author looly
 * @since 5.7.16
 */
public class RFC3986 {

    /**
     * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
     */
    public static final PercentCodec GEN_DELIMS = PercentCodec.of(":/?#[]@");

    /**
     * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
     */
    public static final PercentCodec SUB_DELIMS = PercentCodec.of("!$&'()*+,;=");

    /**
     * reserved = gen-delims / sub-delims
     */
    public static final PercentCodec RESERVED = GEN_DELIMS.orNew(SUB_DELIMS);

    /**
     * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
     */
    public static final PercentCodec UNRESERVED = PercentCodec.of(unreservedChars());

    /**
     * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
     */
    public static final PercentCodec PCHAR = UNRESERVED.orNew(SUB_DELIMS).or(PercentCodec.of(":@"));

    /**
     * segment = pchar
     */
    public static final PercentCodec SEGMENT = PCHAR;

    /**
     * segment-nz-nc = SEGMENT ; non-zero-length segment without any colon ":"
     */
    public static final PercentCodec SEGMENT_NZ_NC = PercentCodec.of(SEGMENT).removeSafe(':');

    /**
     * path = segment / "/"
     */
    public static final PercentCodec PATH = SEGMENT.orNew(PercentCodec.of("/"));

    /**
     * query = pchar / "/" / "?"
     */
    public static final PercentCodec QUERY = PCHAR.orNew(PercentCodec.of("/?"));

    /**
     * fragment = pchar / "/" / "?"
     */
    public static final PercentCodec FRAGMENT = QUERY;

    /**
     * Name (key) of a query parameter: the query set without "&" and "=",
     * so that both separators are percent-encoded inside a name.
     */
    public static final PercentCodec QUERY_PARAM_NAME = PercentCodec.of(QUERY).removeSafe('&').removeSafe('=');

    /**
     * Value of a query parameter: the query set without "&",
     * so that the pair separator is percent-encoded inside a value.
     */
    public static final PercentCodec QUERY_PARAM_VALUE = PercentCodec.of(QUERY).removeSafe('&');

    /**
     * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
     *
     * @return the unreserved characters
     */
    private static StringBuilder unreservedChars() {
        StringBuilder sb = new StringBuilder();

        // ALPHA
        for (char c = 'A'; c <= 'Z'; c++) {
            sb.append(c);
        }
        for (char c = 'a'; c <= 'z'; c++) {
            sb.append(c);
        }

        // DIGIT
        for (char c = '0'; c <= '9'; c++) {
            sb.append(c);
        }

        // "-" / "." / "_" / "~"
        sb.append("_.-~");

        return sb;
    }
}

View File

@ -1,6 +1,7 @@
package cn.hutool.core.net.url;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.net.RFC3986;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.core.util.URLUtil;
@ -425,7 +426,7 @@ public final class UrlBuilder implements Serializable {
* @return 标识符例如#后边的部分
*/
public String getFragmentEncoded() {
return URLUtil.encodeFragment(this.fragment, this.charset);
return RFC3986.FRAGMENT.encode(this.fragment, this.charset);
}
/**

View File

@ -2,10 +2,10 @@ package cn.hutool.core.net.url;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.net.RFC3986;
import cn.hutool.core.net.URLDecoder;
import cn.hutool.core.util.CharUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.core.util.URLUtil;
import java.nio.charset.Charset;
import java.util.LinkedList;
@ -127,7 +127,7 @@ public class UrlPath {
final StringBuilder builder = new StringBuilder();
for (String segment : segments) {
builder.append(CharUtil.SLASH).append(URLUtil.encodePathSegment(segment, charset));
builder.append(CharUtil.SLASH).append(RFC3986.SEGMENT_NZ_NC.encode(segment, charset));
}
if (withEngTag || StrUtil.isEmpty(builder)) {
builder.append(CharUtil.SLASH);

View File

@ -5,6 +5,7 @@ import cn.hutool.core.collection.IterUtil;
import cn.hutool.core.convert.Convert;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.map.TableMap;
import cn.hutool.core.net.RFC3986;
import cn.hutool.core.util.StrUtil;
import cn.hutool.core.util.URLUtil;
@ -220,10 +221,15 @@ public class UrlQuery {
}
/**
* 构建URL查询字符串即将key-value键值对转换为key1=v1&amp;key2=&amp;key3=v3形式
* 构建URL查询字符串即将key-value键值对转换为{@code key1=v1&key2=v2&key3=v3}形式<br>
* 对于{@code null}处理规则如下
* <ul>
* <li>如果key为{@code null}则这个键值对忽略</li>
* <li>如果value为{@code null}只保留key如key1对应value为{@code null}生成类似于{@code key1&key2=v2}形式</li>
* </ul>
*
* @param charset encode编码null表示不做encode编码
* @param isEncode 是否转义键和值
* @param isEncode 是否转义键和值转义遵循rfc3986规范
* @return URL查询字符串
* @since 5.7.13
*/
@ -233,21 +239,18 @@ public class UrlQuery {
}
final StringBuilder sb = new StringBuilder();
boolean isFirst = true;
CharSequence key;
CharSequence name;
CharSequence value;
for (Map.Entry<CharSequence, CharSequence> entry : this.query) {
if (isFirst) {
isFirst = false;
} else {
sb.append("&");
}
key = entry.getKey();
if (null != key) {
sb.append(toStr(key, charset, isEncode));
name = entry.getKey();
if (null != name) {
if(sb.length() >0){
sb.append("&");
}
sb.append(isEncode ? RFC3986.QUERY_PARAM_NAME.encode(name, charset) : name);
value = entry.getValue();
if (null != value) {
sb.append("=").append(toStr(value, charset, isEncode));
sb.append("=").append(isEncode ? RFC3986.QUERY_PARAM_VALUE.encode(value, charset) : value);
}
}
}
@ -301,18 +304,18 @@ public class UrlQuery {
}
/**
* 键值对的{@link CharSequence}转换为String可选是否转义
* 键值对的name转换为
*
* @param str 原字符串
* @param charset 编码只用于encode中
* @param isEncode 是否转义
* @param isEncode 是否转义转义遵循rfc3986规范
* @return 转换后的String
* @since 5.7.13
*/
private static String toStr(CharSequence str, Charset charset, boolean isEncode) {
private static String nameToStr(CharSequence str, Charset charset, boolean isEncode) {
String result = StrUtil.str(str);
if (isEncode) {
result = URLUtil.encodeFragment(result, charset);
result = RFC3986.QUERY_PARAM_NAME.encode(result, charset);
}
return result;
}

View File

@ -63,4 +63,40 @@ public class UrlQueryTest {
query = URLUtil.buildQuery(map, StandardCharsets.UTF_8);
Assert.assertEquals("password=123456&username=SSM", query);
}
@Test
public void buildHasNullTest() {
    // A pair with a null key is expected to be left out of the query
    final Map<String, String> nullKeyParams = new LinkedHashMap<>();
    nullKeyParams.put(null, "SSM");
    nullKeyParams.put("password", "123456");
    Assert.assertEquals("password=123456",
            URLUtil.buildQuery(nullKeyParams, StandardCharsets.UTF_8));

    // An empty value is expected to keep the "=" after the key
    final Map<String, String> emptyValueParams = new TreeMap<>();
    emptyValueParams.put("username", "SSM");
    emptyValueParams.put("password", "");
    Assert.assertEquals("password=&username=SSM",
            URLUtil.buildQuery(emptyValueParams, StandardCharsets.UTF_8));

    // A null value is expected to produce the bare key without "="
    final Map<String, String> nullValueParams = new TreeMap<>();
    nullValueParams.put("username", "SSM");
    nullValueParams.put("password", null);
    Assert.assertEquals("password&username=SSM",
            URLUtil.buildQuery(nullValueParams, StandardCharsets.UTF_8));
}
@Test
public void buildSpecialTest() {
    // "&" is expected to be percent-encoded in both the key and the value
    final Map<String, String> ampersandParams = new LinkedHashMap<>();
    ampersandParams.put("key1&", "SSM");
    ampersandParams.put("key2", "123456&");
    Assert.assertEquals("key1%26=SSM&key2=123456%26",
            URLUtil.buildQuery(ampersandParams, StandardCharsets.UTF_8));

    // "=" is expected to be percent-encoded in a key but kept literally in a value
    final Map<String, String> equalsSignParams = new TreeMap<>();
    equalsSignParams.put("username=", "SSM");
    equalsSignParams.put("password", "=");
    Assert.assertEquals("password==&username%3D=SSM",
            URLUtil.buildQuery(equalsSignParams, StandardCharsets.UTF_8));
}
}

View File

@ -346,5 +346,4 @@ public class HttpUtilTest {
.execute().body();
Console.log(body);
}
}