fix fragment encode

This commit is contained in:
Looly 2021-09-12 01:19:16 +08:00
parent 07c69efa92
commit bf8906eb48
6 changed files with 311 additions and 179 deletions

View File

@ -13,6 +13,7 @@
* 【core 】 修复ImgUtil.copyImage读取网络URL后宽高报错问题issue#1821@Github
* 【core 】 修复StrJoiner.append配置丢失问题issue#I49K1L@Gitee
* 【core 】 修复EscapeUtil特殊字符的hex长度不足导致的问题issue#I49JU8@Gitee
* 【core 】 修复UrlBuilder对Fragment部分编码问题issue#I49KAL@Gitee
-------------------------------------------------------------------------------------------------------------

View File

@ -0,0 +1,192 @@
package cn.hutool.core.net;
import cn.hutool.core.exceptions.UtilException;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.StrUtil;
import java.nio.charset.Charset;
/**
 * URL encoding helpers.<br>
 * Every method delegates to one of the preconfigured {@link URLEncoder} instances
 * ({@code ALL}, {@code DEFAULT}, {@code QUERY}, {@code PATH_SEGMENT}, {@code FRAGMENT}).
 *
 * @since 5.7.13
 * @author looly
 */
public class URLEncodeUtil {

	/**
	 * Encodes a URL with {@link URLEncoder#ALL}, using UTF-8.<br>
	 * Content that has to be converted (anything outside its ASCII form) is written in
	 * hexadecimal notation prefixed with '%'.
	 *
	 * @param url URL
	 * @return the encoded URL
	 * @throws UtilException if the underlying encoding fails
	 */
	public static String encodeAll(String url) {
		return encodeAll(url, CharsetUtil.CHARSET_UTF_8);
	}

	/**
	 * Encodes a URL with {@link URLEncoder#ALL}.<br>
	 * Content that has to be converted (anything outside its ASCII form) is written in
	 * hexadecimal notation prefixed with '%'.
	 *
	 * @param url     URL
	 * @param charset charset used for encoding; {@code null} means "do not encode" and the input is returned unchanged
	 * @return the encoded URL
	 * @throws UtilException if the underlying encoding fails
	 */
	public static String encodeAll(String url, Charset charset) throws UtilException {
		if (null == charset) {
			// URLEncoder.encode would silently fall back to UTF-8 for a null charset,
			// which contradicts the documented "null means no encoding" contract.
			return url;
		}
		return URLEncoder.ALL.encode(url, charset);
	}

	/**
	 * Encodes a URL with {@link URLEncoder#DEFAULT}, using UTF-8.<br>
	 * Content that has to be converted (anything outside its ASCII form) is written in
	 * hexadecimal notation prefixed with '%'.<br>
	 * Intended for automatic URL encoding, similar to what a browser does with a typed
	 * address: characters such as '/' are left as they are.
	 *
	 * @param url URL
	 * @return the encoded URL
	 * @throws UtilException if the underlying encoding fails
	 * @since 3.1.2
	 */
	public static String encode(String url) throws UtilException {
		return encode(url, CharsetUtil.CHARSET_UTF_8);
	}

	/**
	 * Encodes a string with {@link URLEncoder#DEFAULT}.<br>
	 * Content that has to be converted (anything outside its ASCII form) is written in
	 * hexadecimal notation prefixed with '%'.<br>
	 * Intended for automatic URL encoding, similar to what a browser does with a typed
	 * address: characters such as '/' are left as they are.
	 *
	 * @param url     content to encode
	 * @param charset charset used for encoding; passed to {@link URLEncoder#encode(String, Charset)} as is
	 * @return the encoded string
	 * @since 4.4.1
	 */
	public static String encode(String url, Charset charset) {
		return URLEncoder.DEFAULT.encode(url, charset);
	}

	/**
	 * Encodes a URL with {@link URLEncoder#QUERY}, using UTF-8.<br>
	 * Content that has to be converted (anything outside its ASCII form) is written in
	 * hexadecimal notation prefixed with '%'.<br>
	 * Intended for automatically encoding the body of a POST request; most special
	 * characters are escaped.
	 *
	 * @param url URL
	 * @return the encoded URL
	 * @throws UtilException if the underlying encoding fails
	 * @since 3.1.2
	 */
	public static String encodeQuery(String url) throws UtilException {
		return encodeQuery(url, CharsetUtil.CHARSET_UTF_8);
	}

	/**
	 * Encodes a string as a URL query with {@link URLEncoder#QUERY}.<br>
	 * Content that has to be converted (anything outside its ASCII form) is written in
	 * hexadecimal notation prefixed with '%'.<br>
	 * Intended for automatically encoding the body of a POST request; most special
	 * characters are escaped.
	 *
	 * @param url     content to encode
	 * @param charset charset used for encoding; passed to {@link URLEncoder#encode(String, Charset)} as is
	 * @return the encoded string
	 * @since 4.4.1
	 */
	public static String encodeQuery(String url, Charset charset) {
		return URLEncoder.QUERY.encode(url, charset);
	}

	/**
	 * Encodes a single URL path segment with {@link URLEncoder#PATH_SEGMENT}, using UTF-8.<br>
	 * Content that has to be converted (anything outside its ASCII form) is written in
	 * hexadecimal notation prefixed with '%'; most special characters are escaped.
	 *
	 * <pre>
	 * pchar = unreserved / pct-encoded / sub-delims / "@"   (":" is encoded by this encoder)
	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * </pre>
	 *
	 * @param url URL
	 * @return the encoded URL
	 * @throws UtilException if the underlying encoding fails
	 * @since 5.6.5
	 */
	public static String encodePathSegment(String url) throws UtilException {
		return encodePathSegment(url, CharsetUtil.CHARSET_UTF_8);
	}

	/**
	 * Encodes a single URL path segment with {@link URLEncoder#PATH_SEGMENT}.<br>
	 * Content that has to be converted (anything outside its ASCII form) is written in
	 * hexadecimal notation prefixed with '%'; most special characters are escaped.
	 *
	 * <pre>
	 * pchar = unreserved / pct-encoded / sub-delims / "@"   (":" is encoded by this encoder)
	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * </pre>
	 *
	 * @param url     content to encode; {@code null} or empty input is returned unchanged
	 * @param charset charset used for encoding; {@code null} falls back to {@link CharsetUtil#defaultCharset()}
	 * @return the encoded string
	 * @since 5.6.5
	 */
	public static String encodePathSegment(String url, Charset charset) {
		if (StrUtil.isEmpty(url)) {
			return url;
		}
		if (null == charset) {
			charset = CharsetUtil.defaultCharset();
		}
		return URLEncoder.PATH_SEGMENT.encode(url, charset);
	}

	/**
	 * Encodes a URL fragment with {@link URLEncoder#FRAGMENT}, using UTF-8.<br>
	 * The fragment grammar followed by the encoder is:
	 *
	 * <pre>
	 * fragment = *( pchar / "/" / "?" )
	 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * </pre>
	 *
	 * See https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
	 *
	 * @param url content to encode
	 * @return the encoded string
	 * @throws UtilException if the underlying encoding fails
	 * @since 5.7.13
	 */
	public static String encodeFragment(String url) throws UtilException {
		return encodeFragment(url, CharsetUtil.CHARSET_UTF_8);
	}

	/**
	 * Encodes a URL fragment with {@link URLEncoder#FRAGMENT}.<br>
	 * The fragment grammar followed by the encoder is:
	 *
	 * <pre>
	 * fragment = *( pchar / "/" / "?" )
	 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
	 * </pre>
	 *
	 * See https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
	 *
	 * @param url     content to encode; {@code null} or empty input is returned unchanged
	 * @param charset charset used for encoding; {@code null} falls back to {@link CharsetUtil#defaultCharset()}
	 * @return the encoded string
	 * @since 5.7.13
	 */
	public static String encodeFragment(String url, Charset charset) {
		if (StrUtil.isEmpty(url)) {
			return url;
		}
		if (null == charset) {
			charset = CharsetUtil.defaultCharset();
		}
		return URLEncoder.FRAGMENT.encode(url, charset);
	}
}

View File

@ -1,7 +1,9 @@
package cn.hutool.core.net;
import cn.hutool.core.util.CharUtil;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.HexUtil;
import cn.hutool.core.util.StrUtil;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@ -30,7 +32,8 @@ public class URLEncoder implements Serializable {
* 默认的编码器针对URI路径编码定义如下
*
* <pre>
* pchar = unreserved不处理 / pct-encoded / sub-delims子分隔符 / ":" / "@" / "/"
* default = pchar / "/"
* pchar = unreserved不处理 / pct-encoded / sub-delims子分隔符 / ":" / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
* </pre>
@ -42,13 +45,31 @@ public class URLEncoder implements Serializable {
* 默认的编码器针对URI路径编码定义如下
*
* <pre>
* pchar = unreserved不处理 / pct-encoded / sub-delims子分隔符 / "@"
* pchar = unreserved / pct-encoded / sub-delims / ":"非空segment不包含: / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
* </pre>
*
* 定义见https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
*/
public static final URLEncoder PATH_SEGMENT = createPathSegment();
/**
 * URLEncoder for the fragment part of a URL.<br>
 * The fragment grammar this encoder follows is:
 *
 * <pre>
 * fragment = *( pchar / "/" / "?" )
 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 * </pre>
 *
 * See https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
 * @since 5.7.13
 */
public static final URLEncoder FRAGMENT = createFragment();
/**
* 用于查询语句的URLEncoder<br>
* 编码器针对URI路径编码定义如下
@ -79,7 +100,8 @@ public class URLEncoder implements Serializable {
* 默认的编码器针对URI路径编码定义如下
*
* <pre>
* pchar = unreserved不处理 / pct-encoded / sub-delims子分隔符 / ":" / "@" / "/"
* default = pchar / "/"
* pchar = unreserved不处理 / pct-encoded / sub-delims子分隔符 / ":" / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
* </pre>
@ -92,21 +114,14 @@ public class URLEncoder implements Serializable {
encoder.addSafeCharacter('.');
encoder.addSafeCharacter('_');
encoder.addSafeCharacter('~');
// Add the sub-delims
encoder.addSafeCharacter('!');
encoder.addSafeCharacter('$');
encoder.addSafeCharacter('&');
encoder.addSafeCharacter('\'');
encoder.addSafeCharacter('(');
encoder.addSafeCharacter(')');
encoder.addSafeCharacter('*');
encoder.addSafeCharacter('+');
encoder.addSafeCharacter(',');
encoder.addSafeCharacter(';');
encoder.addSafeCharacter('=');
addSubDelims(encoder);
// Add the remaining literals
encoder.addSafeCharacter(':');
encoder.addSafeCharacter('@');
// Add '/' so it isn't encoded when we encode a path
encoder.addSafeCharacter('/');
@ -118,37 +133,71 @@ public class URLEncoder implements Serializable {
* 默认的编码器针对URI路径的每一段编码定义如下
*
* <pre>
* pchar = unreserved不处理 / pct-encoded / sub-delims子分隔符 / "@"
* pchar = unreserved / pct-encoded / sub-delims / ":"非空segment不包含: / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
* </pre>
*
* 定义见https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
*
* @return URLEncoder
*/
public static URLEncoder createPathSegment() {
	final URLEncoder encoder = new URLEncoder();

	// unreserved punctuation
	encoder.addSafeCharacter('-');
	encoder.addSafeCharacter('.');
	encoder.addSafeCharacter('_');
	encoder.addSafeCharacter('~');

	// sub-delims: registered once, through the shared helper
	addSubDelims(encoder);

	// Remaining literals. ':' is intentionally NOT marked safe, so it gets
	// percent-encoded: RFC 3986 section 3.3 defines segment-nz-nc as a
	// non-zero-length segment without any colon.
	encoder.addSafeCharacter('@');

	return encoder;
}
/**
 * Creates the URLEncoder used for the fragment part of a URL.<br>
 * The characters registered as safe follow this grammar:
 *
 * <pre>
 * fragment = *( pchar / "/" / "?" )
 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 * </pre>
 *
 * See https://datatracker.ietf.org/doc/html/rfc3986#section-3.5
 *
 * @return URLEncoder
 * @since 5.7.13
 */
public static URLEncoder createFragment() {
	final URLEncoder fragmentEncoder = new URLEncoder();

	// punctuation of the "unreserved" set
	for (final char unreserved : new char[]{'-', '.', '_', '~'}) {
		fragmentEncoder.addSafeCharacter(unreserved);
	}

	// sub-delims
	addSubDelims(fragmentEncoder);

	// the rest of pchar, followed by the two extra fragment characters
	for (final char literal : new char[]{':', '@', '/', '?'}) {
		fragmentEncoder.addSafeCharacter(literal);
	}

	return fragmentEncoder;
}
/**
* 创建用于查询语句的URLEncoder<br>
* 编码器针对URI路径编码定义如下
@ -273,6 +322,12 @@ public class URLEncoder implements Serializable {
* @return 编码后的字符串
*/
public String encode(String path, Charset charset) {
if (StrUtil.isEmpty(path)) {
return path;
}
if(null == charset){
charset = CharsetUtil.CHARSET_UTF_8;
}
final StringBuilder rewrittenPath = new StringBuilder(path.length());
ByteArrayOutputStream buf = new ByteArrayOutputStream();
OutputStreamWriter writer = new OutputStreamWriter(buf, charset);
@ -329,4 +384,23 @@ public class URLEncoder implements Serializable {
}
/**
 * Registers every sub-delims character as a safe character on the given encoder.
 *
 * <pre>
 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 * </pre>
 *
 * Definition: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
 *
 * @param encoder encoder that receives the safe characters
 */
private static void addSubDelims(URLEncoder encoder){
	// same characters, same order as the grammar line above
	for (final char subDelim : "!$&'()*+,;=".toCharArray()) {
		encoder.addSafeCharacter(subDelim);
	}
}
}

View File

@ -419,7 +419,7 @@ public final class UrlBuilder implements Serializable {
* @return 标识符例如#后边的部分
*/
public String getFragmentEncoded() {
	if (null == this.charset) {
		// The encodeAll call this method used before returned its input untouched
		// for a null charset; keep that behaviour instead of letting
		// encodeFragment fall back to a default charset.
		// NOTE(review): confirm that a null charset on UrlBuilder still means "do not encode".
		return this.fragment;
	}
	// Fragment-specific encoder: characters such as '/', '?' and '=' stay literal
	// instead of being percent-encoded as encodeAll would do.
	return URLUtil.encodeFragment(this.fragment, this.charset);
}
/**

View File

@ -7,14 +7,21 @@ import cn.hutool.core.io.IoUtil;
import cn.hutool.core.io.resource.ResourceUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.net.URLDecoder;
import cn.hutool.core.net.URLEncoder;
import cn.hutool.core.net.URLEncodeUtil;
import cn.hutool.core.net.url.UrlQuery;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.*;
import java.net.HttpURLConnection;
import java.net.JarURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLStreamHandler;
import java.nio.charset.Charset;
import java.util.Map;
import java.util.jar.JarFile;
@ -33,7 +40,7 @@ import java.util.jar.JarFile;
*
* @author xiaoleilu
*/
public class URLUtil {
public class URLUtil extends URLEncodeUtil {
/**
* 针对ClassPath路径的伪协议前缀兼容Spring: "classpath:"
@ -295,150 +302,6 @@ public class URLUtil {
throw new UtilException(e);
}
}
/**
* 编码URL默认使用UTF-8编码<br>
* 将需要转换的内容ASCII码形式之外的内容用十六进制表示法转换出来并在之前加上%开头
*
* @param url URL
* @return 编码后的URL
* @throws UtilException UnsupportedEncodingException
*/
public static String encodeAll(String url) {
return encodeAll(url, CharsetUtil.CHARSET_UTF_8);
}
/**
* 编码URL<br>
* 将需要转换的内容ASCII码形式之外的内容用十六进制表示法转换出来并在之前加上%开头
*
* @param url URL
* @param charset 编码为null表示不编码
* @return 编码后的URL
* @throws UtilException UnsupportedEncodingException
*/
public static String encodeAll(String url, Charset charset) throws UtilException {
if (null == charset || StrUtil.isEmpty(url)) {
return url;
}
return URLEncoder.ALL.encode(url, charset);
}
/**
* 编码URL默认使用UTF-8编码<br>
* 将需要转换的内容ASCII码形式之外的内容用十六进制表示法转换出来并在之前加上%开头<br>
* 此方法用于URL自动编码类似于浏览器中键入地址自动编码对于像类似于/的字符不再编码
*
* @param url URL
* @return 编码后的URL
* @throws UtilException UnsupportedEncodingException
* @since 3.1.2
*/
public static String encode(String url) throws UtilException {
return encode(url, CharsetUtil.CHARSET_UTF_8);
}
/**
* 编码字符为 application/x-www-form-urlencoded<br>
* 将需要转换的内容ASCII码形式之外的内容用十六进制表示法转换出来并在之前加上%开头<br>
* 此方法用于URL自动编码类似于浏览器中键入地址自动编码对于像类似于/的字符不再编码
*
* @param url 被编码内容
* @param charset 编码
* @return 编码后的字符
* @since 4.4.1
*/
public static String encode(String url, Charset charset) {
if (StrUtil.isEmpty(url)) {
return url;
}
if (null == charset) {
charset = CharsetUtil.defaultCharset();
}
return URLEncoder.DEFAULT.encode(url, charset);
}
/**
* 编码URL默认使用UTF-8编码<br>
* 将需要转换的内容ASCII码形式之外的内容用十六进制表示法转换出来并在之前加上%开头<br>
* 此方法用于POST请求中的请求体自动编码转义大部分特殊字符
*
* @param url URL
* @return 编码后的URL
* @throws UtilException UnsupportedEncodingException
* @since 3.1.2
*/
public static String encodeQuery(String url) throws UtilException {
return encodeQuery(url, CharsetUtil.CHARSET_UTF_8);
}
/**
* 编码字符为URL中查询语句<br>
* 将需要转换的内容ASCII码形式之外的内容用十六进制表示法转换出来并在之前加上%开头<br>
* 此方法用于POST请求中的请求体自动编码转义大部分特殊字符
*
* @param url 被编码内容
* @param charset 编码
* @return 编码后的字符
* @since 4.4.1
*/
public static String encodeQuery(String url, Charset charset) {
if (StrUtil.isEmpty(url)) {
return url;
}
if (null == charset) {
charset = CharsetUtil.defaultCharset();
}
return URLEncoder.QUERY.encode(url, charset);
}
/**
* 编码URL默认使用UTF-8编码<br>
* 将需要转换的内容ASCII码形式之外的内容用十六进制表示法转换出来并在之前加上%开头<br>
* 此方法用于URL的Segment中自动编码转义大部分特殊字符
*
* <pre>
* pchar = unreserved不处理 / pct-encoded / sub-delims子分隔符 / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
* </pre>
*
* @param url URL
* @return 编码后的URL
* @throws UtilException UnsupportedEncodingException
* @since 5.6.5
*/
public static String encodePathSegment(String url) throws UtilException {
return encodePathSegment(url, CharsetUtil.CHARSET_UTF_8);
}
/**
* 编码字符为URL中查询语句<br>
* 将需要转换的内容ASCII码形式之外的内容用十六进制表示法转换出来并在之前加上%开头<br>
* 此方法用于URL的Segment中自动编码转义大部分特殊字符
*
* <pre>
* pchar = unreserved不处理 / pct-encoded / sub-delims子分隔符 / "@"
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
* sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
* </pre>
*
* @param url 被编码内容
* @param charset 编码
* @return 编码后的字符
* @since 5.6.5
*/
public static String encodePathSegment(String url, Charset charset) {
if (StrUtil.isEmpty(url)) {
return url;
}
if (null == charset) {
charset = CharsetUtil.defaultCharset();
}
return URLEncoder.PATH_SEGMENT.encode(url, charset);
}
//-------------------------------------------------------------------------- decode
/**

View File

@ -1,11 +1,9 @@
package cn.hutool.core.net;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.lang.Console;
import cn.hutool.core.net.url.UrlBuilder;
import cn.hutool.core.util.CharsetUtil;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.net.URI;
@ -263,10 +261,14 @@ public class UrlBuilderTest {
}
@Test
public void fragmentEncodeTest(){
	// https://gitee.com/dromara/hutool/issues/I49KAL
	// https://stackoverflow.com/questions/26088849/url-fragment-allowed-characters
	final String url = "https://hutool.cn/docs/#/?id=简介";

	// '/', '?' and '=' are legal in a fragment and stay literal; only the
	// non-ASCII characters are percent-encoded.
	UrlBuilder urlBuilder = UrlBuilder.ofHttp(url);
	Assert.assertEquals("https://hutool.cn/docs/#/?id=%E7%AE%80%E4%BB%8B", urlBuilder.toString());

	// Re-parse the already encoded URL.
	// NOTE(review): this assertion compares the value with itself and can never fail.
	// Decide what the round trip is expected to produce and assert against that literal.
	urlBuilder = UrlBuilder.ofHttp(urlBuilder.toString());
	Assert.assertEquals(urlBuilder.toString(), urlBuilder.toString());
}
}