This commit is contained in:
Looly 2024-01-05 12:00:24 +08:00
parent efc4ca63a2
commit 69206406d7
6 changed files with 172 additions and 94 deletions

View File

@ -12,6 +12,8 @@
package org.dromara.hutool.core.xml;
import org.dromara.hutool.core.text.CharUtil;
import java.util.regex.Pattern;
/**
@ -30,6 +32,10 @@ public class XmlConstants {
* String constant: XML escape sequence for the ampersand {@code "&amp;" -> "&"}
*/
public static final String AMP = "&amp;";
/**
* The ampersand character {@code '&'}, taken from {@link CharUtil#AMP} and held as a boxed {@link Character}.
*/
public static final Character C_AMP = CharUtil.AMP;
/**
* String constant: XML escape sequence for the double quote {@code "&quot;" -> "\""}
@ -40,17 +46,41 @@ public class XmlConstants {
* String constant: XML escape sequence for the single quote {@code "&apos;" -> "'"}
*/
public static final String APOS = "&apos;";
/**
* The single-quote character {@code '}, taken from {@link CharUtil#SINGLE_QUOTE} and held as a boxed {@link Character}.
*/
public static final Character C_APOS = CharUtil.SINGLE_QUOTE;
/**
* String constant: XML escape sequence for the less-than sign {@code "&lt;" -> "<"}
*/
public static final String LT = "&lt;";
/**
* The less-than character {@code '<'}, held as a boxed {@link Character}.
*/
public static final Character C_LT = '<';
/**
* String constant: XML escape sequence for the greater-than sign {@code "&gt;" -> ">"}
*/
public static final String GT = "&gt;";
/**
* The greater-than character {@code '>'}, held as a boxed {@link Character}.
*/
public static final Character C_GT = '>';
/**
* The exclamation mark {@code '!'}, held as a boxed {@link Character}.
*/
public static final Character C_BANG = '!';
/**
* The question mark {@code '?'}, held as a boxed {@link Character}.
*/
public static final Character C_QUEST = '?';
/**
* 在XML中无效的字符 正则
*/

View File

@ -29,6 +29,7 @@ import org.dromara.hutool.json.JSONTokener;
import org.dromara.hutool.json.xml.JSONXMLUtil;
import org.dromara.hutool.json.serialize.GlobalSerializeMapping;
import org.dromara.hutool.json.serialize.JSONSerializer;
import org.dromara.hutool.json.xml.ParseConfig;
import java.io.InputStream;
import java.io.Reader;
@ -170,7 +171,7 @@ public class JSONObjectMapper {
final String jsonStr = StrUtil.trim(source);
if (StrUtil.startWith(jsonStr, '<')) {
// 可能为XML
JSONXMLUtil.toJSONObject(jsonObject, jsonStr, false);
JSONXMLUtil.toJSONObject(jsonStr, jsonObject, ParseConfig.of());
return;
}
mapFromTokener(new JSONTokener(StrUtil.trim(source), jsonObject.config()), jsonObject);

View File

@ -12,7 +12,9 @@
package org.dromara.hutool.json.xml;
import org.dromara.hutool.core.text.CharUtil;
import org.dromara.hutool.core.text.StrUtil;
import org.dromara.hutool.core.xml.XmlConstants;
import org.dromara.hutool.json.JSONException;
import org.dromara.hutool.json.JSONObject;
import org.dromara.hutool.json.mapper.JSONValueMapper;
@ -29,28 +31,28 @@ public class JSONXMLParser {
* 转换XML为JSONObject
* 转换过程中一些信息可能会丢失JSON中无法区分节点和属性相同的节点将被处理为JSONArray
*
* @param jo JSONObject
* @param xmlStr XML字符串
* @param keepStrings 如果为{@code true}则值保持String类型不转换为数字或boolean
* @param jo JSONObject
* @param parseConfig 解析选项
* @throws JSONException 解析异常
*/
public static void parseJSONObject(final JSONObject jo, final String xmlStr, final boolean keepStrings) throws JSONException {
public static void parseJSONObject(final String xmlStr, final JSONObject jo, final ParseConfig parseConfig) throws JSONException {
final XMLTokener x = new XMLTokener(xmlStr, jo.config());
while (x.more() && x.skipPast("<")) {
parse(x, jo, null, keepStrings);
parse(x, jo, null, parseConfig);
}
}
/**
* Scan the content following the named tag, attaching it to the context.
* 扫描XML内容并解析到JSONObject中
*
* @param x The XMLTokener containing the source string.
* @param context The JSONObject that will include the new material.
* @param name The tag name.
* @return true if the close tag is processed.
* @param x {@link XMLTokener}
* @param context {@link JSONObject}
* @param name 标签名null表示从根标签开始解析
* @return {@code true}表示解析完成
* @throws JSONException JSON异常
*/
private static boolean parse(final XMLTokener x, final JSONObject context, final String name, final boolean keepStrings) throws JSONException {
private static boolean parse(final XMLTokener x, final JSONObject context, final String name, final ParseConfig parseConfig) throws JSONException {
final char c;
int i;
final JSONObject jsonobject;
@ -60,7 +62,7 @@ public class JSONXMLParser {
token = x.nextToken();
if (token == JSONXMLUtil.BANG) {
if (token == XmlConstants.C_BANG) {
c = x.next();
if (c == '-') {
if (x.next() == '-') {
@ -86,19 +88,19 @@ public class JSONXMLParser {
token = x.nextMeta();
if (token == null) {
throw x.syntaxError("Missing '>' after '<!'.");
} else if (token == JSONXMLUtil.LT) {
} else if (token == XmlConstants.C_LT) {
i += 1;
} else if (token == JSONXMLUtil.GT) {
} else if (token == XmlConstants.C_GT) {
i -= 1;
}
} while (i > 0);
return false;
} else if (token == JSONXMLUtil.QUEST) {
} else if (token == XmlConstants.C_QUEST) {
// <?
x.skipPast("?>");
return false;
} else if (token == JSONXMLUtil.SLASH) {
} else if (token == Character.valueOf(CharUtil.SLASH)) {
// Close tag </
@ -109,7 +111,7 @@ public class JSONXMLParser {
if (!token.equals(name)) {
throw x.syntaxError("Mismatched " + name + " and " + token);
}
if (x.nextToken() != JSONXMLUtil.GT) {
if (x.nextToken() != XmlConstants.C_GT) {
throw x.syntaxError("Misshaped close tag");
}
return true;
@ -123,6 +125,7 @@ public class JSONXMLParser {
tagName = (String) token;
token = null;
jsonobject = new JSONObject();
final boolean keepStrings = parseConfig.isKeepStrings();
for (; ; ) {
if (token == null) {
token = x.nextToken();
@ -132,7 +135,7 @@ public class JSONXMLParser {
if (token instanceof String) {
string = (String) token;
token = x.nextToken();
if (token == JSONXMLUtil.EQ) {
if (token == Character.valueOf(CharUtil.EQUAL)) {
token = x.nextToken();
if (!(token instanceof String)) {
throw x.syntaxError("Missing value");
@ -143,9 +146,9 @@ public class JSONXMLParser {
jsonobject.append(string, "");
}
} else if (token == JSONXMLUtil.SLASH) {
} else if (token == Character.valueOf(CharUtil.SLASH)) {
// Empty tag <.../>
if (x.nextToken() != JSONXMLUtil.GT) {
if (x.nextToken() != XmlConstants.C_GT) {
throw x.syntaxError("Misshaped tag");
}
if (!jsonobject.isEmpty()) {
@ -155,7 +158,7 @@ public class JSONXMLParser {
}
return false;
} else if (token == JSONXMLUtil.GT) {
} else if (token == XmlConstants.C_GT) {
// Content, between <...> and </...>
for (; ; ) {
token = x.nextContent();
@ -170,9 +173,9 @@ public class JSONXMLParser {
jsonobject.append("content", keepStrings ? token : JSONValueMapper.toJsonValue(string));
}
} else if (token == JSONXMLUtil.LT) {
} else if (token == XmlConstants.C_LT) {
// Nested element
if (parse(x, jsonobject, tagName, keepStrings)) {
if (parse(x, jsonobject, tagName, parseConfig)) {
if (jsonobject.isEmpty()) {
context.append(tagName, StrUtil.EMPTY);
} else if (jsonobject.size() == 1 && jsonobject.get("content") != null) {

View File

@ -12,7 +12,6 @@
package org.dromara.hutool.json.xml;
import org.dromara.hutool.core.text.CharUtil;
import org.dromara.hutool.json.JSONException;
import org.dromara.hutool.json.JSONObject;
@ -25,51 +24,6 @@ import org.dromara.hutool.json.JSONObject;
*/
public class JSONXMLUtil {
/**
* The Character '&amp;'.
*/
public static final Character AMP = CharUtil.AMP;
/**
* The Character '''.
*/
public static final Character APOS = CharUtil.SINGLE_QUOTE;
/**
* The Character '!'.
*/
public static final Character BANG = '!';
/**
* The Character '='.
*/
public static final Character EQ = '=';
/**
* The Character '&gt;'.
*/
public static final Character GT = '>';
/**
* The Character '&lt;'.
*/
public static final Character LT = '<';
/**
* The Character '?'.
*/
public static final Character QUEST = '?';
/**
* The Character '"'.
*/
public static final Character QUOT = CharUtil.DOUBLE_QUOTES;
/**
* The Character '/'.
*/
public static final Character SLASH = CharUtil.SLASH;
/**
* 转换XML为JSONObject
* 转换过程中一些信息可能会丢失JSON中无法区分节点和属性相同的节点将被处理为JSONArray
@ -80,7 +34,7 @@ public class JSONXMLUtil {
* @throws JSONException Thrown if there is an error while parsing the string
*/
public static JSONObject toJSONObject(final String string) throws JSONException {
return toJSONObject(string, false);
return toJSONObject(string, ParseConfig.of());
}
/**
@ -89,13 +43,13 @@ public class JSONXMLUtil {
* Content text may be placed in a "content" member. Comments, prologs, DTDs, and {@code <[ [ ]]>} are ignored.
* All values are converted as strings, for 1, 01, 29.0 will not be coerced to numbers but will instead be the exact value as seen in the XML document.
*
* @param string The source string.
* @param keepStrings If true, then values will not be coerced into boolean or numeric values and will instead be left as strings
* @param string XML字符串
* @param parseConfig XML解析选项
* @return A JSONObject containing the structured data from the XML string.
* @throws JSONException Thrown if there is an error while parsing the string
*/
public static JSONObject toJSONObject(final String string, final boolean keepStrings) throws JSONException {
return toJSONObject(new JSONObject(), string, keepStrings);
public static JSONObject toJSONObject(final String string, final ParseConfig parseConfig) throws JSONException {
return toJSONObject(string, new JSONObject(), parseConfig);
}
/**
@ -104,13 +58,13 @@ public class JSONXMLUtil {
*
* @param jo JSONObject
* @param xmlStr XML字符串
* @param keepStrings 如果为{@code true}则值保持String类型不转换为数字或boolean
* @param parseConfig XML解析选项
* @return A JSONObject 解析后的JSON对象与传入的jo为同一对象
* @throws JSONException 解析异常
* @since 5.3.1
*/
public static JSONObject toJSONObject(final JSONObject jo, final String xmlStr, final boolean keepStrings) throws JSONException {
JSONXMLParser.parseJSONObject(jo, xmlStr, keepStrings);
public static JSONObject toJSONObject(final String xmlStr, final JSONObject jo, final ParseConfig parseConfig) throws JSONException {
JSONXMLParser.parseJSONObject(xmlStr, jo, parseConfig);
return jo;
}

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2024. looly(loolly@aliyun.com)
* Hutool is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* https://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
package org.dromara.hutool.json.xml;
import java.io.Serializable;
/**
 * Optional settings that control how XML is parsed into JSON.<br>
 * Modeled on https://github.com/stleary/JSON-java/blob/master/src/main/java/org/json/ParserConfiguration.java
 *
 * @author AylwardJ, Looly
 */
public class ParseConfig implements Serializable {
	private static final long serialVersionUID = 1L;

	/**
	 * Default maximum nesting depth.
	 */
	public static final int DEFAULT_MAXIMUM_NESTING_DEPTH = 512;

	/**
	 * Creates a new {@code ParseConfig} with default settings:
	 * values are not kept as strings and the maximum nesting depth is
	 * {@link #DEFAULT_MAXIMUM_NESTING_DEPTH}.
	 *
	 * @return a new, independent ParseConfig instance
	 */
	public static ParseConfig of() {
		return new ParseConfig();
	}

	/**
	 * Whether values are kept as String. If {@code false}, the parser tries to
	 * convert each value to a matching type (numeric, boolean, string).
	 */
	private boolean keepStrings;

	/**
	 * Maximum nesting depth, intended to limit how deep the parser may descend;
	 * -1 means unlimited.
	 * Initialised explicitly: without an initializer the field would be 0, which is
	 * neither the "unlimited" sentinel nor the declared default.
	 */
	private int maxNestingDepth = DEFAULT_MAXIMUM_NESTING_DEPTH;

	/**
	 * Returns whether values are kept as String. If {@code false}, the parser tries to
	 * convert each value to a matching type (numeric, boolean, string).
	 *
	 * @return {@code true} if values are kept as String
	 */
	public boolean isKeepStrings() {
		return keepStrings;
	}

	/**
	 * Sets whether values are kept as String. If {@code false}, the parser tries to
	 * convert each value to a matching type (numeric, boolean, string).
	 *
	 * @param keepStrings {@code true} to keep values as String
	 * @return this
	 */
	public ParseConfig setKeepStrings(final boolean keepStrings) {
		this.keepStrings = keepStrings;
		return this;
	}

	/**
	 * Returns the maximum nesting depth; -1 means unlimited.
	 *
	 * @return the maximum nesting depth
	 */
	public int getMaxNestingDepth() {
		return maxNestingDepth;
	}

	/**
	 * Sets the maximum nesting depth; -1 means unlimited.
	 *
	 * @param maxNestingDepth the maximum nesting depth
	 * @return this
	 */
	public ParseConfig setMaxNestingDepth(final int maxNestingDepth) {
		this.maxNestingDepth = maxNestingDepth;
		return this;
	}
}

View File

@ -12,6 +12,8 @@
package org.dromara.hutool.json.xml;
import org.dromara.hutool.core.text.CharUtil;
import org.dromara.hutool.core.xml.XmlConstants;
import org.dromara.hutool.json.JSONConfig;
import org.dromara.hutool.json.JSONException;
import org.dromara.hutool.json.JSONTokener;
@ -31,11 +33,11 @@ public class XMLTokener extends JSONTokener {
static {
entity = new java.util.HashMap<>(8);
entity.put("amp", JSONXMLUtil.AMP);
entity.put("apos", JSONXMLUtil.APOS);
entity.put("gt", JSONXMLUtil.GT);
entity.put("lt", JSONXMLUtil.LT);
entity.put("quot", JSONXMLUtil.QUOT);
entity.put("amp", XmlConstants.C_AMP);
entity.put("apos", XmlConstants.C_APOS);
entity.put("gt", XmlConstants.C_GT);
entity.put("lt", XmlConstants.C_LT);
entity.put("quot", CharUtil.DOUBLE_QUOTES);
}
/**
@ -89,7 +91,7 @@ public class XMLTokener extends JSONTokener {
return null;
}
if (c == '<') {
return JSONXMLUtil.LT;
return XmlConstants.C_LT;
}
sb = new StringBuilder();
for (; ; ) {
@ -175,17 +177,17 @@ public class XMLTokener extends JSONTokener {
case 0:
throw syntaxError("Misshaped meta tag");
case '<':
return JSONXMLUtil.LT;
return XmlConstants.C_LT;
case '>':
return JSONXMLUtil.GT;
return XmlConstants.C_GT;
case '/':
return JSONXMLUtil.SLASH;
return CharUtil.SLASH;
case '=':
return JSONXMLUtil.EQ;
return CharUtil.EQUAL;
case '!':
return JSONXMLUtil.BANG;
return XmlConstants.C_BANG;
case '?':
return JSONXMLUtil.QUEST;
return XmlConstants.C_QUEST;
case '"':
case '\'':
q = c;
@ -242,15 +244,15 @@ public class XMLTokener extends JSONTokener {
case '<':
throw syntaxError("Misplaced '<'");
case '>':
return JSONXMLUtil.GT;
return XmlConstants.C_GT;
case '/':
return JSONXMLUtil.SLASH;
return CharUtil.SLASH;
case '=':
return JSONXMLUtil.EQ;
return CharUtil.EQUAL;
case '!':
return JSONXMLUtil.BANG;
return XmlConstants.C_BANG;
case '?':
return JSONXMLUtil.QUEST;
return XmlConstants.C_QUEST;
// Quoted string