From 11ba7ba36c30afa4be5b0ea8bf0728ae1923fe25 Mon Sep 17 00:00:00 2001 From: Looly Date: Sun, 4 Aug 2024 19:59:12 +0800 Subject: [PATCH] fix code --- .../org/dromara/hutool/json/JSONParser.java | 51 ++-- .../org/dromara/hutool/json/JSONTokener.java | 229 ++++++++++-------- .../hutool/json/convert/JSONConverter.java | 2 +- .../dromara/hutool/json/JSONTokenerTest.java | 40 ++- 4 files changed, 177 insertions(+), 145 deletions(-) diff --git a/hutool-json/src/main/java/org/dromara/hutool/json/JSONParser.java b/hutool-json/src/main/java/org/dromara/hutool/json/JSONParser.java index 5b4055a2b..530b4bae5 100644 --- a/hutool-json/src/main/java/org/dromara/hutool/json/JSONParser.java +++ b/hutool-json/src/main/java/org/dromara/hutool/json/JSONParser.java @@ -110,7 +110,7 @@ public class JSONParser { } default: tokener.back(); - key = nextValue(true).toString(); + key = tokener.nextString(); } // The key is followed by ':'. @@ -120,7 +120,7 @@ public class JSONParser { throw tokener.syntaxError("Expected a ':' after a key"); } - jsonObject.set(key, nextValue(false), predicate); + jsonObject.set(key, nextValue(), predicate); // Pairs are separated by ','. @@ -162,7 +162,7 @@ public class JSONParser { jsonArray.addRaw(null, predicate); } else { x.back(); - jsonArray.addRaw(nextValue(false), predicate); + jsonArray.addRaw(nextValue(), predicate); } switch (x.nextClean()) { case CharUtil.COMMA: @@ -184,20 +184,19 @@ public class JSONParser { /** * 获得下一个值,值类型可以是Boolean, Double, Integer, JSONArray, JSONObject, Long, or String * - * @param getOnlyStringValue 是否只获取String值 * @return Boolean, Double, Integer, JSONArray, JSONObject, Long, or String * @throws JSONException 语法错误 */ - public Object nextValue(final boolean getOnlyStringValue) throws JSONException { - return nextValue(getOnlyStringValue, (token, tokener, config) -> { + public Object nextValue() throws JSONException { + return nextValue((token, tokener, config) -> { switch (token) { - case '{': + case CharUtil.DELIM_START: try { return new JSONObject(this, config); } catch (final StackOverflowError e) { throw new JSONException("JSONObject depth too large to process.", e); } - case '[': + case CharUtil.BRACKET_START: try { return new JSONArray(this, config); } catch (final StackOverflowError e) { @@ -211,45 +210,29 @@ public class JSONParser { /** * 获得下一个值,值类型可以是Boolean, Double, Integer, JSONArray, JSONObject, Long, or String * - * @param getOnlyStringValue 是否只获取String值 * @param objectBuilder JSON对象构建器 * @return Boolean, Double, Integer, JSONArray, JSONObject, Long, or String * @throws JSONException 语法错误 */ - public Object nextValue(final boolean getOnlyStringValue, final ObjectBuilder objectBuilder) throws JSONException { + public Object nextValue(final ObjectBuilder objectBuilder) throws JSONException { final JSONTokener tokener = this.tokener; - char c = tokener.nextClean(); + final char c = tokener.nextClean(); switch (c) { - case '"': - case '\'': + case CharUtil.DOUBLE_QUOTES: + case CharUtil.SINGLE_QUOTE: return tokener.nextString(c); - case '{': - case '[': - if (getOnlyStringValue) { - throw tokener.syntaxError("String value must not begin with '{'"); - } + case CharUtil.DELIM_START: + case CharUtil.BRACKET_START: tokener.back(); return objectBuilder.build(c, tokener, this.config); } /* - * Handle unquoted text. This could be the values true, false, or null, or it can be a number. - * An implementation (such as this one) is allowed to also accept non-standard forms. Accumulate - * characters until we reach the end of the text or a formatting character. + * 处理无引号包装的字符串,如: true, false, 或 null, 或 number. + * 同样兼容非标准的字符串,如key无引号包装。 + * 此方法会不断读取并积累字符直到遇到token符 */ - - final StringBuilder sb = new StringBuilder(); - while (c >= ' ' && ",:]}/\\\"[{;=#".indexOf(c) < 0) { - sb.append(c); - c = tokener.next(); - } - tokener.back(); - - final String valueString = sb.toString().trim(); - if (valueString.isEmpty()) { - throw tokener.syntaxError("Missing value"); - } - return getOnlyStringValue ? valueString : InternalJSONUtil.parseValueFromString(valueString); + return InternalJSONUtil.parseValueFromString(tokener.nextUnwrapString(c)); } /** diff --git a/hutool-json/src/main/java/org/dromara/hutool/json/JSONTokener.java b/hutool-json/src/main/java/org/dromara/hutool/json/JSONTokener.java index a29af097a..c9e735dfe 100644 --- a/hutool-json/src/main/java/org/dromara/hutool/json/JSONTokener.java +++ b/hutool-json/src/main/java/org/dromara/hutool/json/JSONTokener.java @@ -16,6 +16,8 @@ import org.dromara.hutool.core.io.IoUtil; import org.dromara.hutool.core.io.ReaderWrapper; import org.dromara.hutool.core.lang.Assert; import org.dromara.hutool.core.math.NumberUtil; +import org.dromara.hutool.core.text.CharUtil; +import org.dromara.hutool.core.text.StrUtil; import java.io.IOException; import java.io.InputStream; @@ -23,12 +25,18 @@ import java.io.Reader; import java.io.StringReader; /** - * JSON解析器,用于将JSON字符串解析为JSONObject或者JSONArray + * JSON解析器
+ * 用于解析JSON字符串,支持流式解析,即逐个字符解析,而不是一次性解析整个字符串。 * * @author from JSON.org */ public class JSONTokener extends ReaderWrapper { + /** + * JSON的分界符 + */ + private static final String TOKENS = ",:]}/\\\"[{;=#"; + /** * 定义结束(End of stream)为:0 */ @@ -207,6 +215,20 @@ public class JSONTokener extends ReaderWrapper { return chars; } + /** + * 获取下一个token字符 + * + * @return token字符 + * @throws JSONException 非Token字符 + */ + public char nextTokenChar() throws JSONException { + final char c = this.nextClean(); + if (isNotTokenChar(c)) { + throw this.syntaxError("Invalid token char: " + c); + } + return c; + } + /** * 获得下一个字符,跳过空白符 * @@ -223,6 +245,59 @@ public class JSONTokener extends ReaderWrapper { } } + /** + * 读取一个字符串,包括: + * + * + * @return 截止到引号前的字符串 + * @throws JSONException 出现无结束的字符串时抛出此异常 + */ + public String nextString() throws JSONException { + final char c = nextClean(); + switch (c) { + case CharUtil.DOUBLE_QUOTES: + case CharUtil.SINGLE_QUOTE: + return nextString(c); + } + + // 兼容不严格的JSON,如key不被双引号包围的情况 + return nextUnwrapString(c); + } + + /** + * 获得下一个字符串,此字符串不以引号包围,不会处理转义符,主要解析: + * + * + * @param c 首个字符 + * @return 字符串 + * @throws JSONException 读取空串时抛出此异常 + */ + public String nextUnwrapString(char c) throws JSONException { + // 兼容不严格的JSON,如key不被双引号包围的情况 + final StringBuilder sb = new StringBuilder(); + while (isNotTokenChar(c)) { + sb.append(c); + c = next(); + } + if (c != EOF) { + back(); + } + + final String valueString = StrUtil.trim(sb); + if (valueString.isEmpty()) { + throw syntaxError("Missing value, maybe a token"); + } + return valueString; + } + /** * 返回当前位置到指定引号前的所有字符,反斜杠的转义符也会被处理。
* 标准的JSON是不允许使用单引号包含字符串的,但是此实现允许。 @@ -237,47 +312,21 @@ public class JSONTokener extends ReaderWrapper { while (true) { c = this.next(); switch (c) { - case 0: + case EOF: throw this.syntaxError("Unterminated string"); - case '\n': - case '\r': + case CharUtil.LF: + case CharUtil.CR: //throw this.syntaxError("Unterminated string"); // https://gitee.com/dromara/hutool/issues/I76CSU // 兼容非转义符 sb.append(c); break; - case '\\':// 转义符 + case CharUtil.BACKSLASH:// 转义符 c = this.next(); - switch (c) { - case 'b': - sb.append('\b'); - break; - case 't': - sb.append('\t'); - break; - case 'n': - sb.append('\n'); - break; - case 'f': - sb.append('\f'); - break; - case 'r': - sb.append('\r'); - break; - case 'u':// Unicode符 - sb.append(nextUnicode()); - break; - case '"': - case '\'': - case '\\': - case '/': - sb.append(c); - break; - default: - throw this.syntaxError("Illegal escape."); - } + sb.append(getUnescapeChar(c)); break; default: + // 字符串结束 if (c == quote) { return sb.toString(); } @@ -286,80 +335,6 @@ public class JSONTokener extends ReaderWrapper { } } - /** - * 获得从当前位置直到分隔符(不包括分隔符)或行尾的的所有字符。 - * - * @param delimiter 分隔符 - * @return 字符串 - * @throws JSONException JSON异常,包装IO异常 - */ - public String nextTo(final char delimiter) throws JSONException { - final StringBuilder sb = new StringBuilder(); - for (; ; ) { - final char c = this.next(); - if (c == delimiter || c == 0 || c == '\n' || c == '\r') { - if (c != 0) { - this.back(); - } - return sb.toString().trim(); - } - sb.append(c); - } - } - - /** - * Get the text up but not including one of the specified delimiter characters or the end of line, whichever comes first. - * - * @param delimiters A set of delimiter characters. - * @return A string, trimmed. - * @throws JSONException JSON异常,包装IO异常 - */ - public String nextTo(final String delimiters) throws JSONException { - char c; - final StringBuilder sb = new StringBuilder(); - for (; ; ) { - c = this.next(); - if (delimiters.indexOf(c) >= 0 || c == 0 || c == '\n' || c == '\r') { - if (c != 0) { - this.back(); - } - return sb.toString().trim(); - } - sb.append(c); - } - } - - /** - * Skip characters until the next character is the requested character. If the requested character is not found, no characters are skipped. 在遇到指定字符前,跳过其它字符。如果字符未找到,则不跳过任何字符。 - * - * @param to 需要定位的字符 - * @return 定位的字符,如果字符未找到返回0 - * @throws JSONException IO异常 - */ - public char skipTo(final char to) throws JSONException { - char c; - try { - final long startIndex = this.index; - final long startCharacter = this.character; - final long startLine = this.line; - mark(1000000); - do { - c = this.next(); - if (c == 0) { - reset(); - this.index = startIndex; - this.character = startCharacter; - this.line = startLine; - return c; - } - } while (c != to); - } catch (final IOException e) { - throw new JSONException(e); - } - this.back(); - return c; - } - /** * Make a JSONException to signal a syntax error.
* 构建 JSONException 用于表示语法错误 @@ -380,4 +355,44 @@ public class JSONTokener extends ReaderWrapper { public String toString() { return " at " + this.index + " [character " + this.character + " line " + this.line + "]"; } + + /** + * 获取反转义的字符 + * + * @param c 转义的字符,即`\`后的字符 + * @return 反转义字符 + */ + private char getUnescapeChar(final char c) { + switch (c) { + case 'b': + return '\b'; + case 't': + return '\t'; + case 'n': + return '\n'; + case 'f': + return '\f'; + case 'r': + return '\r'; + case 'u':// Unicode符 + return nextUnicode(); + case CharUtil.DOUBLE_QUOTES: + case CharUtil.SINGLE_QUOTE: + case CharUtil.BACKSLASH: + case CharUtil.SLASH: + return c; + default: + throw this.syntaxError("Illegal escape."); + } + } + + /** + * 是否为可见的非Token字符,这些字符存在于JSON的非字符串value中。 + * + * @param c char + * @return 是否为可见的非Token字符 + */ + private static boolean isNotTokenChar(final char c) { + return c >= ' ' && TOKENS.indexOf(c) < 0; + } } diff --git a/hutool-json/src/main/java/org/dromara/hutool/json/convert/JSONConverter.java b/hutool-json/src/main/java/org/dromara/hutool/json/convert/JSONConverter.java index ff4a73786..dc9f310a0 100644 --- a/hutool-json/src/main/java/org/dromara/hutool/json/convert/JSONConverter.java +++ b/hutool-json/src/main/java/org/dromara/hutool/json/convert/JSONConverter.java @@ -183,7 +183,7 @@ public class JSONConverter implements Converter, Serializable { final char firstC = jsonStr.charAt(0); // RFC8259,JSON字符串值、number, boolean, or null final JSONParser jsonParser = JSONParser.of(new JSONTokener(jsonStr), config); - final Object value = jsonParser.nextValue(false); + final Object value = jsonParser.nextValue(); if(jsonParser.getTokener().nextClean() != JSONTokener.EOF){ // 对于用户提供的未转义字符串导致解析未结束,报错 throw new JSONException("JSON format error: {}", jsonStr); diff --git a/hutool-json/src/test/java/org/dromara/hutool/json/JSONTokenerTest.java b/hutool-json/src/test/java/org/dromara/hutool/json/JSONTokenerTest.java index c41241cec..fab01580d 100644 --- a/hutool-json/src/test/java/org/dromara/hutool/json/JSONTokenerTest.java +++ b/hutool-json/src/test/java/org/dromara/hutool/json/JSONTokenerTest.java @@ -12,14 +12,48 @@ package org.dromara.hutool.json; +import org.dromara.hutool.core.io.IoUtil; import org.dromara.hutool.core.io.resource.ResourceUtil; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + public class JSONTokenerTest { @Test - public void parseTest() { + void parseTest() { final JSONObject jsonObject = JSONUtil.parseObj(ResourceUtil.getUtf8Reader("issue1200.json")); - Assertions.assertNotNull(jsonObject); + assertNotNull(jsonObject); + } + + @Test + void nextTest() { + final JSONTokener jsonTokener = new JSONTokener("{\"ab\": \"abc\"}"); + final char c = jsonTokener.nextTokenChar(); + assertEquals('{', c); + assertEquals("ab", jsonTokener.nextString()); + final char c2 = jsonTokener.nextTokenChar(); + assertEquals(':', c2); + assertEquals("abc", jsonTokener.nextString()); + + + IoUtil.closeQuietly(jsonTokener); + } + + /** + * 兼容非包装符包装的value和key + */ + @Test + void nextWithoutWrapperTest() { + final JSONTokener jsonTokener = new JSONTokener("{ab: abc}"); + final char c = jsonTokener.nextTokenChar(); + assertEquals('{', c); + assertEquals("ab", jsonTokener.nextString()); + final char c2 = jsonTokener.nextTokenChar(); + assertEquals(':', c2); + assertEquals("abc", jsonTokener.nextString()); + + + IoUtil.closeQuietly(jsonTokener); } }