From 66b1f72affede091ee9a386c2ef63f5f34b7ae87 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Sat, 13 Jun 2026 21:24:52 -0500 Subject: [PATCH 1/2] Ported the parser.c to the java extension and remove the dependency on ragel. --- Rakefile | 34 +- java/src/json/ext/Parser.java | 954 +++++++ java/src/json/ext/ParserConfig.java | 2537 ----------------- java/src/json/ext/ParserConfig.rl | 861 ------ java/src/json/ext/ParserService.java | 4 +- java/src/json/ext/StringDecoder.java | 13 +- java/src/json/ext/StringScanner.java | 136 + .../src/json/ext/VectorizedStringScanner.java | 72 + 8 files changed, 1177 insertions(+), 3434 deletions(-) create mode 100644 java/src/json/ext/Parser.java delete mode 100644 java/src/json/ext/ParserConfig.java delete mode 100644 java/src/json/ext/ParserConfig.rl create mode 100644 java/src/json/ext/StringScanner.java create mode 100644 java/src/json/ext/VectorizedStringScanner.java diff --git a/Rakefile b/Rakefile index d05b45b79..20f1b3d8c 100644 --- a/Rakefile +++ b/Rakefile @@ -84,8 +84,6 @@ namespace :ci do end JAVA_DIR = "java/src/json/ext" -JAVA_RAGEL_PATH = "#{JAVA_DIR}/ParserConfig.rl" -JAVA_PARSER_SRC = "#{JAVA_DIR}/ParserConfig.java" JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"].exclude("#{JAVA_DIR}/Vectorized*.java") JAVA_VEC_SOURCES = FileList["#{JAVA_DIR}/Vectorized*.java"] JAVA_CLASSES = [] @@ -96,35 +94,6 @@ CLEAN.concat FileList["java/src/**/*.class"] CLEAN << JRUBY_PARSER_JAR CLEAN << JRUBY_GENERATOR_JAR -CLOBBER << JAVA_PARSER_SRC - -which = lambda { |c| - w = `which #{c}` - break w.chomp unless w.empty? 
-} - -if RUBY_PLATFORM =~ /mingw|mswin/ - # cleans up Windows CI output - RAGEL_CODEGEN = %w[ragel].find(&which) - RAGEL_DOTGEN = %w[ragel].find(&which) -else - RAGEL_CODEGEN = %w[rlcodegen rlgen-cd ragel].find(&which) - RAGEL_DOTGEN = %w[rlgen-dot rlgen-cd ragel].find(&which) -end - -file JAVA_PARSER_SRC => JAVA_RAGEL_PATH do - cd JAVA_DIR do - if RAGEL_CODEGEN == 'ragel' - sh "ragel ParserConfig.rl -J -o ParserConfig.java" - else - sh "ragel -x ParserConfig.rl | #{RAGEL_CODEGEN} -J" - end - end -end - -desc "Generate parser with ragel" -task :ragel => [JAVA_PARSER_SRC] - if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' path_separator = File::PATH_SEPARATOR ENV['JAVA_HOME'] ||= [ @@ -174,7 +143,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' end desc "Compiling jruby extension" - task :compile => [:ragel] + JAVA_CLASSES + task :compile => JAVA_CLASSES desc "Package the jruby gem" task :jruby_gem => :create_jar do @@ -199,6 +168,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' "json/ext/Parser*.class", "json/ext/RuntimeInfo*.class", "json/ext/StringDecoder*.class", + "json/ext/*StringScanner*.class", "json/ext/Utils*.class" ] sh 'jar', 'cf', File.basename(JRUBY_PARSER_JAR), *parser_classes diff --git a/java/src/json/ext/Parser.java b/java/src/json/ext/Parser.java new file mode 100644 index 000000000..b7f505421 --- /dev/null +++ b/java/src/json/ext/Parser.java @@ -0,0 +1,954 @@ +package json.ext; + +import org.jcodings.Encoding; +import org.jcodings.specific.ASCIIEncoding; +import org.jcodings.specific.UTF8Encoding; +import org.jruby.Ruby; +import org.jruby.RubyArray; +import org.jruby.RubyClass; +import org.jruby.RubyFloat; +import org.jruby.RubyHash; +import org.jruby.RubyObject; +import org.jruby.RubyProc; +import org.jruby.RubyString; +import org.jruby.RubySymbol; +import org.jruby.anno.JRubyMethod; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.Block; +import org.jruby.runtime.ObjectAllocator; +import 
org.jruby.runtime.ThreadContext; +import org.jruby.runtime.Visibility; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.ByteList; +import org.jruby.util.ConvertBytes; +import org.jruby.util.ConvertDouble.DoubleConverter; +import org.jruby.util.ConvertBytes; +import org.jruby.util.ConvertDouble.DoubleConverter; +import org.jruby.util.ConvertBytes; +import org.jruby.util.ConvertDouble.DoubleConverter; +import org.jruby.util.ConvertBytes; +import org.jruby.util.ConvertDouble.DoubleConverter; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; +import java.util.function.BiFunction; + +import static org.jruby.util.ConvertDouble.DoubleConverter; + +/** + * This is a port of the parser from the C extension. + */ +public class Parser extends RubyObject { + private final RuntimeInfo info; + private int maxNesting; + private boolean allowNaN; + private boolean allowTrailingComma; + private boolean allowComments; + private boolean deprecateComments; + private boolean allowControlCharacters; + private boolean allowInvalidEscape; + private boolean allowDuplicateKey; + private boolean deprecateDuplicateKey; + private boolean symbolizeNames; + private boolean freeze; + private RubyProc onLoadProc; + private RubyClass decimalClass; + BiFunction decimalFactory; + + private static final int DEFAULT_MAX_NESTING = 100; + + // Maximum number of deprecation warnings emitted per parse, to avoid + // flooding output on pathological inputs. 
+ private static final int MAX_DEPRECATIONS = 5; + + // constant names in the JSON module containing those values + private static final String CONST_NAN = "NaN"; + private static final String CONST_INFINITY = "Infinity"; + private static final String CONST_MINUS_INFINITY = "MinusInfinity"; + + static final ObjectAllocator ALLOCATOR = Parser::new; + + public Parser(Ruby runtime, RubyClass metaClass) { + super(runtime, metaClass); + info = RuntimeInfo.forRuntime(runtime); + } + + @JRubyMethod(name = "new", meta = true) + public static IRubyObject newInstance(IRubyObject clazz, IRubyObject arg0, Block block) { + Parser config = (Parser)((RubyClass)clazz).allocate(); + config.callInit(arg0, block); + return config; + } + + @JRubyMethod(name = "new", meta = true) + public static IRubyObject newInstance(IRubyObject clazz, IRubyObject arg0, IRubyObject arg1, Block block) { + Parser config = (Parser)((RubyClass)clazz).allocate(); + config.callInit(arg0, arg1, block); + return config; + } + + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject initialize(ThreadContext context, IRubyObject options) { + checkFrozen(); + Ruby runtime = context.runtime; + + OptionsReader opts = new OptionsReader(context, options); + this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); + this.allowNaN = opts.getBool("allow_nan", false); + if (opts.hasKey("allow_comments")) { + this.allowComments = opts.getBool("allow_comments", false); + this.deprecateComments = false; + } else { + this.allowComments = true; + this.deprecateComments = true; + } + + this.allowControlCharacters = opts.getBool("allow_control_characters", false); + this.allowInvalidEscape = opts.getBool("allow_invalid_escape", false); + this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); + this.symbolizeNames = opts.getBool("symbolize_names", false); + if (opts.hasKey("allow_duplicate_key")) { + this.allowDuplicateKey = opts.getBool("allow_duplicate_key", false); + 
this.deprecateDuplicateKey = false; + } else { + this.allowDuplicateKey = false; + this.deprecateDuplicateKey = true; + } + + this.freeze = opts.getBool("freeze", false); + this.onLoadProc = opts.getProc("on_load"); + + this.decimalClass = opts.getClass("decimal_class", null); + + if (decimalClass == null) { + this.decimalFactory = this::createFloat; + } else if (decimalClass == runtime.getClass("BigDecimal")) { + this.decimalFactory = this::createBigDecimal; + } else { + this.decimalFactory = this::createCustomDecimal; + } + + return this; + } + + public IRubyObject onLoad(ThreadContext context, IRubyObject object) { + if (onLoadProc == null) { + return object; + } else { + return onLoadProc.call(context, object); + } + } + + /** + * Checks the given string's encoding. If a non-UTF-8 encoding is detected, + * a converted copy is returned. + * Returns the source string if no conversion is needed. + */ + private RubyString convertEncoding(ThreadContext context, RubyString source) { + Encoding encoding = source.getEncoding(); + if (encoding == ASCIIEncoding.INSTANCE) { + source = (RubyString) source.dup(); + source.setEncoding(UTF8Encoding.INSTANCE); + source.clearCodeRange(); + } else if (encoding != UTF8Encoding.INSTANCE) { + source = (RubyString) source.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); + } + return source; + } + + /** + * Parser#parse() + * + *

Parses the current JSON text source and returns the + * complete data structure as a result. + */ + @JRubyMethod + public IRubyObject parse(ThreadContext context, IRubyObject source) { + return new ParserSession(this, convertEncoding(context, source.convertToString()), info).parse(context); + } + + private RubyFloat createFloat(final ThreadContext context, final ByteList num) { + return RubyFloat.newFloat(context.runtime, new DoubleConverter().parse(num, true, true)); + } + + private IRubyObject createBigDecimal(final ThreadContext context, final ByteList num) { + final Ruby runtime = context.runtime; + return runtime.getKernel().callMethod(context, "BigDecimal", runtime.newString(num)); + } + + private IRubyObject createCustomDecimal(final ThreadContext context, final ByteList num) { + return decimalClass.newInstance(context, context.runtime.newString(num), Block.NULL_BLOCK); + } + + /** + * A single parsing session over one source string. + * + *

Once a ParserSession is instantiated, the source string should not + * change until the parsing is complete. The ParserSession object assumes + * the source {@link RubyString} is still associated to its original + * {@link ByteList}, which in turn must still be bound to the same + * byte[] value (and on the same offset). + */ + private static final class ParserSession { + private static final int INITIAL_FRAME_CAPACITY = 32; + private static final int INITIAL_VALUE_CAPACITY = 64; + + // Integers with fewer than this many digits are built directly from a + // long accumulated during the digit scan; longer ones fall back to + // ConvertBytes (Bignum). 17 digits (< 1e17) always fit a signed long, + // including after negation. Mirrors parser.c's MAX_FAST_INTEGER_SIZE. + private static final int MAX_FAST_INTEGER_SIZE = 18; + + // Same idea as the rvalue_cache in the C extension. + // + // This is bigger than the C implementation as this is + // heap allocated. + private static final int KEY_CACHE_CAPA = 128; + private static final int KEY_CACHE_MAX_ENTRY_LENGTH = 55; + + private enum FrameType { + ROOT(FramePhase.DONE), + ARRAY(FramePhase.ARRAY_COMMA), + OBJECT(FramePhase.OBJECT_COMMA); + + private final FramePhase nextPhase; + + FrameType(FramePhase nextPhase) { + this.nextPhase = nextPhase; + } + + FramePhase nextPhase() { + return nextPhase; + } + } + + private enum FramePhase { + DONE, + ARRAY_COMMA, + OBJECT_COMMA, + VALUE, + OBJECT_KEY, + OBJECT_COLON + } + + private static final class Frame { + FrameType type; + FramePhase phase; + // The position within the value stack when this frame was created. + int valueStackHead; + // The cursor position when we encountered a '{'. + // Used for error message reporting when decoding an JSON object. 
+ int startCursor; + } + + private final Parser config; + private final RuntimeInfo info; + private final ByteList byteList; + private final ByteList view; + private final byte[] data; + + // Little-endian view over {@link #data}, used by the SWAR string scan + // so that {@link Long#numberOfTrailingZeros} locates the first + // interesting" byte within an 8-byte chunk. + private final ByteBuffer chunks; + private final StringScanner scanner; + private final StringDecoder decoder; + private final int begin; + private final int end; + private int cursor; + + private ThreadContext context; + + // Integer value accumulated by the most recent scanDigits() call. + private long digitsValue; + private int currentNesting = 0; + + private int inArray = 0; + + private final IRubyObject[] keyCache = new IRubyObject[KEY_CACHE_CAPA]; + private int keyCacheLength = 0; + private int emittedDeprecations = 0; + + + private Frame[] frameStack = new Frame[INITIAL_FRAME_CAPACITY]; + private int frameDepth = 0; + + private IRubyObject[] valueStack = new IRubyObject[INITIAL_VALUE_CAPACITY]; + private int valueTop = 0; + + private ParserSession(Parser config, RubyString source, RuntimeInfo info) { + this.config = config; + this.info = info; + this.byteList = source.getByteList(); + this.data = byteList.unsafeBytes(); + this.chunks = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN); + this.scanner = StringScanner.getInstance(); + this.view = new ByteList(data, false); + this.begin = byteList.begin(); + this.end = begin + byteList.length(); + this.decoder = new StringDecoder(config.allowControlCharacters, config.allowInvalidEscape); + } + + public IRubyObject parse(ThreadContext context) { + this.context = context; + this.cursor = begin; + pushFrame(FrameType.ROOT, FramePhase.VALUE, 0, -1); + + IRubyObject result = run(); + + // Only trailing whitespace (and comments) may follow the document. 
+ eatWhitespace(); + if (cursor < end) { + throw unexpectedToken(cursor, end); + } + return result; + } + + private IRubyObject run() { + while (true) { + Frame frame = topFrame(); + + switch (frame.phase) { + case DONE: + return valueStack[valueTop - 1]; + + case VALUE: { + eatWhitespace(); + IRubyObject value; + switch (peek()) { + case 'n': + if (matchKeyword("null")) { value = context.nil; break; } + throw unexpectedToken(cursor, end); + case 't': + if (matchKeyword("true")) { value = context.tru; break; } + throw unexpectedToken(cursor, end); + case 'f': + if (matchKeyword("false")) { value = context.fals; break; } + throw unexpectedToken(cursor, end); + case 'N': + if (config.allowNaN && matchKeyword("NaN")) { + value = getConstant(CONST_NAN); + break; + } + throw unexpectedToken(cursor, end); + case 'I': + if (config.allowNaN && matchKeyword("Infinity")) { + value = getConstant(CONST_INFINITY); + break; + } + throw unexpectedToken(cursor, end); + case '-': + cursor++; + if (config.allowNaN && matchKeyword("Infinity")) { + value = getConstant(CONST_MINUS_INFINITY); + } else { + value = parseNumber(cursor - 1); + } + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + value = parseNumber(cursor); + break; + case '"': + value = parseString(false); + break; + case '[': { + cursor++; + eatWhitespace(); + if (peek() == ']') { + cursor++; + value = decodeArray(0); + break; + } + currentNesting++; + checkNesting(); + inArray++; + // Phase stays VALUE: the next iteration reads + // the first element. + pushFrame(FrameType.ARRAY, FramePhase.VALUE, valueTop, -1); + continue; + } + case '{': { + int objectStart = cursor; + cursor++; + eatWhitespace(); + if (peek() == '}') { + cursor++; + value = decodeObject(0); + break; + } + currentNesting++; + checkNesting(); + // Phase KEY: the next iteration reads the first key. 
+ pushFrame(FrameType.OBJECT, FramePhase.OBJECT_KEY, valueTop, objectStart); + continue; + } + case 0: + throw newException(Utils.M_PARSER_ERROR, "unexpected end of input"); + default: + throw unexpectedToken(cursor, end); + } + + pushValue(value); + valueCompleted(frame); + continue; + } + + case OBJECT_KEY: { + eatWhitespace(); + if (peek() == '"') { + pushValue(parseString(true)); + frame.phase = FramePhase.OBJECT_COLON; + continue; + } + throw unexpectedToken(cursor, end); + } + + case OBJECT_COLON: { + eatWhitespace(); + if (peek() == ':') { + cursor++; + frame.phase = FramePhase.VALUE; + continue; + } + throw unexpectedToken(cursor, end); + } + + case ARRAY_COMMA: { + eatWhitespace(); + byte b = peek(); + if (b == ',') { + cursor++; + if (config.allowTrailingComma) { + eatWhitespace(); + if (peek() == ']') { + // Trailing comma: re-enter COMMA to close. + continue; + } + } + frame.phase = FramePhase.VALUE; + continue; + } else if (b == ']') { + cursor++; + int count = entryCount(frame); + currentNesting--; + inArray--; + popFrame(); + pushValue(decodeArray(count)); + valueCompleted(topFrame()); + continue; + } + throw unexpectedToken(cursor, end); + } + + case OBJECT_COMMA: { + eatWhitespace(); + byte b = peek(); + if (b == ',') { + cursor++; + if (config.allowTrailingComma) { + eatWhitespace(); + if (peek() == '}') { + // Trailing comma: re-enter COMMA to close. + continue; + } + } + frame.phase = FramePhase.OBJECT_KEY; + continue; + } else if (b == '}') { + cursor++; + currentNesting--; + int count = entryCount(frame); + // Temporarily rewind the cursor so a duplicate-key + // error points at the object's opening brace. 
+ int finalCursor = cursor; + cursor = frame.startCursor; + IRubyObject object = decodeObject(count); + cursor = finalCursor; + popFrame(); + pushValue(object); + valueCompleted(topFrame()); + continue; + } + throw unexpectedToken(cursor, end); + } + + default: + throw context.runtime.newRuntimeError("unreachable parser state"); + } + } + } + + private void pushValue(IRubyObject value) { + if (valueTop == valueStack.length) { + valueStack = Arrays.copyOf(valueStack, valueStack.length * 2); + } + valueStack[valueTop++] = config.onLoad(context, value); + } + + private void valueCompleted(Frame frame) { + frame.phase = frame.type.nextPhase(); + } + + private int entryCount(Frame frame) { + return valueTop - frame.valueStackHead; + } + + private Frame topFrame() { + return frameStack[frameDepth - 1]; + } + + private void pushFrame(FrameType type, FramePhase phase, + int valueStackHead, int startCursor) { + if (frameDepth == frameStack.length) { + frameStack = Arrays.copyOf(frameStack, frameStack.length * 2); + } + Frame frame = frameStack[frameDepth]; + if (frame == null) { + frame = new Frame(); + frameStack[frameDepth] = frame; + } + frame.type = type; + frame.phase = phase; + frame.valueStackHead = valueStackHead; + frame.startCursor = startCursor; + frameDepth++; + } + + private void popFrame() { + frameDepth--; + } + + private void checkNesting() { + if (config.maxNesting > 0 && currentNesting > config.maxNesting) { + throw newException(Utils.M_NESTING_ERROR, + "nesting of " + currentNesting + " is too deep"); + } + } + + private IRubyObject decodeArray(int count) { + int base = valueTop - count; + IRubyObject[] elements = new IRubyObject[count]; + System.arraycopy(valueStack, base, elements, 0, count); + valueTop = base; + RubyArray array = RubyArray.newArrayNoCopy(context.runtime, elements); + if (config.freeze) { + array.setFrozen(true); + } + return array; + } + + private IRubyObject decodeObject(int count) { + final Ruby runtime = context.runtime; + int base 
= valueTop - count; + int limit = valueTop; + RubyHash hash = RubyHash.newHash(runtime); + for (int i = base; i < limit; i += 2) { + // We use RubyHash#fastASet because all object keys have already been + // frozen and deduplicated. + // + // This was significantly faster than RubyHash#op_aset. + hash.fastASet(valueStack[i], valueStack[i + 1]); + } + valueTop = base; + + if (!config.allowDuplicateKey && hash.size() < count / 2) { + onDuplicateKey(findDuplicateKey(base, limit)); + } + + if (config.freeze) { + hash.setFrozen(true); + } + return hash; + } + + private IRubyObject findDuplicateKey(int base, int limit) { + RubyHash seen = RubyHash.newHash(context.runtime); + for (int i = base; i < limit; i += 2) { + int before = seen.size(); + IRubyObject key = valueStack[i]; + seen.fastASetCheckString(context.runtime, key, context.tru); + if (seen.size() == before) { + return key; + } + } + return context.nil; + } + + private void onDuplicateKey(IRubyObject key) { + // Symbol keys are reported by their string form (":a" -> "a") to + // match the C parser's message. + String keyInspect = key.callMethod(context, "to_s") + .callMethod(context, "inspect").asJavaString(); + if (config.deprecateDuplicateKey) { + if (emittedDeprecations < MAX_DEPRECATIONS) { + emittedDeprecations++; + context.runtime.getWarnings().warning( + "detected duplicate key " + keyInspect + " in JSON object. " + + "This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`"); + } + } else { + throw newException(Utils.M_PARSER_ERROR, "duplicate key " + keyInspect); + } + } + + private IRubyObject parseNumber(int numberStart) { + byte first = cursor < end ? 
data[cursor] : 0; + boolean negative = data[numberStart] == '-'; + + int intStart = cursor; + int p = scanDigits(intStart); + int intDigits = p - intStart; + long mantissa = digitsValue; + + if ((first == '0' && intDigits > 1) || intDigits == 0) { + cursor = p; + throw unexpectedToken(numberStart, end); + } + + boolean integer = true; + + if (p < end && data[p] == '.') { + integer = false; + p++; + int fracStart = p; + p = scanDigits(fracStart); + if (p == fracStart) { + cursor = p; + throw unexpectedToken(numberStart, end); + } + } + + if (p < end && (data[p] == 'e' || data[p] == 'E')) { + integer = false; + p++; + if (p < end && (data[p] == '+' || data[p] == '-')) p++; + int expStart = p; + p = scanDigits(expStart); + if (p == expStart) { + cursor = p; + throw unexpectedToken(numberStart, end); + } + } + + cursor = p; + + if (integer) { + if (intDigits < MAX_FAST_INTEGER_SIZE) { + return context.runtime.newFixnum(negative ? -mantissa : mantissa); + } + return ConvertBytes.byteListToInum(context.runtime, absSubSequence(numberStart, p), 10, true); + } + return config.decimalFactory.apply(context, absSubSequence(numberStart, p)); + } + + private int scanDigits(int p) { + long acc = 0; + while (p + 8 <= end) { + long next8 = chunks.getLong(p); + // Branchless all-eight-are-digits test (simdjson / 0x80.pl): + // each nibble pair resolves to 0x3 iff the byte is '0'..'9'. 
+ long match = (next8 & 0xF0F0F0F0F0F0F0F0L) + | (((next8 + 0x0606060606060606L) & 0xF0F0F0F0F0F0F0F0L) >>> 4); + if (match == 0x3333333333333333L) { + acc = acc * 100000000L + decode8digits(next8); + p += 8; + continue; + } + int consecutive = Long.numberOfTrailingZeros(match ^ 0x3333333333333333L) >>> 3; + if (consecutive >= 4) { + acc = acc * 10000L + decode4digits((int) next8); + p += 4; + consecutive -= 4; + } + while (consecutive > 0) { + acc = acc * 10 + (data[p] - '0'); + p++; + consecutive--; + } + digitsValue = acc; + return p; + } + while (p < end && data[p] >= '0' && data[p] <= '9') { + acc = acc * 10 + (data[p] - '0'); + p++; + } + digitsValue = acc; + return p; + } + + // Decode eight packed little-endian ASCII digits to their integer value. + // From https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/ + private static long decode8digits(long val) { + final long mask = 0x000000FF000000FFL; + final long mul1 = 0x000F424000000064L; // 100 + (1000000 << 32) + final long mul2 = 0x0000271000000001L; // 1 + (10000 << 32) + val -= 0x3030303030303030L; + val = (val * 10) + (val >>> 8); + val = (((val & mask) * mul1) + (((val >>> 16) & mask) * mul2)) >>> 32; + return val; + } + + // Decode four packed little-endian ASCII digits to their integer value. + private static long decode4digits(int val) { + val -= 0x30303030; + val = (val * 10) + (val >>> 8); + return ((val & 0xFF) * 100) + ((val >>> 16) & 0xFF); + } + + private IRubyObject parseString(boolean isName) { + final byte[] data = this.data; + final int contentStart = cursor + 1; // skip opening quote + + // The scanner finds the closing quote and reports whether the body + // is plain printable ASCII (no escape, no ASCII control character, + // no non-ASCII byte). Anything non-plain is handed to StringDecoder, + // which performs the UTF-8/control validation, escape expansion, and + // error reporting. 
+ long scanned = scanner.scan(data, chunks, contentStart, end); + final int q = (int) scanned; + if (q < 0) { + throw newException(Utils.M_PARSER_ERROR, + "unexpected end of input, expected closing \""); + } + + boolean plain = (scanned & StringScanner.PLAIN_BIT) != 0; + cursor = q + 1; // past closing quote + + // Note: When running multiple read-world benchmarks in the same JVM, + // this seems consistently faster than "if (isName && plain)" + // and only handling the ASCII-only path in the cache. + // + // Note 2: It's important that all object keys are frozen and deduplicated, + // decodeObject relies this. + if (isName) { + // Resolve the key's decoded bytes without building a RubyString + // yet, so a cache hit skips allocation entirely. + byte[] buf; + int off; + int len; + if (plain) { + buf = data; + off = contentStart; + len = q - contentStart; + } else { + ByteList decoded = decoder.decode(context, byteList, + contentStart - begin, q - begin); + buf = decoded.getUnsafeBytes(); + off = decoded.begin(); + len = decoded.realSize(); + } + // Same as the C extension. + if (inArray > 0 && len > 0 && len <= KEY_CACHE_MAX_ENTRY_LENGTH && isLetter(buf[off])) { + return cachedKey(buf, off, len); + } + return internedKey(buf, off, len); + } + + ByteList content; + if (plain) { + // Plain ASCII string, skip the decoder. 
+ content = new ByteList(data, contentStart, q - contentStart, true); + } else { + content = decoder.decode(context, byteList, + contentStart - begin, q - begin); + } + + RubyString string = context.runtime.newString(content); + string.setEncoding(UTF8Encoding.INSTANCE); + string.clearCodeRange(); + + if (config.freeze) { + string.setFrozen(true); + return context.runtime.freezeAndDedupString(string); + } + + return string; + } + + private static boolean isLetter(byte b) { + return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z'); + } + + private IRubyObject internedKey(byte[] buf, int off, int len) { + RubyString string = context.runtime.newString( + new ByteList(buf, off, len, UTF8Encoding.INSTANCE, true)); + if (config.symbolizeNames) { + return string.intern(); + } + string.setFrozen(true); + return context.runtime.freezeAndDedupString(string); + } + + private IRubyObject cachedKey(byte[] buf, int off, int len) { + int low = 0; + int high = keyCacheLength - 1; + while (low <= high) { + int mid = (low + high) >>> 1; + int cmp = compareKey(buf, off, len, keyCache[mid]); + if (cmp == 0) { + return keyCache[mid]; + } else if (cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + IRubyObject key = internedKey(buf, off, len); + if (keyCacheLength < KEY_CACHE_CAPA) { + System.arraycopy(keyCache, low, keyCache, low + 1, keyCacheLength - low); + keyCache[low] = key; + keyCacheLength++; + } + return key; + } + + // Orders by length first, then unsigned byte value + private static int compareKey(byte[] buf, int off, int len, IRubyObject entry) { + ByteList eb = entry instanceof RubySymbol + ? 
((RubySymbol) entry).getBytes() + : ((RubyString) entry).getByteList(); + int elen = eb.realSize(); + if (len != elen) { + return len - elen; + } + byte[] ebuf = eb.getUnsafeBytes(); + int ebeg = eb.begin(); + for (int i = 0; i < len; i++) { + int cmp = (buf[off + i] & 0xFF) - (ebuf[ebeg + i] & 0xFF); + if (cmp != 0) { + return cmp; + } + } + return 0; + } + + private byte peek() { + return cursor < end ? data[cursor] : 0; + } + + private boolean matchKeyword(String keyword) { + int len = keyword.length(); + if (end - cursor < len) { + return false; + } + for (int i = 0; i < len; i++) { + if (data[cursor + i] != (byte) keyword.charAt(i)) { + return false; + } + } + cursor += len; + return true; + } + + private void eatWhitespace() { + while (cursor < end) { + switch (data[cursor]) { + case ' ': + case '\t': + case '\r': + cursor++; + break; + case '\n': + cursor++; + // Same heuristic from the C parser: a newline in + // pretty-printed JSON is almost always followed by a run + // of indentation spaces, so skip them eight at a time. + while (cursor + 8 <= end) { + long x = chunks.getLong(cursor); + if (x == 0x2020202020202020L) { + cursor += 8; + } else { + cursor += Long.numberOfTrailingZeros( + x ^ 0x2020202020202020L) >>> 3; + break; + } + } + break; + case '/': + eatComments(); + break; + default: + return; + } + } + } + + private void eatComments() { + if (!config.allowComments) { + if (config.deprecateComments) { + if (emittedDeprecations < MAX_DEPRECATIONS) { + emittedDeprecations++; + context.runtime.getWarnings().warning( + "Encountered comment in JSON. 
This will raise an error in json 3.0 unless enabled via `allow_comments: true`"); + } + } else { + throw unexpectedToken(cursor, end); + } + } + + int start = cursor; + cursor++; // skip '/' + switch (peek()) { + case '/': + cursor++; + while (cursor < end && data[cursor] != '\n') cursor++; + if (cursor < end) cursor++; // consume newline + break; + case '*': + cursor++; + while (true) { + while (cursor < end && data[cursor] != '*') cursor++; + if (cursor >= end) { + throw newException(Utils.M_PARSER_ERROR, + "unterminated comment, expected closing '*/'"); + } + cursor++; // past '*' + if (peek() == '/') { + cursor++; + break; + } + } + break; + default: + throw unexpectedToken(start, end); + } + } + + /** + * Updates the "view" ByteList with the new offsets and returns it. The + * returned ByteList must be consumed before the next call, since the + * same instance is reused. + */ + private ByteList absSubSequence(int absStart, int absEnd) { + view.setBegin(absStart); + view.setRealSize(absEnd - absStart); + return view; + } + + private IRubyObject getConstant(String name) { + return info.jsonModule.get().getConstant(name); + } + + private RaiseException parsingError(int absStart, int absEnd) { + RubyString msg = context.runtime.newString("unexpected token at '") + .cat(data, absStart, Math.min(absEnd - absStart, 32)) + .cat((byte)'\''); + return newException(Utils.M_PARSER_ERROR, msg); + } + + private RaiseException unexpectedToken(int absStart, int absEnd) { + return parsingError(absStart, absEnd); + } + + private RaiseException newException(String className, String message) { + return Utils.newException(context, className, message); + } + + private RaiseException newException(String className, RubyString message) { + return Utils.newException(context, className, message); + } + } +} diff --git a/java/src/json/ext/ParserConfig.java b/java/src/json/ext/ParserConfig.java deleted file mode 100644 index 562c0ab61..000000000 --- a/java/src/json/ext/ParserConfig.java 
+++ /dev/null @@ -1,2537 +0,0 @@ - -// line 1 "ParserConfig.rl" -/* - * This code is copyrighted work by Daniel Luz . - * - * Distributed under the Ruby license: https://www.ruby-lang.org/en/about/license.txt - */ -package json.ext; - -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.RubyClass; -import org.jruby.RubyFloat; -import org.jruby.RubyHash; -import org.jruby.RubyInteger; -import org.jruby.RubyObject; -import org.jruby.RubyProc; -import org.jruby.RubyString; -import org.jruby.anno.JRubyMethod; -import org.jruby.exceptions.JumpException; -import org.jruby.exceptions.RaiseException; -import org.jruby.runtime.Block; -import org.jruby.runtime.Helpers; -import org.jruby.runtime.ObjectAllocator; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.Visibility; -import org.jruby.runtime.builtin.IRubyObject; -import org.jruby.util.ByteList; -import org.jruby.util.ConvertBytes; - -import java.util.function.BiFunction; - -import static org.jruby.util.ConvertDouble.DoubleConverter; - -/** - * The JSON::Ext::Parser class. - * - *

This is the JSON parser implemented as a Java class. To use it as the - * standard parser, set - *

JSON.parser = JSON::Ext::Parser
- * This is performed for you when you include "json/ext". - * - *

This class does not perform the actual parsing, just acts as an interface - * to Ruby code. When the {@link #parse(ThreadContext)} method is invoked, a - * ParserConfig.ParserSession object is instantiated, which handles the process. - * - * @author mernen - */ -public class ParserConfig extends RubyObject { - private final RuntimeInfo info; - private int maxNesting; - private boolean allowNaN; - private boolean allowTrailingComma; - private boolean allowComments; - private boolean deprecateComments; - private boolean allowControlCharacters; - private boolean allowInvalidEscape; - private boolean allowDuplicateKey; - private boolean deprecateDuplicateKey; - private boolean symbolizeNames; - private boolean freeze; - private RubyProc onLoadProc; - private RubyClass decimalClass; - BiFunction decimalFactory; - private RubyHash match_string; - - private static final int DEFAULT_MAX_NESTING = 100; - - private static final ByteList JSON_MINUS_INFINITY = new ByteList(ByteList.plain("-Infinity")); - // constant names in the JSON module containing those values - private static final String CONST_NAN = "NaN"; - private static final String CONST_INFINITY = "Infinity"; - private static final String CONST_MINUS_INFINITY = "MinusInfinity"; - - static final ObjectAllocator ALLOCATOR = ParserConfig::new; - - /** - * Multiple-value return for internal parser methods. - * - *

All the parseStuff methods return instances of - * ParserResult when successful, or null when - * there's a problem with the input data. - */ - static final class ParserResult { - /** - * The result of the successful parsing. Should never be - * null. - */ - IRubyObject result; - /** - * The point where the parser returned. - */ - int p; - - void update(IRubyObject result, int p) { - this.result = result; - this.p = p; - } - } - - public ParserConfig(Ruby runtime, RubyClass metaClass) { - super(runtime, metaClass); - info = RuntimeInfo.forRuntime(runtime); - } - - /** - * ParserConfig.new(source, opts = {}) - * - *

Creates a new JSON::Ext::Parser instance for the string - * source. - * It will be configured by the opts Hash. - * opts can have the following keys: - *

- *

- *
:max_nesting - *
The maximum depth of nesting allowed in the parsed data - * structures. Disable depth checking with :max_nesting => false|nil|0, - * it defaults to 100. - *

- *

:allow_nan - *
If set to true, allow NaN, - * Infinity and -Infinity in defiance of RFC 4627 - * to be parsed by the Parser. This option defaults to false. - *

- *

:allow_trailing_comma - *
If set to true, allow arrays and objects with a trailing - * comma in defiance of RFC 4627 to be parsed by the Parser. - * This option defaults to false. - *

- *

:symbolize_names - *
If set to true, returns symbols for the names (keys) in - * a JSON object. Otherwise strings are returned, which is also the default. - *

- *

:create_additions - *
If set to false, the Parser doesn't create additions - * even if a matching class and create_id was found. This option - * defaults to true. - *

- *

:object_class - *
Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * new without arguments, and return an object that respond to []=. - *

- *

:array_class - *
Defaults to Array. If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * new without arguments, and return an object that respond to <<. - *

- *

:decimal_class - *
Specifies which class to use instead of the default (Float) when - * parsing decimal numbers. This class must accept a single string argument - * in its constructor. - *
- */ - - @JRubyMethod(name = "new", meta = true) - public static IRubyObject newInstance(IRubyObject clazz, IRubyObject arg0, Block block) { - ParserConfig config = (ParserConfig)((RubyClass)clazz).allocate(); - - config.callInit(arg0, block); - - return config; - } - - @JRubyMethod(name = "new", meta = true) - public static IRubyObject newInstance(IRubyObject clazz, IRubyObject arg0, IRubyObject arg1, Block block) { - ParserConfig config = (ParserConfig)((RubyClass)clazz).allocate(); - - config.callInit(arg0, arg1, block); - - return config; - } - - @JRubyMethod(visibility = Visibility.PRIVATE) - public IRubyObject initialize(ThreadContext context, IRubyObject options) { - checkFrozen(); - Ruby runtime = context.runtime; - - OptionsReader opts = new OptionsReader(context, options); - this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); - this.allowNaN = opts.getBool("allow_nan", false); - if (opts.hasKey("allow_comments")) { - this.allowComments = opts.getBool("allow_comments", false); - this.deprecateComments = false; - } else { - this.allowComments = true; - this.deprecateComments = true; - } - - this.allowControlCharacters = opts.getBool("allow_control_characters", false); - this.allowInvalidEscape = opts.getBool("allow_invalid_escape", false); - this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); - this.symbolizeNames = opts.getBool("symbolize_names", false); - if (opts.hasKey("allow_duplicate_key")) { - this.allowDuplicateKey = opts.getBool("allow_duplicate_key", false); - this.deprecateDuplicateKey = false; - } else { - this.allowDuplicateKey = false; - this.deprecateDuplicateKey = true; - } - - this.freeze = opts.getBool("freeze", false); - this.onLoadProc = opts.getProc("on_load"); - - this.decimalClass = opts.getClass("decimal_class", null); - - if (decimalClass == null) { - this.decimalFactory = this::createFloat; - } else if (decimalClass == runtime.getClass("BigDecimal")) { - this.decimalFactory = 
this::createBigDecimal; - } else { - this.decimalFactory = this::createCustomDecimal; - } - - return this; - } - - public IRubyObject onLoad(ThreadContext context, IRubyObject object) { - if (onLoadProc == null) { - return object; - } else { - return onLoadProc.call(context, object); - } - } - - /** - * Checks the given string's encoding. If a non-UTF-8 encoding is detected, - * a converted copy is returned. - * Returns the source string if no conversion is needed. - */ - private RubyString convertEncoding(ThreadContext context, RubyString source) { - Encoding encoding = source.getEncoding(); - if (encoding == ASCIIEncoding.INSTANCE) { - source = (RubyString) source.dup(); - source.setEncoding(UTF8Encoding.INSTANCE); - source.clearCodeRange(); - } else if (encoding != UTF8Encoding.INSTANCE) { - source = (RubyString) source.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); - } - return source; - } - - /** - * Parser#parse() - * - *

Parses the current JSON text source and returns the - * complete data structure as a result. - */ - @JRubyMethod - public IRubyObject parse(ThreadContext context, IRubyObject source) { - return new ParserSession(this, convertEncoding(context, source.convertToString()), context, info).parse(context); - } - - /** - * Queries JSON.create_id. Returns null if it is - * set to nil or false, and a String if not. - */ - private RubyString getCreateId(ThreadContext context) { - IRubyObject v = info.jsonModule.get().callMethod(context, "create_id"); - return v.isTrue() ? v.convertToString() : null; - } - - private RubyFloat createFloat(final ThreadContext context, final ByteList num) { - return RubyFloat.newFloat(context.runtime, new DoubleConverter().parse(num, true, true)); - } - - private IRubyObject createBigDecimal(final ThreadContext context, final ByteList num) { - final Ruby runtime = context.runtime; - return runtime.getKernel().callMethod(context, "BigDecimal", runtime.newString(num)); - } - - private IRubyObject createCustomDecimal(final ThreadContext context, final ByteList num) { - return decimalClass.newInstance(context, context.runtime.newString(num), Block.NULL_BLOCK); - } - - /** - * A string parsing session. - * - *

Once a ParserSession is instantiated, the source string should not - * change until the parsing is complete. The ParserSession object assumes - * the source {@link RubyString} is still associated to its original - * {@link ByteList}, which in turn must still be bound to the same - * byte[] value (and on the same offset). - */ - // Ragel uses lots of fall-through - @SuppressWarnings("fallthrough") - private static class ParserSession { - private final ParserConfig config; - private final RuntimeInfo info; - private final ByteList byteList; - private final ByteList view; - private final byte[] data; - private final StringDecoder decoder; - private int currentNesting = 0; - private int emittedDeprecations = 0; - - private ParserSession(ParserConfig config, RubyString source, ThreadContext context, RuntimeInfo info) { - this.config = config; - this.info = info; - this.byteList = source.getByteList(); - this.data = byteList.unsafeBytes(); - this.view = new ByteList(data, false); - this.decoder = new StringDecoder(config.allowControlCharacters, config.allowInvalidEscape); - } - - private RaiseException parsingError(ThreadContext context, String message, int absStart, int absEnd) { - RubyString msg = context.runtime.newString("unexpected token at '") - .cat(data, absStart, Math.min(absEnd - absStart, 32)) - .cat((byte)'\''); - return newException(context, Utils.M_PARSER_ERROR, msg); - } - - private RaiseException unexpectedToken(ThreadContext context, int absStart, int absEnd) { - return parsingError(context, "unexpected token at '", absStart, absEnd); - } - - -// line 360 "ParserConfig.rl" - - - -// line 326 "ParserConfig.java" -private static byte[] init__JSON_value_actions_0() -{ - return new byte [] { - 0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1, - 5, 1, 6, 1, 7, 1, 8, 1, 9 - }; -} - -private static final byte _JSON_value_actions[] = init__JSON_value_actions_0(); - - -private static byte[] init__JSON_value_key_offsets_0() -{ - return new byte [] { - 0, 0, 11, 12, 13, 14, 15, 
16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 - }; -} - -private static final byte _JSON_value_key_offsets[] = init__JSON_value_key_offsets_0(); - - -private static char[] init__JSON_value_trans_keys_0() -{ - return new char [] { - 34, 45, 73, 78, 91, 102, 110, 116, 123, 48, 57, 110, - 102, 105, 110, 105, 116, 121, 97, 78, 97, 108, 115, 101, - 117, 108, 108, 114, 117, 101, 0 - }; -} - -private static final char _JSON_value_trans_keys[] = init__JSON_value_trans_keys_0(); - - -private static byte[] init__JSON_value_single_lengths_0() -{ - return new byte [] { - 0, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 - }; -} - -private static final byte _JSON_value_single_lengths[] = init__JSON_value_single_lengths_0(); - - -private static byte[] init__JSON_value_range_lengths_0() -{ - return new byte [] { - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; -} - -private static final byte _JSON_value_range_lengths[] = init__JSON_value_range_lengths_0(); - - -private static byte[] init__JSON_value_index_offsets_0() -{ - return new byte [] { - 0, 0, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, - 31, 33, 35, 37, 39, 41, 43, 45, 47, 49 - }; -} - -private static final byte _JSON_value_index_offsets[] = init__JSON_value_index_offsets_0(); - - -private static byte[] init__JSON_value_trans_targs_0() -{ - return new byte [] { - 21, 21, 2, 9, 21, 11, 15, 18, 21, 21, 0, 3, - 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 21, - 0, 10, 0, 21, 0, 12, 0, 13, 0, 14, 0, 21, - 0, 16, 0, 17, 0, 21, 0, 19, 0, 20, 0, 21, - 0, 0, 0 - }; -} - -private static final byte _JSON_value_trans_targs[] = init__JSON_value_trans_targs_0(); - - -private static byte[] init__JSON_value_trans_actions_0() -{ - return new byte [] { - 13, 11, 0, 0, 15, 0, 0, 0, 17, 11, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, - 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 3, - 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 5, - 0, 0, 0 - }; -} - -private static final byte _JSON_value_trans_actions[] = 
init__JSON_value_trans_actions_0(); - - -private static byte[] init__JSON_value_from_state_actions_0() -{ - return new byte [] { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 19 - }; -} - -private static final byte _JSON_value_from_state_actions[] = init__JSON_value_from_state_actions_0(); - - -static final int JSON_value_start = 1; -static final int JSON_value_first_final = 21; -static final int JSON_value_error = 0; - -static final int JSON_value_en_main = 1; - - -// line 466 "ParserConfig.rl" - - - void parseValue(ThreadContext context, ParserResult res, int p, int pe) { - int cs; - IRubyObject result = null; - - -// line 448 "ParserConfig.java" - { - cs = JSON_value_start; - } - -// line 473 "ParserConfig.rl" - -// line 455 "ParserConfig.java" - { - int _klen; - int _trans = 0; - int _acts; - int _nacts; - int _keys; - int _goto_targ = 0; - - _goto: while (true) { - switch ( _goto_targ ) { - case 0: - if ( p == pe ) { - _goto_targ = 4; - continue _goto; - } - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } -case 1: - _acts = _JSON_value_from_state_actions[cs]; - _nacts = (int) _JSON_value_actions[_acts++]; - while ( _nacts-- > 0 ) { - switch ( _JSON_value_actions[_acts++] ) { - case 9: -// line 451 "ParserConfig.rl" - { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } - break; -// line 487 "ParserConfig.java" - } - } - - _match: do { - _keys = _JSON_value_key_offsets[cs]; - _trans = _JSON_value_index_offsets[cs]; - _klen = _JSON_value_single_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + _klen - 1; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_value_trans_keys[_mid] ) - _upper = _mid - 1; - else if ( data[p] > _JSON_value_trans_keys[_mid] ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - break _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = 
_JSON_value_range_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_value_trans_keys[_mid] ) - _upper = _mid - 2; - else if ( data[p] > _JSON_value_trans_keys[_mid+1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - break _match; - } - } - _trans += _klen; - } - } while (false); - - cs = _JSON_value_trans_targs[_trans]; - - if ( _JSON_value_trans_actions[_trans] != 0 ) { - _acts = _JSON_value_trans_actions[_trans]; - _nacts = (int) _JSON_value_actions[_acts++]; - while ( _nacts-- > 0 ) - { - switch ( _JSON_value_actions[_acts++] ) - { - case 0: -// line 368 "ParserConfig.rl" - { - result = context.nil; - } - break; - case 1: -// line 371 "ParserConfig.rl" - { - result = context.fals; - } - break; - case 2: -// line 374 "ParserConfig.rl" - { - result = context.tru; - } - break; - case 3: -// line 377 "ParserConfig.rl" - { - if (config.allowNaN) { - result = getConstant(CONST_NAN); - } else { - throw unexpectedToken(context, p - 2, pe); - } - } - break; - case 4: -// line 384 "ParserConfig.rl" - { - if (config.allowNaN) { - result = getConstant(CONST_INFINITY); - } else { - throw unexpectedToken(context, p - 7, pe); - } - } - break; - case 5: -// line 391 "ParserConfig.rl" - { - if (pe > p + 8 && - absSubSequence(p, p + 9).equals(JSON_MINUS_INFINITY)) { - - if (config.allowNaN) { - result = getConstant(CONST_MINUS_INFINITY); - {p = (( p + 10))-1;} - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } else { - throw unexpectedToken(context, p, pe); - } - } - parseFloat(context, res, p, pe); - if (res.result != null) { - result = res.result; - {p = (( res.p))-1;} - } - parseInteger(context, res, p, pe); - if (res.result != null) { - result = res.result; - {p = (( res.p))-1;} - } - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } - break; - case 
6: -// line 417 "ParserConfig.rl" - { - parseString(context, res, p, pe); - if (res.result == null) { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } else { - result = res.result; - {p = (( res.p))-1;} - } - } - break; - case 7: -// line 427 "ParserConfig.rl" - { - currentNesting++; - parseArray(context, res, p, pe); - currentNesting--; - if (res.result == null) { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } else { - result = res.result; - {p = (( res.p))-1;} - } - } - break; - case 8: -// line 439 "ParserConfig.rl" - { - currentNesting++; - parseObject(context, res, p, pe); - currentNesting--; - if (res.result == null) { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } else { - result = res.result; - {p = (( res.p))-1;} - } - } - break; -// line 659 "ParserConfig.java" - } - } - } - -case 2: - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } - if ( ++p != pe ) { - _goto_targ = 1; - continue _goto; - } -case 4: -case 5: - } - break; } - } - -// line 474 "ParserConfig.rl" - - if (cs >= JSON_value_first_final && result != null) { - if (config.freeze) { - result.setFrozen(true); - } - res.update(result, p); - } else { - res.update(null, p); - } - } - - -// line 692 "ParserConfig.java" -private static byte[] init__JSON_integer_actions_0() -{ - return new byte [] { - 0, 1, 0 - }; -} - -private static final byte _JSON_integer_actions[] = init__JSON_integer_actions_0(); - - -private static byte[] init__JSON_integer_key_offsets_0() -{ - return new byte [] { - 0, 0, 4, 7, 9, 9 - }; -} - -private static final byte _JSON_integer_key_offsets[] = init__JSON_integer_key_offsets_0(); - - -private static char[] init__JSON_integer_trans_keys_0() -{ - return new char [] { - 45, 48, 49, 57, 48, 49, 57, 48, 57, 48, 57, 0 - }; -} - -private static final char _JSON_integer_trans_keys[] = init__JSON_integer_trans_keys_0(); - - -private static byte[] init__JSON_integer_single_lengths_0() -{ - return new byte [] { - 0, 2, 1, 
0, 0, 0 - }; -} - -private static final byte _JSON_integer_single_lengths[] = init__JSON_integer_single_lengths_0(); - - -private static byte[] init__JSON_integer_range_lengths_0() -{ - return new byte [] { - 0, 1, 1, 1, 0, 1 - }; -} - -private static final byte _JSON_integer_range_lengths[] = init__JSON_integer_range_lengths_0(); - - -private static byte[] init__JSON_integer_index_offsets_0() -{ - return new byte [] { - 0, 0, 4, 7, 9, 10 - }; -} - -private static final byte _JSON_integer_index_offsets[] = init__JSON_integer_index_offsets_0(); - - -private static byte[] init__JSON_integer_indicies_0() -{ - return new byte [] { - 0, 2, 3, 1, 2, 3, 1, 1, 4, 1, 3, 4, - 0 - }; -} - -private static final byte _JSON_integer_indicies[] = init__JSON_integer_indicies_0(); - - -private static byte[] init__JSON_integer_trans_targs_0() -{ - return new byte [] { - 2, 0, 3, 5, 4 - }; -} - -private static final byte _JSON_integer_trans_targs[] = init__JSON_integer_trans_targs_0(); - - -private static byte[] init__JSON_integer_trans_actions_0() -{ - return new byte [] { - 0, 0, 0, 0, 1 - }; -} - -private static final byte _JSON_integer_trans_actions[] = init__JSON_integer_trans_actions_0(); - - -static final int JSON_integer_start = 1; -static final int JSON_integer_first_final = 3; -static final int JSON_integer_error = 0; - -static final int JSON_integer_en_main = 1; - - -// line 496 "ParserConfig.rl" - - - void parseInteger(ThreadContext context, ParserResult res, int p, int pe) { - int new_p = parseIntegerInternal(p, pe); - if (new_p == -1) { - res.update(null, p); - return; - } - RubyInteger number = createInteger(context, p, new_p); - res.update(config.onLoad(context, number), new_p + 1); - } - - int parseIntegerInternal(int p, int pe) { - int cs; - - -// line 808 "ParserConfig.java" - { - cs = JSON_integer_start; - } - -// line 512 "ParserConfig.rl" - int memo = p; - -// line 816 "ParserConfig.java" - { - int _klen; - int _trans = 0; - int _acts; - int _nacts; - int _keys; 
- int _goto_targ = 0; - - _goto: while (true) { - switch ( _goto_targ ) { - case 0: - if ( p == pe ) { - _goto_targ = 4; - continue _goto; - } - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } -case 1: - _match: do { - _keys = _JSON_integer_key_offsets[cs]; - _trans = _JSON_integer_index_offsets[cs]; - _klen = _JSON_integer_single_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + _klen - 1; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_integer_trans_keys[_mid] ) - _upper = _mid - 1; - else if ( data[p] > _JSON_integer_trans_keys[_mid] ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - break _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = _JSON_integer_range_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_integer_trans_keys[_mid] ) - _upper = _mid - 2; - else if ( data[p] > _JSON_integer_trans_keys[_mid+1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - break _match; - } - } - _trans += _klen; - } - } while (false); - - _trans = _JSON_integer_indicies[_trans]; - cs = _JSON_integer_trans_targs[_trans]; - - if ( _JSON_integer_trans_actions[_trans] != 0 ) { - _acts = _JSON_integer_trans_actions[_trans]; - _nacts = (int) _JSON_integer_actions[_acts++]; - while ( _nacts-- > 0 ) - { - switch ( _JSON_integer_actions[_acts++] ) - { - case 0: -// line 490 "ParserConfig.rl" - { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } - break; -// line 903 "ParserConfig.java" - } - } - } - -case 2: - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } - if ( ++p != pe ) { - _goto_targ = 1; - continue _goto; - } -case 4: -case 5: - } - break; } - } - -// line 514 "ParserConfig.rl" - - if (cs < 
JSON_integer_first_final) { - return -1; - } - - return p; - } - - RubyInteger createInteger(ThreadContext context, int p, int new_p) { - Ruby runtime = context.runtime; - ByteList num = absSubSequence(p, new_p); - return bytesToInum(runtime, num); - } - - RubyInteger bytesToInum(Ruby runtime, ByteList num) { - return ConvertBytes.byteListToInum(runtime, num, 10, true); - } - - -// line 943 "ParserConfig.java" -private static byte[] init__JSON_float_actions_0() -{ - return new byte [] { - 0, 1, 0 - }; -} - -private static final byte _JSON_float_actions[] = init__JSON_float_actions_0(); - - -private static byte[] init__JSON_float_key_offsets_0() -{ - return new byte [] { - 0, 0, 4, 7, 10, 12, 16, 18, 23, 29, 29 - }; -} - -private static final byte _JSON_float_key_offsets[] = init__JSON_float_key_offsets_0(); - - -private static char[] init__JSON_float_trans_keys_0() -{ - return new char [] { - 45, 48, 49, 57, 48, 49, 57, 46, 69, 101, 48, 57, - 43, 45, 48, 57, 48, 57, 46, 69, 101, 48, 57, 69, - 101, 45, 46, 48, 57, 69, 101, 45, 46, 48, 57, 0 - }; -} - -private static final char _JSON_float_trans_keys[] = init__JSON_float_trans_keys_0(); - - -private static byte[] init__JSON_float_single_lengths_0() -{ - return new byte [] { - 0, 2, 1, 3, 0, 2, 0, 3, 2, 0, 2 - }; -} - -private static final byte _JSON_float_single_lengths[] = init__JSON_float_single_lengths_0(); - - -private static byte[] init__JSON_float_range_lengths_0() -{ - return new byte [] { - 0, 1, 1, 0, 1, 1, 1, 1, 2, 0, 2 - }; -} - -private static final byte _JSON_float_range_lengths[] = init__JSON_float_range_lengths_0(); - - -private static byte[] init__JSON_float_index_offsets_0() -{ - return new byte [] { - 0, 0, 4, 7, 11, 13, 17, 19, 24, 29, 30 - }; -} - -private static final byte _JSON_float_index_offsets[] = init__JSON_float_index_offsets_0(); - - -private static byte[] init__JSON_float_indicies_0() -{ - return new byte [] { - 0, 2, 3, 1, 2, 3, 1, 4, 5, 5, 1, 6, - 1, 7, 7, 8, 1, 8, 1, 4, 5, 5, 3, 1, - 
5, 5, 1, 6, 9, 1, 1, 1, 1, 8, 9, 0 - }; -} - -private static final byte _JSON_float_indicies[] = init__JSON_float_indicies_0(); - - -private static byte[] init__JSON_float_trans_targs_0() -{ - return new byte [] { - 2, 0, 3, 7, 4, 5, 8, 6, 10, 9 - }; -} - -private static final byte _JSON_float_trans_targs[] = init__JSON_float_trans_targs_0(); - - -private static byte[] init__JSON_float_trans_actions_0() -{ - return new byte [] { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 - }; -} - -private static final byte _JSON_float_trans_actions[] = init__JSON_float_trans_actions_0(); - - -static final int JSON_float_start = 1; -static final int JSON_float_first_final = 8; -static final int JSON_float_error = 0; - -static final int JSON_float_en_main = 1; - - -// line 547 "ParserConfig.rl" - - - void parseFloat(ThreadContext context, ParserResult res, int p, int pe) { - int new_p = parseFloatInternal(p, pe); - if (new_p == -1) { - res.update(null, p); - return; - } - final ByteList num = absSubSequence(p, new_p); - IRubyObject number = config.decimalFactory.apply(context, num); - - res.update(config.onLoad(context, number), new_p + 1); - } - - int parseFloatInternal(int p, int pe) { - int cs; - - -// line 1064 "ParserConfig.java" - { - cs = JSON_float_start; - } - -// line 565 "ParserConfig.rl" - int memo = p; - -// line 1072 "ParserConfig.java" - { - int _klen; - int _trans = 0; - int _acts; - int _nacts; - int _keys; - int _goto_targ = 0; - - _goto: while (true) { - switch ( _goto_targ ) { - case 0: - if ( p == pe ) { - _goto_targ = 4; - continue _goto; - } - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } -case 1: - _match: do { - _keys = _JSON_float_key_offsets[cs]; - _trans = _JSON_float_index_offsets[cs]; - _klen = _JSON_float_single_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + _klen - 1; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_float_trans_keys[_mid] ) 
- _upper = _mid - 1; - else if ( data[p] > _JSON_float_trans_keys[_mid] ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - break _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = _JSON_float_range_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_float_trans_keys[_mid] ) - _upper = _mid - 2; - else if ( data[p] > _JSON_float_trans_keys[_mid+1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - break _match; - } - } - _trans += _klen; - } - } while (false); - - _trans = _JSON_float_indicies[_trans]; - cs = _JSON_float_trans_targs[_trans]; - - if ( _JSON_float_trans_actions[_trans] != 0 ) { - _acts = _JSON_float_trans_actions[_trans]; - _nacts = (int) _JSON_float_actions[_acts++]; - while ( _nacts-- > 0 ) - { - switch ( _JSON_float_actions[_acts++] ) - { - case 0: -// line 538 "ParserConfig.rl" - { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } - break; -// line 1159 "ParserConfig.java" - } - } - } - -case 2: - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } - if ( ++p != pe ) { - _goto_targ = 1; - continue _goto; - } -case 4: -case 5: - } - break; } - } - -// line 567 "ParserConfig.rl" - - if (cs < JSON_float_first_final) { - return -1; - } - - return p; - } - - -// line 1189 "ParserConfig.java" -private static byte[] init__JSON_string_actions_0() -{ - return new byte [] { - 0, 2, 0, 1 - }; -} - -private static final byte _JSON_string_actions[] = init__JSON_string_actions_0(); - - -private static byte[] init__JSON_string_key_offsets_0() -{ - return new byte [] { - 0, 0, 1, 3, 4, 10, 16, 22, 28 - }; -} - -private static final byte _JSON_string_key_offsets[] = init__JSON_string_key_offsets_0(); - - -private static char[] init__JSON_string_trans_keys_0() -{ - return new char [] { - 34, 34, 92, 117, 48, 57, 65, 70, 97, 
102, 48, 57, - 65, 70, 97, 102, 48, 57, 65, 70, 97, 102, 48, 57, - 65, 70, 97, 102, 0 - }; -} - -private static final char _JSON_string_trans_keys[] = init__JSON_string_trans_keys_0(); - - -private static byte[] init__JSON_string_single_lengths_0() -{ - return new byte [] { - 0, 1, 2, 1, 0, 0, 0, 0, 0 - }; -} - -private static final byte _JSON_string_single_lengths[] = init__JSON_string_single_lengths_0(); - - -private static byte[] init__JSON_string_range_lengths_0() -{ - return new byte [] { - 0, 0, 0, 0, 3, 3, 3, 3, 0 - }; -} - -private static final byte _JSON_string_range_lengths[] = init__JSON_string_range_lengths_0(); - - -private static byte[] init__JSON_string_index_offsets_0() -{ - return new byte [] { - 0, 0, 2, 5, 7, 11, 15, 19, 23 - }; -} - -private static final byte _JSON_string_index_offsets[] = init__JSON_string_index_offsets_0(); - - -private static byte[] init__JSON_string_indicies_0() -{ - return new byte [] { - 0, 1, 2, 3, 0, 4, 0, 5, 5, 5, 1, 6, - 6, 6, 1, 7, 7, 7, 1, 0, 0, 0, 1, 1, - 0 - }; -} - -private static final byte _JSON_string_indicies[] = init__JSON_string_indicies_0(); - - -private static byte[] init__JSON_string_trans_targs_0() -{ - return new byte [] { - 2, 0, 8, 3, 4, 5, 6, 7 - }; -} - -private static final byte _JSON_string_trans_targs[] = init__JSON_string_trans_targs_0(); - - -private static byte[] init__JSON_string_trans_actions_0() -{ - return new byte [] { - 0, 0, 1, 0, 0, 0, 0, 0 - }; -} - -private static final byte _JSON_string_trans_actions[] = init__JSON_string_trans_actions_0(); - - -static final int JSON_string_start = 1; -static final int JSON_string_first_final = 8; -static final int JSON_string_error = 0; - -static final int JSON_string_en_main = 1; - - -// line 606 "ParserConfig.rl" - - - void parseString(ThreadContext context, ParserResult res, int p, int pe) { - int cs; - IRubyObject result = null; - - -// line 1299 "ParserConfig.java" - { - cs = JSON_string_start; - } - -// line 613 "ParserConfig.rl" - int memo = 
p; - -// line 1307 "ParserConfig.java" - { - int _klen; - int _trans = 0; - int _acts; - int _nacts; - int _keys; - int _goto_targ = 0; - - _goto: while (true) { - switch ( _goto_targ ) { - case 0: - if ( p == pe ) { - _goto_targ = 4; - continue _goto; - } - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } -case 1: - _match: do { - _keys = _JSON_string_key_offsets[cs]; - _trans = _JSON_string_index_offsets[cs]; - _klen = _JSON_string_single_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + _klen - 1; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_string_trans_keys[_mid] ) - _upper = _mid - 1; - else if ( data[p] > _JSON_string_trans_keys[_mid] ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - break _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = _JSON_string_range_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_string_trans_keys[_mid] ) - _upper = _mid - 2; - else if ( data[p] > _JSON_string_trans_keys[_mid+1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - break _match; - } - } - _trans += _klen; - } - } while (false); - - _trans = _JSON_string_indicies[_trans]; - cs = _JSON_string_trans_targs[_trans]; - - if ( _JSON_string_trans_actions[_trans] != 0 ) { - _acts = _JSON_string_trans_actions[_trans]; - _nacts = (int) _JSON_string_actions[_acts++]; - while ( _nacts-- > 0 ) - { - switch ( _JSON_string_actions[_acts++] ) - { - case 0: -// line 581 "ParserConfig.rl" - { - int offset = byteList.begin(); - ByteList decoded = decoder.decode(context, byteList, memo + 1 - offset, - p - offset); - result = context.runtime.newString(decoded); - if (result == null) { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} 
- } else { - {p = (( p + 1))-1;} - } - } - break; - case 1: -// line 594 "ParserConfig.rl" - { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } - break; -// line 1409 "ParserConfig.java" - } - } - } - -case 2: - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } - if ( ++p != pe ) { - _goto_targ = 1; - continue _goto; - } -case 4: -case 5: - } - break; } - } - -// line 615 "ParserConfig.rl" - - if (cs >= JSON_string_first_final && result != null) { - if (result instanceof RubyString) { - RubyString string = (RubyString)result; - string.setEncoding(UTF8Encoding.INSTANCE); - string.clearCodeRange(); - if (config.freeze) { - string.setFrozen(true); - string = context.runtime.freezeAndDedupString(string); - } - res.update(config.onLoad(context, string), p + 1); - } else { - res.update(config.onLoad(context, result), p + 1); - } - } else { - res.update(null, p + 1); - } - } - - -// line 1450 "ParserConfig.java" -private static byte[] init__JSON_array_actions_0() -{ - return new byte [] { - 0, 1, 0, 1, 1, 1, 2 - }; -} - -private static final byte _JSON_array_actions[] = init__JSON_array_actions_0(); - - -private static byte[] init__JSON_array_cond_offsets_0() -{ - return new byte [] { - 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 6, 6, 6, 6, 6, 8, 11, 16, 19 - }; -} - -private static final byte _JSON_array_cond_offsets[] = init__JSON_array_cond_offsets_0(); - - -private static byte[] init__JSON_array_cond_lengths_0() -{ - return new byte [] { - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 5, 0, 0, 0, 0, 2, 3, 5, 3, 0 - }; -} - -private static final byte _JSON_array_cond_lengths[] = init__JSON_array_cond_lengths_0(); - - -private static int[] init__JSON_array_cond_keys_0() -{ - return new int [] { - 44, 44, 9, 9, 10, 10, 13, 13, 32, 32, 47, 47, - 42, 42, 47, 47, 0, 41, 42, 42, 43,65535, 0, 41, - 42, 42, 43, 46, 47, 47, 48,65535, 0, 9, 10, 10, - 11,65535, 0 - }; -} - -private static final int _JSON_array_cond_keys[] = init__JSON_array_cond_keys_0(); - - 
-private static byte[] init__JSON_array_cond_spaces_0() -{ - return new byte [] { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - }; -} - -private static final byte _JSON_array_cond_spaces[] = init__JSON_array_cond_spaces_0(); - - -private static byte[] init__JSON_array_key_offsets_0() -{ - return new byte [] { - 0, 0, 1, 18, 26, 28, 29, 31, 32, 48, 50, 51, - 53, 54, 76, 78, 79, 81, 82, 86, 92, 100, 106 - }; -} - -private static final byte _JSON_array_key_offsets[] = init__JSON_array_key_offsets_0(); - - -private static int[] init__JSON_array_trans_keys_0() -{ - return new int [] { - 91, 13, 32, 34, 45, 47, 73, 78, 91, 93, 102, 110, - 116, 123, 9, 10, 48, 57, 13, 32, 47, 93,65580,131116, - 9, 10, 42, 47, 42, 42, 47, 10, 13, 32, 34, 45, - 47, 73, 78, 91, 102, 110, 116, 123, 9, 10, 48, 57, - 42, 47, 42, 42, 47, 10, 34, 45, 73, 78, 91, 93, - 102, 110, 116, 123,65549,65568,65583,131085,131104,131119, 48, 57, - 65545,65546,131081,131082, 42, 47, 42, 42, 47, 10,65578,65583, - 131114,131119,65578,131114,65536,131071,131072,196607,65578,65583,131114,131119, - 65536,131071,131072,196607,65546,131082,65536,131071,131072,196607, 0 - }; -} - -private static final int _JSON_array_trans_keys[] = init__JSON_array_trans_keys_0(); - - -private static byte[] init__JSON_array_single_lengths_0() -{ - return new byte [] { - 0, 1, 13, 6, 2, 1, 2, 1, 12, 2, 1, 2, - 1, 16, 2, 1, 2, 1, 4, 2, 4, 2, 0 - }; -} - -private static final byte _JSON_array_single_lengths[] = init__JSON_array_single_lengths_0(); - - -private static byte[] init__JSON_array_range_lengths_0() -{ - return new byte [] { - 0, 0, 2, 1, 0, 0, 0, 0, 2, 0, 0, 0, - 0, 3, 0, 0, 0, 0, 0, 2, 2, 2, 0 - }; -} - -private static final byte _JSON_array_range_lengths[] = init__JSON_array_range_lengths_0(); - - -private static short[] init__JSON_array_index_offsets_0() -{ - return new short [] { - 0, 0, 2, 18, 26, 29, 31, 34, 36, 51, 54, 56, - 59, 61, 81, 84, 86, 89, 91, 96, 101, 108, 113 - }; -} - -private static final 
short _JSON_array_index_offsets[] = init__JSON_array_index_offsets_0(); - - -private static byte[] init__JSON_array_indicies_0() -{ - return new byte [] { - 0, 1, 0, 0, 2, 2, 3, 2, 2, 2, 4, 2, - 2, 2, 2, 0, 2, 1, 5, 5, 6, 4, 7, 8, - 5, 1, 9, 10, 1, 11, 9, 11, 5, 9, 12, 10, - 7, 7, 2, 2, 13, 2, 2, 2, 2, 2, 2, 2, - 7, 2, 1, 14, 15, 1, 16, 14, 16, 7, 14, 17, - 15, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 0, - 0, 3, 8, 8, 18, 2, 0, 8, 1, 19, 20, 1, - 21, 19, 21, 0, 19, 22, 20, 19, 20, 23, 24, 1, - 21, 25, 19, 23, 1, 21, 0, 25, 8, 19, 23, 1, - 22, 26, 20, 24, 1, 1, 0 - }; -} - -private static final byte _JSON_array_indicies[] = init__JSON_array_indicies_0(); - - -private static byte[] init__JSON_array_trans_targs_0() -{ - return new byte [] { - 2, 0, 3, 14, 22, 3, 4, 8, 13, 5, 7, 6, - 3, 9, 10, 12, 11, 8, 18, 15, 17, 16, 2, 19, - 21, 20, 13 - }; -} - -private static final byte _JSON_array_trans_targs[] = init__JSON_array_trans_targs_0(); - - -private static byte[] init__JSON_array_trans_actions_0() -{ - return new byte [] { - 0, 0, 3, 0, 5, 0, 0, 0, 0, 0, 0, 1, - 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, - 0, 1, 1 - }; -} - -private static final byte _JSON_array_trans_actions[] = init__JSON_array_trans_actions_0(); - - -static final int JSON_array_start = 1; -static final int JSON_array_first_final = 22; -static final int JSON_array_error = 0; - -static final int JSON_array_en_main = 1; - - -// line 669 "ParserConfig.rl" - - - void parseArray(ThreadContext context, ParserResult res, int p, int pe) { - int cs; - - if (config.maxNesting > 0 && currentNesting > config.maxNesting) { - throw newException(context, Utils.M_NESTING_ERROR, - "nesting of " + currentNesting + " is too deep"); - } - - IRubyObject result = RubyArray.newArray(context.runtime); - - -// line 1633 "ParserConfig.java" - { - cs = JSON_array_start; - } - -// line 682 "ParserConfig.rl" - -// line 1640 "ParserConfig.java" - { - int _klen; - int _trans = 0; - int _widec; - int _acts; - int _nacts; - int _keys; - int 
_goto_targ = 0; - - _goto: while (true) { - switch ( _goto_targ ) { - case 0: - if ( p == pe ) { - _goto_targ = 4; - continue _goto; - } - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } -case 1: - _widec = data[p]; - _keys = _JSON_array_cond_offsets[cs]*2 -; _klen = _JSON_array_cond_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys -; int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( _widec < _JSON_array_cond_keys[_mid] ) - _upper = _mid - 2; - else if ( _widec > _JSON_array_cond_keys[_mid+1] ) - _lower = _mid + 2; - else { - switch ( _JSON_array_cond_spaces[_JSON_array_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { - case 0: { - _widec = 65536 + (data[p] - 0); - if ( -// line 640 "ParserConfig.rl" - config.allowTrailingComma ) _widec += 65536; - break; - } - } - break; - } - } - } - - _match: do { - _keys = _JSON_array_key_offsets[cs]; - _trans = _JSON_array_index_offsets[cs]; - _klen = _JSON_array_single_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + _klen - 1; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( _widec < _JSON_array_trans_keys[_mid] ) - _upper = _mid - 1; - else if ( _widec > _JSON_array_trans_keys[_mid] ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - break _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = _JSON_array_range_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( _widec < _JSON_array_trans_keys[_mid] ) - _upper = _mid - 2; - else if ( _widec > _JSON_array_trans_keys[_mid+1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - break _match; - } - } - _trans += _klen; - } - } while (false); - - _trans = 
_JSON_array_indicies[_trans]; - cs = _JSON_array_trans_targs[_trans]; - - if ( _JSON_array_trans_actions[_trans] != 0 ) { - _acts = _JSON_array_trans_actions[_trans]; - _nacts = (int) _JSON_array_actions[_acts++]; - while ( _nacts-- > 0 ) - { - switch ( _JSON_array_actions[_acts++] ) - { - case 0: -// line 321 "ParserConfig.rl" - { - if (!config.allowComments) { - if (config.deprecateComments) { - if (config.deprecateDuplicateKey && emittedDeprecations < 5) { - emittedDeprecations++; - context.runtime.getWarnings().warning( - "Encountered comment in JSON. This will raise an error in json 3.0 unless enabled via `allow_comments: true`" - ); - } - } else { - throw unexpectedToken(context, p, pe); - } - } - } - break; - case 1: -// line 642 "ParserConfig.rl" - { - parseValue(context, res, p, pe); - if (res.result == null) { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } else { - ((RubyArray)result).append(res.result); - {p = (( res.p))-1;} - } - } - break; - case 2: -// line 653 "ParserConfig.rl" - { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } - break; -// line 1789 "ParserConfig.java" - } - } - } - -case 2: - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } - if ( ++p != pe ) { - _goto_targ = 1; - continue _goto; - } -case 4: -case 5: - } - break; } - } - -// line 683 "ParserConfig.rl" - - if (cs >= JSON_array_first_final) { - res.update(config.onLoad(context, result), p + 1); - } else { - throw unexpectedToken(context, p, pe); - } - } - - -// line 1819 "ParserConfig.java" -private static byte[] init__JSON_object_actions_0() -{ - return new byte [] { - 0, 1, 0, 1, 1, 1, 2, 1, 3 - }; -} - -private static final byte _JSON_object_actions[] = init__JSON_object_actions_0(); - - -private static byte[] init__JSON_object_cond_offsets_0() -{ - return new byte [] { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 11, 16, - 19, 19, 19, 19, 19, 19, 19, 19, 19 - }; -} - -private static final byte 
_JSON_object_cond_offsets[] = init__JSON_object_cond_offsets_0(); - - -private static byte[] init__JSON_object_cond_lengths_0() -{ - return new byte [] { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 3, - 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; -} - -private static final byte _JSON_object_cond_lengths[] = init__JSON_object_cond_lengths_0(); - - -private static int[] init__JSON_object_cond_keys_0() -{ - return new int [] { - 9, 9, 10, 10, 13, 13, 32, 32, 44, 44, 47, 47, - 42, 42, 47, 47, 0, 41, 42, 42, 43,65535, 0, 41, - 42, 42, 43, 46, 47, 47, 48,65535, 0, 9, 10, 10, - 11,65535, 0 - }; -} - -private static final int _JSON_object_cond_keys[] = init__JSON_object_cond_keys_0(); - - -private static byte[] init__JSON_object_cond_spaces_0() -{ - return new byte [] { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - }; -} - -private static final byte _JSON_object_cond_spaces[] = init__JSON_object_cond_spaces_0(); - - -private static byte[] init__JSON_object_key_offsets_0() -{ - return new byte [] { - 0, 0, 1, 8, 14, 16, 17, 19, 20, 36, 49, 56, - 62, 64, 65, 67, 68, 70, 71, 73, 74, 78, 84, 92, - 98, 100, 101, 103, 104, 106, 107, 109, 110 - }; -} - -private static final byte _JSON_object_key_offsets[] = init__JSON_object_key_offsets_0(); - - -private static int[] init__JSON_object_trans_keys_0() -{ - return new int [] { - 123, 13, 32, 34, 47, 125, 9, 10, 13, 32, 47, 58, - 9, 10, 42, 47, 42, 42, 47, 10, 13, 32, 34, 45, - 47, 73, 78, 91, 102, 110, 116, 123, 9, 10, 48, 57, - 125,65549,65568,65580,65583,131085,131104,131116,131119,65545,65546,131081, - 131082, 13, 32, 44, 47, 125, 9, 10, 13, 32, 34, 47, - 9, 10, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, - 47, 10,65578,65583,131114,131119,65578,131114,65536,131071,131072,196607, - 65578,65583,131114,131119,65536,131071,131072,196607,65546,131082,65536,131071, - 131072,196607, 42, 47, 42, 42, 47, 10, 42, 47, 42, 42, - 47, 10, 0 - }; -} - -private static final int _JSON_object_trans_keys[] = 
init__JSON_object_trans_keys_0(); - - -private static byte[] init__JSON_object_single_lengths_0() -{ - return new byte [] { - 0, 1, 5, 4, 2, 1, 2, 1, 12, 9, 5, 4, - 2, 1, 2, 1, 2, 1, 2, 1, 4, 2, 4, 2, - 2, 1, 2, 1, 2, 1, 2, 1, 0 - }; -} - -private static final byte _JSON_object_single_lengths[] = init__JSON_object_single_lengths_0(); - - -private static byte[] init__JSON_object_range_lengths_0() -{ - return new byte [] { - 0, 0, 1, 1, 0, 0, 0, 0, 2, 2, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; -} - -private static final byte _JSON_object_range_lengths[] = init__JSON_object_range_lengths_0(); - - -private static short[] init__JSON_object_index_offsets_0() -{ - return new short [] { - 0, 0, 2, 9, 15, 18, 20, 23, 25, 40, 52, 59, - 65, 68, 70, 73, 75, 78, 80, 83, 85, 90, 95, 102, - 107, 110, 112, 115, 117, 120, 122, 125, 127 - }; -} - -private static final short _JSON_object_index_offsets[] = init__JSON_object_index_offsets_0(); - - -private static byte[] init__JSON_object_indicies_0() -{ - return new byte [] { - 0, 1, 0, 0, 2, 3, 4, 0, 1, 5, 5, 6, - 7, 5, 1, 8, 9, 1, 10, 8, 10, 5, 8, 11, - 9, 7, 7, 12, 12, 13, 12, 12, 12, 12, 12, 12, - 12, 7, 12, 1, 4, 14, 14, 15, 16, 17, 17, 0, - 18, 14, 17, 1, 14, 14, 15, 16, 4, 14, 1, 15, - 15, 2, 19, 15, 1, 20, 21, 1, 22, 20, 22, 15, - 20, 23, 21, 24, 25, 1, 26, 24, 26, 14, 24, 27, - 25, 24, 25, 28, 29, 1, 26, 30, 24, 28, 1, 26, - 14, 30, 17, 24, 28, 1, 27, 31, 25, 29, 1, 32, - 33, 1, 34, 32, 34, 7, 32, 35, 33, 36, 37, 1, - 38, 36, 38, 0, 36, 39, 37, 1, 0 - }; -} - -private static final byte _JSON_object_indicies[] = init__JSON_object_indicies_0(); - - -private static byte[] init__JSON_object_trans_targs_0() -{ - return new byte [] { - 2, 0, 3, 28, 32, 3, 4, 8, 5, 7, 6, 3, - 9, 24, 10, 11, 16, 9, 20, 12, 13, 15, 14, 11, - 17, 19, 18, 10, 21, 23, 22, 9, 25, 27, 26, 8, - 29, 31, 30, 2 - }; -} - -private static final byte _JSON_object_trans_targs[] = init__JSON_object_trans_targs_0(); - - -private 
static byte[] init__JSON_object_trans_actions_0() -{ - return new byte [] { - 0, 0, 5, 0, 7, 0, 0, 0, 0, 0, 1, 1, - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, - 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, - 0, 0, 1, 1 - }; -} - -private static final byte _JSON_object_trans_actions[] = init__JSON_object_trans_actions_0(); - - -static final int JSON_object_start = 1; -static final int JSON_object_first_final = 32; -static final int JSON_object_error = 0; - -static final int JSON_object_en_main = 1; - - -// line 754 "ParserConfig.rl" - - - void parseObject(ThreadContext context, ParserResult res, int p, int pe) { - int cs; - IRubyObject lastName = null; - - if (config.maxNesting > 0 && currentNesting > config.maxNesting) { - throw newException(context, Utils.M_NESTING_ERROR, - "nesting of " + currentNesting + " is too deep"); - } - - // this is guaranteed to be a RubyHash due to the earlier - // allocator test at OptionsReader#getClass - IRubyObject result = RubyHash.newHash(context.runtime); - - -// line 2015 "ParserConfig.java" - { - cs = JSON_object_start; - } - -// line 770 "ParserConfig.rl" - -// line 2022 "ParserConfig.java" - { - int _klen; - int _trans = 0; - int _widec; - int _acts; - int _nacts; - int _keys; - int _goto_targ = 0; - - _goto: while (true) { - switch ( _goto_targ ) { - case 0: - if ( p == pe ) { - _goto_targ = 4; - continue _goto; - } - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } -case 1: - _widec = data[p]; - _keys = _JSON_object_cond_offsets[cs]*2 -; _klen = _JSON_object_cond_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys -; int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( _widec < _JSON_object_cond_keys[_mid] ) - _upper = _mid - 2; - else if ( _widec > _JSON_object_cond_keys[_mid+1] ) - _lower = _mid + 2; - else { - switch ( _JSON_object_cond_spaces[_JSON_object_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { - case 0: { - _widec = 
65536 + (data[p] - 0); - if ( -// line 697 "ParserConfig.rl" - config.allowTrailingComma ) _widec += 65536; - break; - } - } - break; - } - } - } - - _match: do { - _keys = _JSON_object_key_offsets[cs]; - _trans = _JSON_object_index_offsets[cs]; - _klen = _JSON_object_single_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + _klen - 1; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( _widec < _JSON_object_trans_keys[_mid] ) - _upper = _mid - 1; - else if ( _widec > _JSON_object_trans_keys[_mid] ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - break _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = _JSON_object_range_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( _widec < _JSON_object_trans_keys[_mid] ) - _upper = _mid - 2; - else if ( _widec > _JSON_object_trans_keys[_mid+1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - break _match; - } - } - _trans += _klen; - } - } while (false); - - _trans = _JSON_object_indicies[_trans]; - cs = _JSON_object_trans_targs[_trans]; - - if ( _JSON_object_trans_actions[_trans] != 0 ) { - _acts = _JSON_object_trans_actions[_trans]; - _nacts = (int) _JSON_object_actions[_acts++]; - while ( _nacts-- > 0 ) - { - switch ( _JSON_object_actions[_acts++] ) - { - case 0: -// line 321 "ParserConfig.rl" - { - if (!config.allowComments) { - if (config.deprecateComments) { - if (config.deprecateDuplicateKey && emittedDeprecations < 5) { - emittedDeprecations++; - context.runtime.getWarnings().warning( - "Encountered comment in JSON. 
This will raise an error in json 3.0 unless enabled via `allow_comments: true`" - ); - } - } else { - throw unexpectedToken(context, p, pe); - } - } - } - break; - case 1: -// line 699 "ParserConfig.rl" - { - parseValue(context, res, p, pe); - if (res.result == null) { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } else { - ((RubyHash)result).op_aset(context, lastName, res.result); - {p = (( res.p))-1;} - } - } - break; - case 2: -// line 710 "ParserConfig.rl" - { - parseString(context, res, p, pe); - if (res.result == null) { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } else { - RubyString name = (RubyString)res.result; - if (config.symbolizeNames) { - lastName = name.intern(); - } else { - lastName = name; - } - - if (!config.allowDuplicateKey) { - if (((RubyHash)result).hasKey(lastName)) { - if (config.deprecateDuplicateKey && emittedDeprecations < 5) { - emittedDeprecations++; - context.runtime.getWarnings().warning( - "detected duplicate key " + name.inspect() + " in JSON object. 
This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`" - ); - } else { - throw parsingError(context, "duplicate key" + name.inspect(), p, pe); - } - } - } - - {p = (( res.p))-1;} - } - } - break; - case 3: -// line 740 "ParserConfig.rl" - { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } - break; -// line 2203 "ParserConfig.java" - } - } - } - -case 2: - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } - if ( ++p != pe ) { - _goto_targ = 1; - continue _goto; - } -case 4: -case 5: - } - break; } - } - -// line 771 "ParserConfig.rl" - - if (cs < JSON_object_first_final) { - res.update(null, p + 1); - return; - } - - res.update(config.onLoad(context, result), p + 1); - } - - -// line 2234 "ParserConfig.java" -private static byte[] init__JSON_actions_0() -{ - return new byte [] { - 0, 1, 0, 1, 1 - }; -} - -private static final byte _JSON_actions[] = init__JSON_actions_0(); - - -private static byte[] init__JSON_key_offsets_0() -{ - return new byte [] { - 0, 0, 16, 18, 19, 21, 22, 24, 25, 27, 28 - }; -} - -private static final byte _JSON_key_offsets[] = init__JSON_key_offsets_0(); - - -private static char[] init__JSON_trans_keys_0() -{ - return new char [] { - 13, 32, 34, 45, 47, 73, 78, 91, 102, 110, 116, 123, - 9, 10, 48, 57, 42, 47, 42, 42, 47, 10, 42, 47, - 42, 42, 47, 10, 13, 32, 47, 9, 10, 0 - }; -} - -private static final char _JSON_trans_keys[] = init__JSON_trans_keys_0(); - - -private static byte[] init__JSON_single_lengths_0() -{ - return new byte [] { - 0, 12, 2, 1, 2, 1, 2, 1, 2, 1, 3 - }; -} - -private static final byte _JSON_single_lengths[] = init__JSON_single_lengths_0(); - - -private static byte[] init__JSON_range_lengths_0() -{ - return new byte [] { - 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1 - }; -} - -private static final byte _JSON_range_lengths[] = init__JSON_range_lengths_0(); - - -private static byte[] init__JSON_index_offsets_0() -{ - return new byte [] { - 0, 0, 15, 18, 20, 23, 25, 28, 30, 33, 35 - 
}; -} - -private static final byte _JSON_index_offsets[] = init__JSON_index_offsets_0(); - - -private static byte[] init__JSON_indicies_0() -{ - return new byte [] { - 0, 0, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, - 0, 2, 1, 4, 5, 1, 6, 4, 6, 7, 4, 8, - 5, 9, 10, 1, 11, 9, 11, 0, 9, 12, 10, 7, - 7, 13, 7, 1, 0 - }; -} - -private static final byte _JSON_indicies[] = init__JSON_indicies_0(); - - -private static byte[] init__JSON_trans_targs_0() -{ - return new byte [] { - 1, 0, 10, 6, 3, 5, 4, 10, 10, 7, 9, 8, - 1, 2 - }; -} - -private static final byte _JSON_trans_targs[] = init__JSON_trans_targs_0(); - - -private static byte[] init__JSON_trans_actions_0() -{ - return new byte [] { - 0, 0, 3, 0, 0, 0, 1, 0, 1, 0, 0, 1, - 1, 0 - }; -} - -private static final byte _JSON_trans_actions[] = init__JSON_trans_actions_0(); - - -static final int JSON_start = 1; -static final int JSON_first_final = 10; -static final int JSON_error = 0; - -static final int JSON_en_main = 1; - - -// line 800 "ParserConfig.rl" - - - public IRubyObject parseImplementation(ThreadContext context) { - int cs; - int p, pe; - IRubyObject result = null; - ParserResult res = new ParserResult(); - - -// line 2349 "ParserConfig.java" - { - cs = JSON_start; - } - -// line 809 "ParserConfig.rl" - p = byteList.begin(); - pe = p + byteList.length(); - -// line 2358 "ParserConfig.java" - { - int _klen; - int _trans = 0; - int _acts; - int _nacts; - int _keys; - int _goto_targ = 0; - - _goto: while (true) { - switch ( _goto_targ ) { - case 0: - if ( p == pe ) { - _goto_targ = 4; - continue _goto; - } - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } -case 1: - _match: do { - _keys = _JSON_key_offsets[cs]; - _trans = _JSON_index_offsets[cs]; - _klen = _JSON_single_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + _klen - 1; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( data[p] < _JSON_trans_keys[_mid] ) - _upper = 
_mid - 1; - else if ( data[p] > _JSON_trans_keys[_mid] ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - break _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = _JSON_range_lengths[cs]; - if ( _klen > 0 ) { - int _lower = _keys; - int _mid; - int _upper = _keys + (_klen<<1) - 2; - while (true) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( data[p] < _JSON_trans_keys[_mid] ) - _upper = _mid - 2; - else if ( data[p] > _JSON_trans_keys[_mid+1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - break _match; - } - } - _trans += _klen; - } - } while (false); - - _trans = _JSON_indicies[_trans]; - cs = _JSON_trans_targs[_trans]; - - if ( _JSON_trans_actions[_trans] != 0 ) { - _acts = _JSON_trans_actions[_trans]; - _nacts = (int) _JSON_actions[_acts++]; - while ( _nacts-- > 0 ) - { - switch ( _JSON_actions[_acts++] ) - { - case 0: -// line 321 "ParserConfig.rl" - { - if (!config.allowComments) { - if (config.deprecateComments) { - if (config.deprecateDuplicateKey && emittedDeprecations < 5) { - emittedDeprecations++; - context.runtime.getWarnings().warning( - "Encountered comment in JSON. 
This will raise an error in json 3.0 unless enabled via `allow_comments: true`" - ); - } - } else { - throw unexpectedToken(context, p, pe); - } - } - } - break; - case 1: -// line 786 "ParserConfig.rl" - { - parseValue(context, res, p, pe); - if (res.result == null) { - p--; - { p += 1; _goto_targ = 5; if (true) continue _goto;} - } else { - result = res.result; - {p = (( res.p))-1;} - } - } - break; -// line 2468 "ParserConfig.java" - } - } - } - -case 2: - if ( cs == 0 ) { - _goto_targ = 5; - continue _goto; - } - if ( ++p != pe ) { - _goto_targ = 1; - continue _goto; - } -case 4: -case 5: - } - break; } - } - -// line 812 "ParserConfig.rl" - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - throw unexpectedToken(context, p, pe); - } - } - - public IRubyObject parse(ThreadContext context) { - return parseImplementation(context); - } - - /** - * Updates the "view" bytelist with the new offsets and returns it. - * @param absStart - * @param absEnd - */ - private ByteList absSubSequence(int absStart, int absEnd) { - view.setBegin(absStart); - view.setRealSize(absEnd - absStart); - return view; - } - - /** - * Retrieves a constant directly descended from the JSON module. 
- * @param name The constant name - */ - private IRubyObject getConstant(String name) { - return config.info.jsonModule.get().getConstant(name); - } - - private RaiseException newException(ThreadContext context, String className, String message) { - return Utils.newException(context, className, message); - } - - private RaiseException newException(ThreadContext context, String className, RubyString message) { - return Utils.newException(context, className, message); - } - - RubyHash.VisitorWithState MATCH_VISITOR = new RubyHash.VisitorWithState() { - @Override - public void visit(ThreadContext context, RubyHash self, IRubyObject pattern, IRubyObject klass, int index, IRubyObject[] state) { - if (pattern.callMethod(context, "===", state[0]).isTrue()) { - state[1] = klass; - throw JumpException.SPECIAL_JUMP; - } - } - }; - } -} diff --git a/java/src/json/ext/ParserConfig.rl b/java/src/json/ext/ParserConfig.rl deleted file mode 100644 index e98c71f8c..000000000 --- a/java/src/json/ext/ParserConfig.rl +++ /dev/null @@ -1,861 +0,0 @@ -/* - * This code is copyrighted work by Daniel Luz . 
- * - * Distributed under the Ruby license: https://www.ruby-lang.org/en/about/license.txt - */ -package json.ext; - -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.RubyClass; -import org.jruby.RubyFloat; -import org.jruby.RubyHash; -import org.jruby.RubyInteger; -import org.jruby.RubyObject; -import org.jruby.RubyProc; -import org.jruby.RubyString; -import org.jruby.anno.JRubyMethod; -import org.jruby.exceptions.JumpException; -import org.jruby.exceptions.RaiseException; -import org.jruby.runtime.Block; -import org.jruby.runtime.Helpers; -import org.jruby.runtime.ObjectAllocator; -import org.jruby.runtime.ThreadContext; -import org.jruby.runtime.Visibility; -import org.jruby.runtime.builtin.IRubyObject; -import org.jruby.util.ByteList; -import org.jruby.util.ConvertBytes; - -import java.util.function.BiFunction; - -import static org.jruby.util.ConvertDouble.DoubleConverter; - -/** - * The JSON::Ext::Parser class. - * - *

This is the JSON parser implemented as a Java class. To use it as the - * standard parser, set - *

JSON.parser = JSON::Ext::Parser
- * This is performed for you when you include "json/ext". - * - *

This class does not perform the actual parsing, just acts as an interface - * to Ruby code. When the {@link #parse(ThreadContext)} method is invoked, a - * ParserConfig.ParserSession object is instantiated, which handles the process. - * - * @author mernen - */ -public class ParserConfig extends RubyObject { - private final RuntimeInfo info; - private int maxNesting; - private boolean allowNaN; - private boolean allowTrailingComma; - private boolean allowComments; - private boolean deprecateComments; - private boolean allowControlCharacters; - private boolean allowInvalidEscape; - private boolean allowDuplicateKey; - private boolean deprecateDuplicateKey; - private boolean symbolizeNames; - private boolean freeze; - private RubyProc onLoadProc; - private RubyClass decimalClass; - BiFunction decimalFactory; - private RubyHash match_string; - - private static final int DEFAULT_MAX_NESTING = 100; - - private static final ByteList JSON_MINUS_INFINITY = new ByteList(ByteList.plain("-Infinity")); - // constant names in the JSON module containing those values - private static final String CONST_NAN = "NaN"; - private static final String CONST_INFINITY = "Infinity"; - private static final String CONST_MINUS_INFINITY = "MinusInfinity"; - - static final ObjectAllocator ALLOCATOR = ParserConfig::new; - - /** - * Multiple-value return for internal parser methods. - * - *

All the parseStuff methods return instances of - * ParserResult when successful, or null when - * there's a problem with the input data. - */ - static final class ParserResult { - /** - * The result of the successful parsing. Should never be - * null. - */ - IRubyObject result; - /** - * The point where the parser returned. - */ - int p; - - void update(IRubyObject result, int p) { - this.result = result; - this.p = p; - } - } - - public ParserConfig(Ruby runtime, RubyClass metaClass) { - super(runtime, metaClass); - info = RuntimeInfo.forRuntime(runtime); - } - - /** - * ParserConfig.new(source, opts = {}) - * - *

Creates a new JSON::Ext::Parser instance for the string - * source. - * It will be configured by the opts Hash. - * opts can have the following keys: - *

- *

- *
:max_nesting - *
The maximum depth of nesting allowed in the parsed data - * structures. Disable depth checking with :max_nesting => false|nil|0, - * it defaults to 100. - *

- *

:allow_nan - *
If set to true, allow NaN, - * Infinity and -Infinity in defiance of RFC 4627 - * to be parsed by the Parser. This option defaults to false. - *

- *

:allow_trailing_comma - *
If set to true, allow arrays and objects with a trailing - * comma in defiance of RFC 4627 to be parsed by the Parser. - * This option defaults to false. - *

- *

:symbolize_names - *
If set to true, returns symbols for the names (keys) in - * a JSON object. Otherwise strings are returned, which is also the default. - *

- *

:create_additions - *
If set to false, the Parser doesn't create additions - * even if a matching class and create_id was found. This option - * defaults to true. - *

- *

:object_class - *
Defaults to Hash. If another type is provided, it will be used - * instead of Hash to represent JSON objects. The type must respond to - * new without arguments, and return an object that respond to []=. - *

- *

:array_class - *
Defaults to Array. If another type is provided, it will be used - * instead of Hash to represent JSON arrays. The type must respond to - * new without arguments, and return an object that respond to <<. - *

- *

:decimal_class - *
Specifies which class to use instead of the default (Float) when - * parsing decimal numbers. This class must accept a single string argument - * in its constructor. - *
- */ - - @JRubyMethod(name = "new", meta = true) - public static IRubyObject newInstance(IRubyObject clazz, IRubyObject arg0, Block block) { - ParserConfig config = (ParserConfig)((RubyClass)clazz).allocate(); - - config.callInit(arg0, block); - - return config; - } - - @JRubyMethod(name = "new", meta = true) - public static IRubyObject newInstance(IRubyObject clazz, IRubyObject arg0, IRubyObject arg1, Block block) { - ParserConfig config = (ParserConfig)((RubyClass)clazz).allocate(); - - config.callInit(arg0, arg1, block); - - return config; - } - - @JRubyMethod(visibility = Visibility.PRIVATE) - public IRubyObject initialize(ThreadContext context, IRubyObject options) { - checkFrozen(); - Ruby runtime = context.runtime; - - OptionsReader opts = new OptionsReader(context, options); - this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); - this.allowNaN = opts.getBool("allow_nan", false); - if (opts.hasKey("allow_comments")) { - this.allowComments = opts.getBool("allow_comments", false); - this.deprecateComments = false; - } else { - this.allowComments = true; - this.deprecateComments = true; - } - - this.allowControlCharacters = opts.getBool("allow_control_characters", false); - this.allowInvalidEscape = opts.getBool("allow_invalid_escape", false); - this.allowTrailingComma = opts.getBool("allow_trailing_comma", false); - this.symbolizeNames = opts.getBool("symbolize_names", false); - if (opts.hasKey("allow_duplicate_key")) { - this.allowDuplicateKey = opts.getBool("allow_duplicate_key", false); - this.deprecateDuplicateKey = false; - } else { - this.allowDuplicateKey = false; - this.deprecateDuplicateKey = true; - } - - this.freeze = opts.getBool("freeze", false); - this.onLoadProc = opts.getProc("on_load"); - - this.decimalClass = opts.getClass("decimal_class", null); - - if (decimalClass == null) { - this.decimalFactory = this::createFloat; - } else if (decimalClass == runtime.getClass("BigDecimal")) { - this.decimalFactory = 
this::createBigDecimal; - } else { - this.decimalFactory = this::createCustomDecimal; - } - - return this; - } - - public IRubyObject onLoad(ThreadContext context, IRubyObject object) { - if (onLoadProc == null) { - return object; - } else { - return onLoadProc.call(context, object); - } - } - - /** - * Checks the given string's encoding. If a non-UTF-8 encoding is detected, - * a converted copy is returned. - * Returns the source string if no conversion is needed. - */ - private RubyString convertEncoding(ThreadContext context, RubyString source) { - Encoding encoding = source.getEncoding(); - if (encoding == ASCIIEncoding.INSTANCE) { - source = (RubyString) source.dup(); - source.setEncoding(UTF8Encoding.INSTANCE); - source.clearCodeRange(); - } else if (encoding != UTF8Encoding.INSTANCE) { - source = (RubyString) source.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); - } - return source; - } - - /** - * Parser#parse() - * - *

Parses the current JSON text source and returns the - * complete data structure as a result. - */ - @JRubyMethod - public IRubyObject parse(ThreadContext context, IRubyObject source) { - return new ParserSession(this, convertEncoding(context, source.convertToString()), context, info).parse(context); - } - - /** - * Queries JSON.create_id. Returns null if it is - * set to nil or false, and a String if not. - */ - private RubyString getCreateId(ThreadContext context) { - IRubyObject v = info.jsonModule.get().callMethod(context, "create_id"); - return v.isTrue() ? v.convertToString() : null; - } - - private RubyFloat createFloat(final ThreadContext context, final ByteList num) { - return RubyFloat.newFloat(context.runtime, new DoubleConverter().parse(num, true, true)); - } - - private IRubyObject createBigDecimal(final ThreadContext context, final ByteList num) { - final Ruby runtime = context.runtime; - return runtime.getKernel().callMethod(context, "BigDecimal", runtime.newString(num)); - } - - private IRubyObject createCustomDecimal(final ThreadContext context, final ByteList num) { - return decimalClass.newInstance(context, context.runtime.newString(num), Block.NULL_BLOCK); - } - - /** - * A string parsing session. - * - *

Once a ParserSession is instantiated, the source string should not - * change until the parsing is complete. The ParserSession object assumes - * the source {@link RubyString} is still associated to its original - * {@link ByteList}, which in turn must still be bound to the same - * byte[] value (and on the same offset). - */ - // Ragel uses lots of fall-through - @SuppressWarnings("fallthrough") - private static class ParserSession { - private final ParserConfig config; - private final RuntimeInfo info; - private final ByteList byteList; - private final ByteList view; - private final byte[] data; - private final StringDecoder decoder; - private int currentNesting = 0; - private int emittedDeprecations = 0; - - private ParserSession(ParserConfig config, RubyString source, ThreadContext context, RuntimeInfo info) { - this.config = config; - this.info = info; - this.byteList = source.getByteList(); - this.data = byteList.unsafeBytes(); - this.view = new ByteList(data, false); - this.decoder = new StringDecoder(config.allowControlCharacters, config.allowInvalidEscape); - } - - private RaiseException parsingError(ThreadContext context, String message, int absStart, int absEnd) { - RubyString msg = context.runtime.newString("unexpected token at '") - .cat(data, absStart, Math.min(absEnd - absStart, 32)) - .cat((byte)'\''); - return newException(context, Utils.M_PARSER_ERROR, msg); - } - - private RaiseException unexpectedToken(ThreadContext context, int absStart, int absEnd) { - return parsingError(context, "unexpected token at '", absStart, absEnd); - } - - %%{ - machine JSON_common; - - action parse_comment { - if (!config.allowComments) { - if (config.deprecateComments) { - if (config.deprecateDuplicateKey && emittedDeprecations < 5) { - emittedDeprecations++; - context.runtime.getWarnings().warning( - "Encountered comment in JSON. 
This will raise an error in json 3.0 unless enabled via `allow_comments: true`" - ); - } - } else { - throw unexpectedToken(context, p, pe); - } - } - } - - cr = '\n'; - cr_neg = [^\n]; - ws = [ \t\r\n]; - c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/' >parse_comment; - cpp_comment = '//' cr_neg* cr >parse_comment; - comment = c_comment | cpp_comment; - ignore = ws | comment; - name_separator = ':'; - value_separator = ','; - Vnull = 'null'; - Vfalse = 'false'; - Vtrue = 'true'; - VNaN = 'NaN'; - VInfinity = 'Infinity'; - VMinusInfinity = '-Infinity'; - begin_value = [nft"\-[{NI] | digit; - begin_object = '{'; - end_object = '}'; - begin_array = '['; - end_array = ']'; - begin_string = '"'; - begin_name = begin_string; - begin_number = digit | '-'; - - }%% - - %%{ - machine JSON_value; - include JSON_common; - - write data; - - action parse_null { - result = context.nil; - } - action parse_false { - result = context.fals; - } - action parse_true { - result = context.tru; - } - action parse_nan { - if (config.allowNaN) { - result = getConstant(CONST_NAN); - } else { - throw unexpectedToken(context, p - 2, pe); - } - } - action parse_infinity { - if (config.allowNaN) { - result = getConstant(CONST_INFINITY); - } else { - throw unexpectedToken(context, p - 7, pe); - } - } - action parse_number { - if (pe > fpc + 8 && - absSubSequence(fpc, fpc + 9).equals(JSON_MINUS_INFINITY)) { - - if (config.allowNaN) { - result = getConstant(CONST_MINUS_INFINITY); - fexec p + 10; - fhold; - fbreak; - } else { - throw unexpectedToken(context, p, pe); - } - } - parseFloat(context, res, fpc, pe); - if (res.result != null) { - result = res.result; - fexec res.p; - } - parseInteger(context, res, fpc, pe); - if (res.result != null) { - result = res.result; - fexec res.p; - } - fhold; - fbreak; - } - action parse_string { - parseString(context, res, fpc, pe); - if (res.result == null) { - fhold; - fbreak; - } else { - result = res.result; - fexec res.p; - } - } - action parse_array { - 
currentNesting++; - parseArray(context, res, fpc, pe); - currentNesting--; - if (res.result == null) { - fhold; - fbreak; - } else { - result = res.result; - fexec res.p; - } - } - action parse_object { - currentNesting++; - parseObject(context, res, fpc, pe); - currentNesting--; - if (res.result == null) { - fhold; - fbreak; - } else { - result = res.result; - fexec res.p; - } - } - action exit { - fhold; - fbreak; - } - - main := ( Vnull @parse_null | - Vfalse @parse_false | - Vtrue @parse_true | - VNaN @parse_nan | - VInfinity @parse_infinity | - begin_number >parse_number | - begin_string >parse_string | - begin_array >parse_array | - begin_object >parse_object - ) %*exit; - }%% - - void parseValue(ThreadContext context, ParserResult res, int p, int pe) { - int cs; - IRubyObject result = null; - - %% write init; - %% write exec; - - if (cs >= JSON_value_first_final && result != null) { - if (config.freeze) { - result.setFrozen(true); - } - res.update(result, p); - } else { - res.update(null, p); - } - } - - %%{ - machine JSON_integer; - - write data; - - action exit { - fhold; - fbreak; - } - - main := '-'? ( '0' | [1-9][0-9]* ) ( ^[0-9]? 
@exit ); - }%% - - void parseInteger(ThreadContext context, ParserResult res, int p, int pe) { - int new_p = parseIntegerInternal(p, pe); - if (new_p == -1) { - res.update(null, p); - return; - } - RubyInteger number = createInteger(context, p, new_p); - res.update(config.onLoad(context, number), new_p + 1); - } - - int parseIntegerInternal(int p, int pe) { - int cs; - - %% write init; - int memo = p; - %% write exec; - - if (cs < JSON_integer_first_final) { - return -1; - } - - return p; - } - - RubyInteger createInteger(ThreadContext context, int p, int new_p) { - Ruby runtime = context.runtime; - ByteList num = absSubSequence(p, new_p); - return bytesToInum(runtime, num); - } - - RubyInteger bytesToInum(Ruby runtime, ByteList num) { - return ConvertBytes.byteListToInum(runtime, num, 10, true); - } - - %%{ - machine JSON_float; - include JSON_common; - - write data; - - action exit { - fhold; - fbreak; - } - - main := '-'? - ( ( ( '0' | [1-9][0-9]* ) '.' [0-9]+ ( [Ee] [+\-]?[0-9]+ )? ) - | ( ( '0' | [1-9][0-9]* ) ( [Ee] [+\-]? [0-9]+ ) ) ) - ( ^[0-9Ee.\-]? 
@exit ); - }%% - - void parseFloat(ThreadContext context, ParserResult res, int p, int pe) { - int new_p = parseFloatInternal(p, pe); - if (new_p == -1) { - res.update(null, p); - return; - } - final ByteList num = absSubSequence(p, new_p); - IRubyObject number = config.decimalFactory.apply(context, num); - - res.update(config.onLoad(context, number), new_p + 1); - } - - int parseFloatInternal(int p, int pe) { - int cs; - - %% write init; - int memo = p; - %% write exec; - - if (cs < JSON_float_first_final) { - return -1; - } - - return p; - } - - %%{ - machine JSON_string; - include JSON_common; - - write data; - - action parse_string { - int offset = byteList.begin(); - ByteList decoded = decoder.decode(context, byteList, memo + 1 - offset, - p - offset); - result = context.runtime.newString(decoded); - if (result == null) { - fhold; - fbreak; - } else { - fexec p + 1; - } - } - - action exit { - fhold; - fbreak; - } - - main := '"' - ( ( ^(["\\]) - | '\\'["\\/bfnrt] - | '\\u'[0-9a-fA-F]{4} - | '\\'^(["\\/bfnrtu]) - )* %parse_string - ) '"' @exit; - }%% - - void parseString(ThreadContext context, ParserResult res, int p, int pe) { - int cs; - IRubyObject result = null; - - %% write init; - int memo = p; - %% write exec; - - if (cs >= JSON_string_first_final && result != null) { - if (result instanceof RubyString) { - RubyString string = (RubyString)result; - string.setEncoding(UTF8Encoding.INSTANCE); - string.clearCodeRange(); - if (config.freeze) { - string.setFrozen(true); - string = context.runtime.freezeAndDedupString(string); - } - res.update(config.onLoad(context, string), p + 1); - } else { - res.update(config.onLoad(context, result), p + 1); - } - } else { - res.update(null, p + 1); - } - } - - %%{ - machine JSON_array; - include JSON_common; - - write data; - - action allow_trailing_comma { config.allowTrailingComma } - - action parse_value { - parseValue(context, res, fpc, pe); - if (res.result == null) { - fhold; - fbreak; - } else { - 
((RubyArray)result).append(res.result); - fexec res.p; - } - } - - action exit { - fhold; - fbreak; - } - - next_element = value_separator ignore* begin_value >parse_value; - - main := begin_array - ignore* - ( ( begin_value >parse_value - ignore* ) - ( ignore* - next_element - ignore* )* ( (value_separator ignore*) when allow_trailing_comma )? )? - ignore* - end_array @exit; - }%% - - void parseArray(ThreadContext context, ParserResult res, int p, int pe) { - int cs; - - if (config.maxNesting > 0 && currentNesting > config.maxNesting) { - throw newException(context, Utils.M_NESTING_ERROR, - "nesting of " + currentNesting + " is too deep"); - } - - IRubyObject result = RubyArray.newArray(context.runtime); - - %% write init; - %% write exec; - - if (cs >= JSON_array_first_final) { - res.update(config.onLoad(context, result), p + 1); - } else { - throw unexpectedToken(context, p, pe); - } - } - - %%{ - machine JSON_object; - include JSON_common; - - write data; - - action allow_trailing_comma { config.allowTrailingComma } - - action parse_value { - parseValue(context, res, fpc, pe); - if (res.result == null) { - fhold; - fbreak; - } else { - ((RubyHash)result).op_aset(context, lastName, res.result); - fexec res.p; - } - } - - action parse_name { - parseString(context, res, fpc, pe); - if (res.result == null) { - fhold; - fbreak; - } else { - RubyString name = (RubyString)res.result; - if (config.symbolizeNames) { - lastName = name.intern(); - } else { - lastName = name; - } - - if (!config.allowDuplicateKey) { - if (((RubyHash)result).hasKey(lastName)) { - if (config.deprecateDuplicateKey && emittedDeprecations < 5) { - emittedDeprecations++; - context.runtime.getWarnings().warning( - "detected duplicate key " + name.inspect() + " in JSON object. 
This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`" - ); - } else { - throw parsingError(context, "duplicate key" + name.inspect(), p, pe); - } - } - } - - fexec res.p; - } - } - - action exit { - fhold; - fbreak; - } - - pair = ignore* begin_name >parse_name ignore* name_separator - ignore* begin_value >parse_value; - next_pair = ignore* value_separator pair; - - main := ( - begin_object - (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore* - end_object - ) @exit; - }%% - - void parseObject(ThreadContext context, ParserResult res, int p, int pe) { - int cs; - IRubyObject lastName = null; - - if (config.maxNesting > 0 && currentNesting > config.maxNesting) { - throw newException(context, Utils.M_NESTING_ERROR, - "nesting of " + currentNesting + " is too deep"); - } - - // this is guaranteed to be a RubyHash due to the earlier - // allocator test at OptionsReader#getClass - IRubyObject result = RubyHash.newHash(context.runtime); - - %% write init; - %% write exec; - - if (cs < JSON_object_first_final) { - res.update(null, p + 1); - return; - } - - res.update(config.onLoad(context, result), p + 1); - } - - %%{ - machine JSON; - include JSON_common; - - write data; - - action parse_value { - parseValue(context, res, fpc, pe); - if (res.result == null) { - fhold; - fbreak; - } else { - result = res.result; - fexec res.p; - } - } - - main := ignore* - ( begin_value >parse_value) - ignore*; - }%% - - public IRubyObject parseImplementation(ThreadContext context) { - int cs; - int p, pe; - IRubyObject result = null; - ParserResult res = new ParserResult(); - - %% write init; - p = byteList.begin(); - pe = p + byteList.length(); - %% write exec; - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - throw unexpectedToken(context, p, pe); - } - } - - public IRubyObject parse(ThreadContext context) { - return parseImplementation(context); - } - - /** - * Updates the "view" bytelist with the 
new offsets and returns it. - * @param absStart - * @param absEnd - */ - private ByteList absSubSequence(int absStart, int absEnd) { - view.setBegin(absStart); - view.setRealSize(absEnd - absStart); - return view; - } - - /** - * Retrieves a constant directly descended from the JSON module. - * @param name The constant name - */ - private IRubyObject getConstant(String name) { - return config.info.jsonModule.get().getConstant(name); - } - - private RaiseException newException(ThreadContext context, String className, String message) { - return Utils.newException(context, className, message); - } - - private RaiseException newException(ThreadContext context, String className, RubyString message) { - return Utils.newException(context, className, message); - } - - RubyHash.VisitorWithState MATCH_VISITOR = new RubyHash.VisitorWithState() { - @Override - public void visit(ThreadContext context, RubyHash self, IRubyObject pattern, IRubyObject klass, int index, IRubyObject[] state) { - if (pattern.callMethod(context, "===", state[0]).isTrue()) { - state[1] = klass; - throw JumpException.SPECIAL_JUMP; - } - } - }; - } -} diff --git a/java/src/json/ext/ParserService.java b/java/src/json/ext/ParserService.java index 88aa9674b..2d49bd1a4 100644 --- a/java/src/json/ext/ParserService.java +++ b/java/src/json/ext/ParserService.java @@ -27,8 +27,8 @@ public boolean basicLoad(Ruby runtime) throws IOException { RubyModule jsonExtModule = info.jsonModule.get().defineModuleUnder("Ext"); RubyClass parserConfigClass = jsonExtModule.defineClassUnder("ParserConfig", runtime.getObject(), - ParserConfig.ALLOCATOR); - parserConfigClass.defineAnnotatedMethods(ParserConfig.class); + Parser.ALLOCATOR); + parserConfigClass.defineAnnotatedMethods(Parser.class); return true; } } diff --git a/java/src/json/ext/StringDecoder.java b/java/src/json/ext/StringDecoder.java index 091b09f56..28874acdc 100644 --- a/java/src/json/ext/StringDecoder.java +++ b/java/src/json/ext/StringDecoder.java @@ -186,8 
+186,7 @@ private int readHex(ThreadContext context) { } else if (digit >= 'A' && digit <= 'F') { digitValue = 10 + digit - 'A'; } else { - throw new NumberFormatException("Invalid base 16 number " - + src.subSequence(numberStart, numberStart + length)); + throw invalidUnicodeEscape(context, numberStart); } result = result * 16 + digitValue; } @@ -220,4 +219,14 @@ protected RaiseException invalidEscape(ThreadContext context) { return Utils.newException(context, Utils.M_PARSER_ERROR, context.runtime.newString(message)); } + + protected RaiseException invalidUnicodeEscape(ThreadContext context, int escapeStart) { + ByteList message = new ByteList( + ByteList.plain("incomplete unicode character escape sequence at ")); + // Point at the backslash-u that introduced the escape. + int start = Math.max(escapeStart - 2, 0); + message.append(src, start, srcEnd - start); + return Utils.newException(context, Utils.M_PARSER_ERROR, + context.runtime.newString(message)); + } } diff --git a/java/src/json/ext/StringScanner.java b/java/src/json/ext/StringScanner.java new file mode 100644 index 000000000..044f485da --- /dev/null +++ b/java/src/json/ext/StringScanner.java @@ -0,0 +1,136 @@ +package json.ext; + +import java.nio.ByteBuffer; + +/** + * Scans the body of a JSON string for its closing quote, reporting whether the + * body can be copied verbatim (the "plain" fast path) or must be handed to + * {@link StringDecoder} for escape expansion and UTF-8/control validation. + * + *

The default implementation is SWAR (8 bytes per step). A vectorized + * subclass ({@code VectorizedStringScanner}) is loaded reflectively when the + * {@code jruby.json.useVectorizedParser} system property is {@code true} or {@code 1} and the + * {@code jdk.incubator.vector} module is available; otherwise this SWAR + * implementation is used. Instances are stateless and therefore shared. + */ +class StringScanner { + /** Set in the returned bits when the whole body is plain printable ASCII. */ + static final long PLAIN_BIT = 1L << 32; + /** Returned when no closing quote is found before {@code end}. */ + static final long NOT_FOUND = -1L; + + private static final long HIGH_BITS = 0x8080808080808080L; + private static final long ONES = 0x0101010101010101L; + + private static final String VECTORIZED_SCANNER_CLASS = "json.ext.VectorizedStringScanner"; + private static final String USE_VECTORIZED_PARSER_PROP = "jruby.json.useVectorizedParser"; + private static final String USE_VECTORIZED_PARSER_DEFAULT = "false"; + + private static final StringScanner INSTANCE; + + static { + StringScanner scanner = new StringScanner(); + String enable = System.getProperty(USE_VECTORIZED_PARSER_PROP, USE_VECTORIZED_PARSER_DEFAULT); + if ("true".equalsIgnoreCase(enable) || "1".equals(enable)) { + try { + Class vectorized = StringScanner.class.getClassLoader() + .loadClass(VECTORIZED_SCANNER_CLASS); + scanner = (StringScanner) vectorized.getDeclaredConstructor().newInstance(); + } catch (Throwable t) { + // jdk.incubator.vector unavailable (or any load failure): + // keep the SWAR implementation. + scanner = new StringScanner(); + } + } + INSTANCE = scanner; + } + + static StringScanner getInstance() { + return INSTANCE; + } + + /** + * Scans {@code data[start..end)} for the closing quote, honouring backslash + * escapes. + * + * @param chunks a little-endian {@link ByteBuffer} over {@code data}, used + * for the 8-byte SWAR reads (the vectorized subclass reads + * {@code data} directly and ignores it). 
+ * @return packed result: the low 32 bits hold the index of the closing + * quote, or the whole value is {@link #NOT_FOUND} ({@code -1L}) when none is + * found before {@code end}; otherwise {@link #PLAIN_BIT} is set when the entire body + * is plain printable ASCII (no escape, no ASCII control character, + * and no non-ASCII byte) and can be copied verbatim. + */ + long scan(byte[] data, ByteBuffer chunks, int start, int end) { + int p = start; + boolean plain = true; + + outer: + while (true) { + // SWAR: skip 8-byte chunks that contain nothing interesting. + while (p + 8 <= end) { + long x = chunks.getLong(p); + // Once the body is known not to be plain (an escape, control + // character or non-ASCII byte was already seen), only quotes and + // backslashes still matter, so switch to the narrower mask. + long m = plain ? stringScanMask(x) : quoteBackslashMask(x); + if (m == 0) { + p += 8; + } else { + p += Long.numberOfTrailingZeros(m) >>> 3; + break; + } + } + // Scalar path: handles the byte flagged above and the tail of fewer than 8 bytes. + while (p < end) { + int b = data[p] & 0xFF; + if (b == '"') { + return ((long) p) | (plain ? PLAIN_BIT : 0L); + } + if (b == '\\') { + plain = false; + p += 2; // skip the backslash and the escaped byte + continue outer; + } + if (b < 0x20 || b >= 0x80) { + plain = false; + p++; + continue outer; + } + p++; + } + return NOT_FOUND; + } + } + + /** + * Returns a mask whose high bit (0x80) is set in every lane of {@code x} + * that needs scalar attention: an ASCII control character (< 0x20), a + * double quote, a backslash, or a non-ASCII byte (high bit set). Returns 0 + * when the whole 8-byte chunk is printable ASCII copyable verbatim. 
+ */ + private static long stringScanMask(long x) { + long control = (x - 0x2020202020202020L) & ~x; // bytes < 0x20 (ASCII) + long high = x; // bit 0x80 set iff non-ASCII + long q = x ^ 0x2222222222222222L; + long quote = (q - ONES) & ~q; + long s = x ^ 0x5C5C5C5C5C5C5C5CL; + long bslash = (s - ONES) & ~s; + return (control | high | quote | bslash) & HIGH_BITS; + } + + /** + * Like {@link #stringScanMask} but only flags double quotes and backslashes. + * Used once a string is known to require the decoder, so the remaining scan + * for the closing quote still skips clean chunks (including multi-byte + * UTF-8) eight bytes at a time. + */ + private static long quoteBackslashMask(long x) { + long q = x ^ 0x2222222222222222L; + long quote = (q - ONES) & ~q; + long s = x ^ 0x5C5C5C5C5C5C5C5CL; + long bslash = (s - ONES) & ~s; + return (quote | bslash) & HIGH_BITS; + } +} diff --git a/java/src/json/ext/VectorizedStringScanner.java b/java/src/json/ext/VectorizedStringScanner.java new file mode 100644 index 000000000..8a9c1e3e7 --- /dev/null +++ b/java/src/json/ext/VectorizedStringScanner.java @@ -0,0 +1,72 @@ +package json.ext; + +import java.nio.ByteBuffer; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorMask; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; + +final class VectorizedStringScanner extends StringScanner { + private static final VectorSpecies SP = ByteVector.SPECIES_PREFERRED; + private static final ByteVector ZERO = ByteVector.zero(SP); + private static final ByteVector TWO = ByteVector.broadcast(SP, 2); + private static final ByteVector THIRTY_THREE = ByteVector.broadcast(SP, 33); + private static final ByteVector BACKSLASH = ByteVector.broadcast(SP, '\\'); + private static final ByteVector DQUOTE = ByteVector.broadcast(SP, '"'); + + @Override + long scan(byte[] data, ByteBuffer chunks, int start, int end) { + final int width = SP.length(); + int p = start; + boolean plain = true; + + 
// The same structure as the StringEncoder. The logic is + // duplicated for maximum inlining. + outer: + while (true) { + while (p + width <= end) { + ByteVector chunk = ByteVector.fromArray(SP, data, p); + VectorMask interesting = + plain ? interestingLanes(chunk) : quoteOrBackslashLanes(chunk); + if (interesting.anyTrue()) { + p += interesting.firstTrue(); + break; + } + p += width; + } + while (p < end) { + int b = data[p] & 0xFF; + if (b == '"') { + return ((long) p) | (plain ? PLAIN_BIT : 0L); + } + if (b == '\\') { + plain = false; + p += 2; // skip the backslash and the escaped byte + continue outer; + } + if (b < 0x20 || b >= 0x80) { + plain = false; + p++; + continue outer; + } + p++; + } + return NOT_FOUND; + } + } + + // Lanes that are control characters, double quotes, backslashes or non-ASCII. + private static VectorMask interestingLanes(ByteVector chunk) { + VectorMask negative = chunk.lt(ZERO); + VectorMask lowOrQuote = chunk.lanewise(VectorOperators.XOR, TWO) + .lt(THIRTY_THREE) + .andNot(negative); + return lowOrQuote.or(chunk.eq(BACKSLASH)).or(negative); + } + + // Lanes that are a double quote or a backslash (non-plain phase). 
+ private static VectorMask quoteOrBackslashLanes(ByteVector chunk) { + return chunk.eq(DQUOTE).or(chunk.eq(BACKSLASH)); + } +} From 9b3b483eea663c6c140c6742060ef7b5755888d9 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Tue, 16 Jun 2026 07:34:22 -0500 Subject: [PATCH 2/2] fix code style PR comments --- java/src/json/ext/Parser.java | 9 +++++---- java/src/json/ext/StringScanner.java | 14 +++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/java/src/json/ext/Parser.java b/java/src/json/ext/Parser.java index b7f505421..092ff4d41 100644 --- a/java/src/json/ext/Parser.java +++ b/java/src/json/ext/Parser.java @@ -207,6 +207,8 @@ private static final class ParserSession { private static final int KEY_CACHE_CAPA = 128; private static final int KEY_CACHE_MAX_ENTRY_LENGTH = 55; + private static final long SPACES = 0x2020202020202020L; + private enum FrameType { ROOT(FramePhase.DONE), ARRAY(FramePhase.ARRAY_COMMA), @@ -817,7 +819,7 @@ private static int compareKey(byte[] buf, int off, int len, IRubyObject entry) { byte[] ebuf = eb.getUnsafeBytes(); int ebeg = eb.begin(); for (int i = 0; i < len; i++) { - int cmp = (buf[off + i] & 0xFF) - (ebuf[ebeg + i] & 0xFF); + int cmp = Byte.toUnsignedInt(buf[off + i]) - Byte.toUnsignedInt(ebuf[ebeg + i]); if (cmp != 0) { return cmp; } @@ -858,11 +860,10 @@ private void eatWhitespace() { // of indentation spaces, so skip them eight at a time. 
while (cursor + 8 <= end) { long x = chunks.getLong(cursor); - if (x == 0x2020202020202020L) { + if (x == SPACES) { cursor += 8; } else { - cursor += Long.numberOfTrailingZeros( - x ^ 0x2020202020202020L) >>> 3; + cursor += Long.numberOfTrailingZeros(x ^ SPACES) >>> 3; break; } } diff --git a/java/src/json/ext/StringScanner.java b/java/src/json/ext/StringScanner.java index 044f485da..5ca59d177 100644 --- a/java/src/json/ext/StringScanner.java +++ b/java/src/json/ext/StringScanner.java @@ -22,6 +22,10 @@ class StringScanner { private static final long HIGH_BITS = 0x8080808080808080L; private static final long ONES = 0x0101010101010101L; + private static final long SPACES = 0x2020202020202020L; + private static final long DOUBLE_QUOTES = 0x2222222222222222L; + private static final long BACKSLASHES = 0x5C5C5C5C5C5C5C5CL; + private static final String VECTORIZED_SCANNER_CLASS = "json.ext.VectorizedStringScanner"; private static final String USE_VECTORIZED_PARSER_PROP = "jruby.json.useVectorizedParser"; private static final String USE_VECTORIZED_PARSER_DEFAULT = "false"; @@ -111,11 +115,11 @@ long scan(byte[] data, ByteBuffer chunks, int start, int end) { * when the whole 8-byte chunk is printable ASCII copyable verbatim. */ private static long stringScanMask(long x) { - long control = (x - 0x2020202020202020L) & ~x; // bytes < 0x20 (ASCII) + long control = (x - SPACES) & ~x; // bytes < 0x20 (ASCII) long high = x; // bit 0x80 set iff non-ASCII - long q = x ^ 0x2222222222222222L; + long q = x ^ DOUBLE_QUOTES; long quote = (q - ONES) & ~q; - long s = x ^ 0x5C5C5C5C5C5C5C5CL; + long s = x ^ BACKSLASHES; long bslash = (s - ONES) & ~s; return (control | high | quote | bslash) & HIGH_BITS; } @@ -127,9 +131,9 @@ private static long stringScanMask(long x) { * UTF-8) eight bytes at a time. 
*/ private static long quoteBackslashMask(long x) { - long q = x ^ 0x2222222222222222L; + long q = x ^ DOUBLE_QUOTES; long quote = (q - ONES) & ~q; - long s = x ^ 0x5C5C5C5C5C5C5C5CL; + long s = x ^ BACKSLASHES; long bslash = (s - ONES) & ~s; return (quote | bslash) & HIGH_BITS; }