diff --git a/src/main/cod/demo/src/main/test/json/JsonStandardLibraryComprehensive.cod b/src/main/cod/demo/src/main/test/json/JsonStandardLibraryComprehensive.cod
new file mode 100644
index 00000000..c981f5e5
--- /dev/null
+++ b/src/main/cod/demo/src/main/test/json/JsonStandardLibraryComprehensive.cod
@@ -0,0 +1,131 @@
+unit test.json
+
+use {json}
+
+// Smoke test for the json standard library: prints one PASS/FAIL line per check.
+share JsonStandardLibraryComprehensive {
+    // Prints PASS when actual == expected, otherwise FAIL followed by both values.
+    share check(label: text, actual: text, expected: text) {
+        if actual == expected {
+            out("PASS " + label)
+            return
+        }
+        out("FAIL " + label)
+        out(" actual: " + actual)
+        out(" expected: " + expected)
+    }
+
+    // Same contract as check, for bool results.
+    share checkBool(label: text, actual: bool, expected: bool) {
+        if actual == expected {
+            out("PASS " + label)
+            return
+        }
+        out("FAIL " + label)
+        out(" actual: " + actual)
+        out(" expected: " + expected)
+    }
+
+    // Runs every scenario in order; results are reported through out().
+    share main() {
+        out("== json standard library comprehensive ==")
+
+        nullValue := Json.parse("null")
+        JsonStandardLibraryComprehensive.checkBool("parse null", nullValue.isNull(), true)
+
+        trueValue := Json.parse("true")
+        JsonStandardLibraryComprehensive.checkBool("parse true", trueValue.isBool(), true)
+        JsonStandardLibraryComprehensive.checkBool("true value", trueValue.asBool(), true)
+
+        falseValue := Json.parse("false")
+        JsonStandardLibraryComprehensive.checkBool("parse false", falseValue.isBool(), true)
+        JsonStandardLibraryComprehensive.checkBool("false value", falseValue.asBool(), false)
+
+        numberValue := Json.parse("-12.5e+2")
+        JsonStandardLibraryComprehensive.checkBool("parse number kind", numberValue.isNumber(), true)
+        JsonStandardLibraryComprehensive.check("parse number text", numberValue.asNumberText(), "-12.5e+2")
+
+        textValue := Json.parse("\"hello \\\"coderive\\\"\\nline\"")
+        JsonStandardLibraryComprehensive.checkBool("parse text kind", textValue.isText(), true)
+        JsonStandardLibraryComprehensive.check("parse text data", textValue.asText(), "hello \"coderive\"\nline")
+        JsonStandardLibraryComprehensive.check("serialize text", Json.serialize(textValue), "\"hello \\\"coderive\\\"\\nline\"")
+
+        mixedArray := Json.parse("[1, true, null, \"x\", [2,3]]")
+        JsonStandardLibraryComprehensive.checkBool("array kind", mixedArray.isArray(), true)
+        JsonStandardLibraryComprehensive.check("array size", "" + mixedArray.size(), "5")
+        JsonStandardLibraryComprehensive.checkBool("array nested kind", mixedArray.get(4).isArray(), true)
+        JsonStandardLibraryComprehensive.check("array nested value", mixedArray.get(4).get(1).asNumberText(), "3")
+
+        objectValue := Json.parse("{\"name\":\"Coderive\",\"ok\":true,\"n\":10,\"tags\":[\"lang\",\"json\"],\"meta\":{\"major\":0}}")
+        JsonStandardLibraryComprehensive.checkBool("object kind", objectValue.isObject(), true)
+        JsonStandardLibraryComprehensive.checkBool("object has name", objectValue.has("name"), true)
+        JsonStandardLibraryComprehensive.check("object name", objectValue.getKey("name").asText(), "Coderive")
+        JsonStandardLibraryComprehensive.checkBool("object ok", objectValue.getKey("ok").asBool(), true)
+        JsonStandardLibraryComprehensive.check("object nested array value", objectValue.getKey("tags").get(1).asText(), "json")
+        JsonStandardLibraryComprehensive.check("object nested object value", objectValue.getKey("meta").getKey("major").asNumberText(), "0")
+
+        compact := Json.serialize(objectValue)
+        JsonStandardLibraryComprehensive.check("compact serialize", compact, "{\"name\":\"Coderive\",\"ok\":true,\"n\":10,\"tags\":[\"lang\",\"json\"],\"meta\":{\"major\":0}}")
+
+        pretty := Json.serializePretty(objectValue)
+        JsonStandardLibraryComprehensive.checkBool("pretty has new lines", pretty.has("\n"), true)
+        JsonStandardLibraryComprehensive.checkBool("pretty has indentation", pretty.has(" \"name\""), true)
+
+        created := Json.object()
+        created.set("language", Json.text("Coderive"))
+        created.set("stable", Json.bool(false))
+        versions := Json.array()
+        versions.add(Json.numberText("0.9"))
+        versions.add(Json.numberText("1.0"))
+        created.set("versions", versions)
+        JsonStandardLibraryComprehensive.check("constructed serialize", Json.serialize(created), "{\"language\":\"Coderive\",\"stable\":false,\"versions\":[0.9,1.0]}")
+
+        updateObject := Json.object()
+        updateObject.set("x", Json.numberText("1"))
+        updateObject.set("x", Json.numberText("2"))
+        JsonStandardLibraryComprehensive.check("object key update", Json.serialize(updateObject), "{\"x\":2}")
+
+        roundTripSource := " { \"a\" : [ 1 , 2 , {\"b\":false} ] , \"c\" : null } "
+        roundTrip := Json.parse(roundTripSource)
+        JsonStandardLibraryComprehensive.checkBool("round trip parse ok", roundTrip.isError(), false)
+        JsonStandardLibraryComprehensive.check("round trip compact", Json.serialize(roundTrip), "{\"a\":[1,2,{\"b\":false}],\"c\":null}")
+
+        invalid1 := Json.parse("{\"a\":1,}")
+        JsonStandardLibraryComprehensive.checkBool("invalid trailing comma object", invalid1.isError(), true)
+
+        invalid2 := Json.parse("[1,2,]")
+        JsonStandardLibraryComprehensive.checkBool("invalid trailing comma array", invalid2.isError(), true)
+
+        invalid3 := Json.parse("{\"a\" 1}")
+        JsonStandardLibraryComprehensive.checkBool("invalid missing colon", invalid3.isError(), true)
+
+        invalid4 := Json.parse("\"unterminated")
+        JsonStandardLibraryComprehensive.checkBool("invalid unterminated text", invalid4.isError(), true)
+
+        invalid5 := Json.parse("true false")
+        JsonStandardLibraryComprehensive.checkBool("invalid trailing content", invalid5.isError(), true)
+
+        unicodeEscaped := Json.parse("\"\\u0041\"")
+        JsonStandardLibraryComprehensive.check("unicode escape preserved", unicodeEscaped.asText(), "\\u0041")
+        JsonStandardLibraryComprehensive.check("unicode serialize", Json.serialize(unicodeEscaped), "\"\\u0041\"")
+
+        unicodePair := Json.parse("\"\\uD83D\\uDE00\"")
+        JsonStandardLibraryComprehensive.check("unicode surrogate pair preserved", unicodePair.asText(), "\\uD83D\\uDE00")
+        JsonStandardLibraryComprehensive.check("unicode surrogate pair serialize", Json.serialize(unicodePair), "\"\\uD83D\\uDE00\"")
+
+        invalid6 := Json.parse("\"\\uD83Dx\"")
+        JsonStandardLibraryComprehensive.checkBool("invalid missing low surrogate pair", invalid6.isError(), true)
+
+        invalid7 := Json.parse("\"\\uDE00\"")
+        JsonStandardLibraryComprehensive.checkBool("invalid unexpected low surrogate", invalid7.isError(), true)
+
+        // Input that ends where an object key is expected must yield an error value.
+        invalid8 := Json.parse("{")
+        JsonStandardLibraryComprehensive.checkBool("invalid unterminated object", invalid8.isError(), true)
+
+        invalid9 := Json.parse("{\"a\":1,")
+        JsonStandardLibraryComprehensive.checkBool("invalid object ends after comma", invalid9.isError(), true)
+
+        out("== done ==")
+    }
+}
diff --git a/src/main/cod/demo/src/main/test/unicode/UnicodeStandardLibraryComprehensive.cod b/src/main/cod/demo/src/main/test/unicode/UnicodeStandardLibraryComprehensive.cod
new file mode 100644
index 00000000..a4315154
--- /dev/null
+++ b/src/main/cod/demo/src/main/test/unicode/UnicodeStandardLibraryComprehensive.cod
@@ -0,0 +1,46 @@
+unit test.unicode
+
+use {unicode}
+
+// Smoke test for the unicode standard library: prints one PASS/FAIL line per check.
+share UnicodeStandardLibraryComprehensive {
+    // Prints PASS when actual == expected, otherwise FAIL followed by both values.
+    share check(label: text, actual: text, expected: text) {
+        if actual == expected {
+            out("PASS " + label)
+            return
+        }
+        out("FAIL " + label)
+        out(" actual: " + actual)
+        out(" expected: " + expected)
+    }
+
+    // Same contract as check, for bool results.
+    share checkBool(label: text, actual: bool, expected: bool) {
+        if actual == expected {
+            out("PASS " + label)
+            return
+        }
+        out("FAIL " + label)
+        out(" actual: " + actual)
+        out(" expected: " + expected)
+    }
+
+    // Exercises Unicode.normalizeEscaped and Unicode.isValidEscaped.
+    share main() {
+        out("== unicode standard library comprehensive ==")
+
+        UnicodeStandardLibraryComprehensive.check("normalize simple", Unicode.normalizeEscaped("\\u0041"), "\\u0041")
+        UnicodeStandardLibraryComprehensive.check("normalize lowercase", Unicode.normalizeEscaped("\\u03c9"), "\\u03C9")
+        UnicodeStandardLibraryComprehensive.check("normalize surrogate pair", Unicode.normalizeEscaped("\\ud83d\\ude00"), "\\uD83D\\uDE00")
+
+        UnicodeStandardLibraryComprehensive.checkBool("valid empty", Unicode.isValidEscaped(""), true)
+        UnicodeStandardLibraryComprehensive.checkBool("valid basic", Unicode.isValidEscaped("\\u0041"), true)
+        UnicodeStandardLibraryComprehensive.checkBool("valid surrogate", Unicode.isValidEscaped("\\uD83D\\uDE00"), true)
+        UnicodeStandardLibraryComprehensive.checkBool("invalid bad hex", Unicode.isValidEscaped("\\u00G1"), false)
+        UnicodeStandardLibraryComprehensive.checkBool("invalid lone high", Unicode.isValidEscaped("\\uD83D"), false)
+        UnicodeStandardLibraryComprehensive.checkBool("invalid lone low", Unicode.isValidEscaped("\\uDE00"), false)
+
+        out("== done ==")
+    }
+}
diff --git a/src/main/cod/std/json/Json.cod b/src/main/cod/std/json/Json.cod
new file mode 100644
index 00000000..c6fa9ccc
--- /dev/null
+++ b/src/main/cod/std/json/Json.cod
@@ -0,0 +1,687 @@
+unit json
+
+use {unicode}
+
+// JSON standard library for Coderive.
+// Supports parsing, mutation, and compact/pretty serialization of JSON values.
+share JsonValue {
+    // kind tag: 0 null, 1 bool, 2 number, 3 text, 4 array, 5 object, 6 error
+    kind: int = 0
+    boolData: bool = false
+    textData: text = ""
+    arrayData: [] = []
+    objectKeys: [text] = []
+    objectValues: [] = []
+    errorData: text = ""
+
+    share this(kindValue: int) {
+        this.kind = kindValue
+    }
+
+    share isNull() :: bool { ~> (this.kind == 0) }
+    share isBool() :: bool { ~> (this.kind == 1) }
+    share isNumber() :: bool { ~> (this.kind == 2) }
+    share isText() :: bool { ~> (this.kind == 3) }
+    share isArray() :: bool { ~> (this.kind == 4) }
+    share isObject() :: bool { ~> (this.kind == 5) }
+    share isError() :: bool { ~> (this.kind == 6) }
+
+    share asBool() :: bool { ~> (this.boolData) }
+    share asNumberText() :: text { ~> (this.textData) }
+    share asText() :: text { ~> (this.textData) }
+    share error() :: text { ~> (this.errorData) }
+
+    // Element count for arrays, key count for objects, 0 for every other kind.
+    share size() :: int {
+        if this.kind == 4 { ~> (this.arrayData.size) }
+        if this.kind == 5 { ~> (this.objectKeys.size) }
+        ~> (0)
+    }
+
+    // Appends item to an array value; silently ignored for non-arrays.
+    share add(item: JsonValue) {
+        if this.kind != 4 { return }
+        idx: int = this.arrayData.size
+        this.arrayData[idx] = item
+    }
+
+    // Returns the array element at index, or an error value when this is not an array or index is out of range.
+    share get(index: int) :: JsonValue {
+        if this.kind != 4 { ~> (JsonValue.makeError("value is not an array")) }
+        if index < 0 { ~> (JsonValue.makeError("array index out of bounds")) }
+        if index >= this.arrayData.size { ~> (JsonValue.makeError("array index out of bounds")) }
+        ~> (this.arrayData[index])
+    }
+
+    // Replaces the value of an existing key or appends a new key/value pair; silently ignored for non-objects.
+    share set(key: text, value: JsonValue) {
+        if this.kind != 5 { return }
+        for i of 0 to this.objectKeys.size - 1 {
+            if this.objectKeys[i] == key {
+                this.objectValues[i] = value
+                return
+            }
+        }
+        idx: int = this.objectKeys.size
+        this.objectKeys[idx] = key
+        this.objectValues[idx] = value
+    }
+
+    // True when this is an object that contains key.
+    share has(key: text) :: bool {
+        if this.kind != 5 { ~> (false) }
+        for i of 0 to this.objectKeys.size - 1 {
+            if this.objectKeys[i] == key { ~> (true) }
+        }
+        ~> (false)
+    }
+
+    // Returns the value stored under key, or an error value when this is not an object or key is absent.
+    share getKey(key: text) :: JsonValue {
+        if this.kind != 5 { ~> (JsonValue.makeError("value is not an object")) }
+        for i of 0 to this.objectKeys.size - 1 {
+            if this.objectKeys[i] == key {
+                ~> (this.objectValues[i])
+            }
+        }
+        ~> (JsonValue.makeError("missing object key: " + key))
+    }
+
+    // Key at position index, or "" when this is not an object or index is out of range.
+    share keyAt(index: int) :: text {
+        if this.kind != 5 { ~> ("") }
+        if index < 0 { ~> ("") }
+        if index >= this.objectKeys.size { ~> ("") }
+        ~> (this.objectKeys[index])
+    }
+
+    // Value at position index, or an error value when this is not an object or index is out of range.
+    share valueAt(index: int) :: JsonValue {
+        if this.kind != 5 { ~> (JsonValue.makeError("value is not an object")) }
+        if index < 0 { ~> (JsonValue.makeError("object index out of bounds")) }
+        if index >= this.objectValues.size { ~> (JsonValue.makeError("object index out of bounds")) }
+        ~> (this.objectValues[index])
+    }
+
+    share toJson() :: text { ~> (Json.serializeValue(this, false, 0)) }
+    share toJsonPretty() :: text { ~> (Json.serializeValue(this, true, 0)) }
+
+    // Factory helpers: each one builds a JsonValue of a single kind.
+    share makeNull() :: JsonValue {
+        v := JsonValue(0)
+        ~> (v)
+    }
+
+    share makeBool(value: bool) :: JsonValue {
+        v := JsonValue(1)
+        v.boolData = value
+        ~> (v)
+    }
+
+    share makeNumber(textValue: text) :: JsonValue {
+        v := JsonValue(2)
+        v.textData = textValue
+        ~> (v)
+    }
+
+    share makeText(textValue: text) :: JsonValue {
+        v := JsonValue(3)
+        v.textData = textValue
+        ~> (v)
+    }
+
+    share makeArray(items: []) :: JsonValue {
+        v := JsonValue(4)
+        v.arrayData = items
+        ~> (v)
+    }
+
+    share makeObject(keys: [text], values: []) :: JsonValue {
+        v := JsonValue(5)
+        v.objectKeys = keys
+        v.objectValues = values
+        ~> (v)
+    }
+
+    share makeError(message: text) :: JsonValue {
+        v := JsonValue(6)
+        v.errorData = message
+        ~> (v)
+    }
+}
+
+// Entry points: parse text into a JsonValue, build values, and serialize them.
+share Json {
+    share parse(source: text) :: JsonValue {
+        parser := JsonParser(source)
+        ~> (parser.parseRoot())
+    }
+
+    share serialize(value: JsonValue) :: text {
+        ~> (Json.serializeValue(value, false, 0))
+    }
+
+    share serializePretty(value: JsonValue) :: text {
+        ~> (Json.serializeValue(value, true, 0))
+    }
+
+    share nullValue() :: JsonValue { ~> (JsonValue.makeNull()) }
+    share bool(value: bool) :: JsonValue { ~> (JsonValue.makeBool(value)) }
+    share number(value: int|float) :: JsonValue { ~> (JsonValue.makeNumber("" + value)) }
+    share numberText(value: text) :: JsonValue { ~> (JsonValue.makeNumber(value)) }
+    share text(value: text) :: JsonValue { ~> (JsonValue.makeText(value)) }
+    share array() :: JsonValue { ~> (JsonValue.makeArray([])) }
+    share object() :: JsonValue { ~> (JsonValue.makeObject([], [])) }
+
+    // Renders value as JSON text. pretty adds new lines and Json.indent(depth) prefixes.
+    // Error values are rendered as an empty JSON string; an empty number text is rendered as 0.
+    share serializeValue(value: JsonValue, pretty: bool, depth: int) :: text {
+        if value.isError() {
+            ~> ("\"\"")
+        }
+
+        if value.isNull() {
+            ~> ("null")
+        }
+
+        if value.isBool() {
+            if value.asBool() { ~> ("true") }
+            ~> ("false")
+        }
+
+        if value.isNumber() {
+            raw := value.asNumberText().trim()
+            if raw.isEmpty() { ~> ("0") }
+            ~> (raw)
+        }
+
+        if value.isText() {
+            ~> ("\"" + Json.escapeText(value.asText()) + "\"")
+        }
+
+        if value.isArray() {
+            count := value.size()
+            if count == 0 { ~> ("[]") }
+
+            if pretty {
+                outText := "[\n"
+                for i of 0 to count - 1 {
+                    outText = outText + Json.indent(depth + 1)
+                    item := value.get(i)
+                    outText = outText + Json.serializeValue(item, true, depth + 1)
+                    if i < count - 1 { outText = outText + "," }
+                    outText = outText + "\n"
+                }
+                outText = outText + Json.indent(depth) + "]"
+                ~> (outText)
+            }
+
+            outText := "["
+            for i of 0 to count - 1 {
+                item := value.get(i)
+                outText = outText + Json.serializeValue(item, false, depth + 1)
+                if i < count - 1 { outText = outText + "," }
+            }
+            outText = outText + "]"
+            ~> (outText)
+        }
+
+        if value.isObject() {
+            count := value.size()
+            if count == 0 { ~> ("{}") }
+
+            if pretty {
+                outText := "{\n"
+                for i of 0 to count - 1 {
+                    key := value.keyAt(i)
+                    item := value.valueAt(i)
+                    outText = outText + Json.indent(depth + 1)
+                    outText = outText + "\"" + Json.escapeText(key) + "\": "
+                    outText = outText + Json.serializeValue(item, true, depth + 1)
+                    if i < count - 1 { outText = outText + "," }
+                    outText = outText + "\n"
+                }
+                outText = outText + Json.indent(depth) + "}"
+                ~> (outText)
+            }
+
+            outText := "{"
+            for i of 0 to count - 1 {
+                key := value.keyAt(i)
+                item := value.valueAt(i)
+                outText = outText + "\"" + Json.escapeText(key) + "\":"
+                outText = outText + Json.serializeValue(item, false, depth + 1)
+                if i < count - 1 { outText = outText + "," }
+            }
+            outText = outText + "}"
+            ~> (outText)
+        }
+
+        ~> ("null")
+    }
+
+    // Escapes raw for use inside a JSON string literal.
+    // A backslash that starts a \uXXXX sequence is copied through unchanged because parseText stores unicode escapes in that form.
+    share escapeText(raw: text) :: text {
+        outText := ""
+        idx: int = 0
+        for i of 0 to raw.length {
+            if idx >= raw.length { break }
+            ch := raw[idx]
+            if ch == "\\" {
+                if Json.isUnicodeEscapeAt(raw, idx) {
+                    outText = outText + raw[idx to idx + 5]
+                    idx = idx + 6
+                    continue
+                }
+                outText = outText + "\\\\"
+                idx = idx + 1
+                continue
+            }
+            if ch == "\"" {
+                outText = outText + "\\\""
+                idx = idx + 1
+                continue
+            }
+            if ch == "\n" {
+                outText = outText + "\\n"
+                idx = idx + 1
+                continue
+            }
+            if ch == "\r" {
+                outText = outText + "\\r"
+                idx = idx + 1
+                continue
+            }
+            if ch == "\t" {
+                outText = outText + "\\t"
+                idx = idx + 1
+                continue
+            }
+            if ch == "\b" {
+                outText = outText + "\\b"
+                idx = idx + 1
+                continue
+            }
+            if ch == "\f" {
+                outText = outText + "\\f"
+                idx = idx + 1
+                continue
+            }
+            outText = outText + ch
+            idx = idx + 1
+        }
+        ~> (outText)
+    }
+
+    // True when raw[start .. start + 5] is a backslash, "u", and four hex digits.
+    share isUnicodeEscapeAt(raw: text, start: int) :: bool {
+        if start + 5 >= raw.length { ~> (false) }
+        if raw[start] != "\\" { ~> (false) }
+        if raw[start + 1] != "u" { ~> (false) }
+        if !Unicode.isHexDigit(raw[start + 2]) { ~> (false) }
+        if !Unicode.isHexDigit(raw[start + 3]) { ~> (false) }
+        if !Unicode.isHexDigit(raw[start + 4]) { ~> (false) }
+        if !Unicode.isHexDigit(raw[start + 5]) { ~> (false) }
+        ~> (true)
+    }
+
+    // Builds the indentation prefix used by pretty serialization for the given nesting depth.
+    share indent(depth: int) :: text {
+        textValue := ""
+        for i of 1 to depth {
+            textValue = textValue + " "
+        }
+        ~> (textValue)
+    }
+}
+
+// Recursive-descent parser over source; index is the position of the next unread character.
+share JsonParser {
+    source: text = ""
+    index: int = 0
+
+    share this(sourceText: text) {
+        this.source = sourceText
+        this.index = 0
+    }
+
+    // Parses exactly one value; surrounding whitespace is allowed, any other trailing content is an error.
+    share parseRoot() :: JsonValue {
+        this.skipWhitespace()
+        if this.index >= this.source.length {
+            ~> (JsonValue.makeError("empty json input"))
+        }
+
+        value := this.parseValue()
+        if value.isError() { ~> (value) }
+
+        this.skipWhitespace()
+        if this.index != this.source.length {
+            ~> (JsonValue.makeError("unexpected trailing content at index " + this.index))
+        }
+
+        ~> (value)
+    }
+
+    // Skips whitespace, then dispatches on the next character.
+    share parseValue() :: JsonValue {
+        this.skipWhitespace()
+        if this.index >= this.source.length {
+            ~> (JsonValue.makeError("unexpected end of input"))
+        }
+
+        ch := this.source[this.index]
+
+        if ch == "n" { ~> (this.parseNull()) }
+        if ch == "t" { ~> (this.parseTrue()) }
+        if ch == "f" { ~> (this.parseFalse()) }
+        if ch == "\"" { ~> (this.parseText()) }
+        if ch == "[" { ~> (this.parseArray()) }
+        if ch == "{" { ~> (this.parseObject()) }
+        if any[ch == "-", Unicode.isDigit(ch)] { ~> (this.parseNumber()) }
+
+        ~> (JsonValue.makeError("invalid json value at index " + this.index))
+    }
+
+    share parseNull() :: JsonValue {
+        if this.matchKeyword("null") {
+            this.index = this.index + 4
+            ~> (JsonValue.makeNull())
+        }
+        ~> (JsonValue.makeError("invalid token, expected null at index " + this.index))
+    }
+
+    share parseTrue() :: JsonValue {
+        if this.matchKeyword("true") {
+            this.index = this.index + 4
+            ~> (JsonValue.makeBool(true))
+        }
+        ~> (JsonValue.makeError("invalid token, expected true at index " + this.index))
+    }
+
+    share parseFalse() :: JsonValue {
+        if this.matchKeyword("false") {
+            this.index = this.index + 5
+            ~> (JsonValue.makeBool(false))
+        }
+        ~> (JsonValue.makeError("invalid token, expected false at index " + this.index))
+    }
+
+    // Parses a quoted text starting at index. Simple escapes are decoded; \uXXXX escapes are
+    // validated (including surrogate pairing) and stored as a backslash, "u", and Unicode.hex4 digits.
+    share parseText() :: JsonValue {
+        // Bounds check first: parseObject calls parseText right after skipWhitespace, which can stop at end of input.
+        if any[this.index >= this.source.length, this.source[this.index] != "\""] {
+            ~> (JsonValue.makeError("expected text at index " + this.index))
+        }
+
+        this.index = this.index + 1
+        outText := ""
+
+        for i of this.index to this.source.length {
+            if this.index >= this.source.length {
+                ~> (JsonValue.makeError("unterminated text"))
+            }
+
+            ch := this.source[this.index]
+            this.index = this.index + 1
+
+            if ch == "\"" {
+                ~> (JsonValue.makeText(outText))
+            }
+
+            if ch == "\\" {
+                if this.index >= this.source.length {
+                    ~> (JsonValue.makeError("unterminated escape sequence"))
+                }
+
+                esc := this.source[this.index]
+                this.index = this.index + 1
+
+                if esc == "\"" { outText = outText + "\"" continue }
+                if esc == "\\" { outText = outText + "\\" continue }
+                if esc == "/" { outText = outText + "/" continue }
+                if esc == "b" { outText = outText + "\b" continue }
+                if esc == "f" { outText = outText + "\f" continue }
+                if esc == "n" { outText = outText + "\n" continue }
+                if esc == "r" { outText = outText + "\r" continue }
+                if esc == "t" { outText = outText + "\t" continue }
+
+                if esc == "u" {
+                    firstUnit := this.parseHex4At(this.index)
+                    if firstUnit < 0 {
+                        ~> (JsonValue.makeError("invalid unicode escape at index " + this.index))
+                    }
+                    this.index = this.index + 4
+                    firstText := "\\u" + Unicode.hex4(firstUnit)
+
+                    if Unicode.isHighSurrogate(firstUnit) {
+                        if this.index + 5 >= this.source.length {
+                            ~> (JsonValue.makeError("missing low surrogate at index " + this.index))
+                        }
+                        if any[this.source[this.index] != "\\", this.source[this.index + 1] != "u"] {
+                            ~> (JsonValue.makeError("expected low surrogate escape at index " + this.index))
+                        }
+                        this.index = this.index + 2
+                        secondUnit := this.parseHex4At(this.index)
+                        if any[secondUnit < 0, !Unicode.isLowSurrogate(secondUnit)] {
+                            ~> (JsonValue.makeError("invalid low surrogate at index " + this.index))
+                        }
+                        this.index = this.index + 4
+                        outText = outText + firstText + "\\u" + Unicode.hex4(secondUnit)
+                        continue
+                    }
+
+                    if Unicode.isLowSurrogate(firstUnit) {
+                        ~> (JsonValue.makeError("unexpected low surrogate at index " + (this.index - 4)))
+                    }
+
+                    outText = outText + firstText
+                    continue
+                }
+
+                ~> (JsonValue.makeError("invalid escape sequence \\" + esc + " at index " + (this.index - 1)))
+            }
+
+            outText = outText + ch
+        }
+
+        ~> (JsonValue.makeError("unterminated text"))
+    }
+
+    // Consumes a JSON number (sign, integer part, optional fraction and exponent) and keeps its source text.
+    share parseNumber() :: JsonValue {
+        start := this.index
+
+        if this.source[this.index] == "-" {
+            this.index = this.index + 1
+            if this.index >= this.source.length {
+                ~> (JsonValue.makeError("invalid number at index " + start))
+            }
+        }
+
+        if this.source[this.index] == "0" {
+            this.index = this.index + 1
+        } else {
+            if !Unicode.isDigit(this.source[this.index]) {
+                ~> (JsonValue.makeError("invalid number at index " + start))
+            }
+            for i of this.index to this.source.length {
+                if this.index >= this.source.length { break }
+                if !Unicode.isDigit(this.source[this.index]) { break }
+                this.index = this.index + 1
+            }
+        }
+
+        if all[this.index < this.source.length, this.source[this.index] == "."] {
+            this.index = this.index + 1
+            if any[this.index >= this.source.length, !Unicode.isDigit(this.source[this.index])] {
+                ~> (JsonValue.makeError("invalid number fraction at index " + start))
+            }
+            for i of this.index to this.source.length {
+                if this.index >= this.source.length { break }
+                if !Unicode.isDigit(this.source[this.index]) { break }
+                this.index = this.index + 1
+            }
+        }
+
+        if all[this.index < this.source.length, any[this.source[this.index] == "e", this.source[this.index] == "E"]] {
+            this.index = this.index + 1
+            if all[this.index < this.source.length, any[this.source[this.index] == "+", this.source[this.index] == "-"]] {
+                this.index = this.index + 1
+            }
+            if any[this.index >= this.source.length, !Unicode.isDigit(this.source[this.index])] {
+                ~> (JsonValue.makeError("invalid number exponent at index " + start))
+            }
+            for i of this.index to this.source.length {
+                if this.index >= this.source.length { break }
+                if !Unicode.isDigit(this.source[this.index]) { break }
+                this.index = this.index + 1
+            }
+        }
+
+        numberText := this.source[start to this.index - 1]
+        ~> (JsonValue.makeNumber(numberText))
+    }
+
+    // Parses [ ... ]; a trailing comma is rejected.
+    share parseArray() :: JsonValue {
+        if this.source[this.index] != "[" {
+            ~> (JsonValue.makeError("expected [ at index " + this.index))
+        }
+
+        arr := JsonValue.makeArray([])
+        this.index = this.index + 1
+        this.skipWhitespace()
+
+        if all[this.index < this.source.length, this.source[this.index] == "]"] {
+            this.index = this.index + 1
+            ~> (arr)
+        }
+
+        for n of 0 to this.source.length {
+            value := this.parseValue()
+            if value.isError() { ~> (value) }
+            arr.add(value)
+
+            this.skipWhitespace()
+            if this.index >= this.source.length {
+                ~> (JsonValue.makeError("unterminated array"))
+            }
+
+            ch := this.source[this.index]
+            if ch == "," {
+                this.index = this.index + 1
+                this.skipWhitespace()
+                if all[this.index < this.source.length, this.source[this.index] == "]"] {
+                    ~> (JsonValue.makeError("trailing comma in array at index " + this.index))
+                }
+                continue
+            }
+
+            if ch == "]" {
+                this.index = this.index + 1
+                ~> (arr)
+            }
+
+            ~> (JsonValue.makeError("expected , or ] in array at index " + this.index))
+        }
+
+        ~> (JsonValue.makeError("unterminated array"))
+    }
+
+    // Parses { ... }; keys must be quoted text and a trailing comma is rejected.
+    share parseObject() :: JsonValue {
+        if this.source[this.index] != "{" {
+            ~> (JsonValue.makeError("expected { at index " + this.index))
+        }
+
+        obj := JsonValue.makeObject([], [])
+        this.index = this.index + 1
+        this.skipWhitespace()
+
+        if all[this.index < this.source.length, this.source[this.index] == "}"] {
+            this.index = this.index + 1
+            ~> (obj)
+        }
+
+        for n of 0 to this.source.length {
+            this.skipWhitespace()
+            keyValue := this.parseText()
+            if keyValue.isError() { ~> (keyValue) }
+            key := keyValue.asText()
+
+            this.skipWhitespace()
+            if any[this.index >= this.source.length, this.source[this.index] != ":"] {
+                ~> (JsonValue.makeError("expected : after object key at index " + this.index))
+            }
+            this.index = this.index + 1
+
+            value := this.parseValue()
+            if value.isError() { ~> (value) }
+            obj.set(key, value)
+
+            this.skipWhitespace()
+            if this.index >= this.source.length {
+                ~> (JsonValue.makeError("unterminated object"))
+            }
+
+            ch := this.source[this.index]
+            if ch == "," {
+                this.index = this.index + 1
+                this.skipWhitespace()
+                if all[this.index < this.source.length, this.source[this.index] == "}"] {
+                    ~> (JsonValue.makeError("trailing comma in object at index " + this.index))
+                }
+                continue
+            }
+
+            if ch == "}" {
+                this.index = this.index + 1
+                ~> (obj)
+            }
+
+            ~> (JsonValue.makeError("expected , or } in object at index " + this.index))
+        }
+
+        ~> (JsonValue.makeError("unterminated object"))
+    }
+
+    // Advances index past spaces, new lines, carriage returns and tabs.
+    share skipWhitespace() {
+        for n of this.index to this.source.length {
+            if this.index >= this.source.length { return }
+            ch := this.source[this.index]
+            if any[ch == " ", ch == "\n", ch == "\r", ch == "\t"] {
+                this.index = this.index + 1
+                continue
+            }
+            return
+        }
+    }
+
+    // True when source contains word at index; does not advance index.
+    share matchKeyword(word: text) :: bool {
+        if this.index + word.length > this.source.length { ~> (false) }
+        for i of 0 to word.length - 1 {
+            if this.source[this.index + i] != word[i] {
+                ~> (false)
+            }
+        }
+        ~> (true)
+    }
+
+    // Value of the four hex digits at start, or -1 when they are missing or invalid.
+    share parseHex4At(start: int) :: int {
+        if start + 3 >= this.source.length { ~> (-1) }
+        value: int = 0
+        for j of 0 to 3 {
+            digit := Unicode.hexValue(this.source[start + j])
+            if digit < 0 { ~> (-1) }
+            value = value * 16 + digit
+        }
+        ~> (value)
+    }
+
+}
diff --git a/src/main/cod/std/unicode/Unicode.cod b/src/main/cod/std/unicode/Unicode.cod
new file mode 100644
index 00000000..f9f92a31
--- /dev/null
+++ b/src/main/cod/std/unicode/Unicode.cod
@@ -0,0 +1,141 @@
+unit unicode
+
+// Helpers for decimal/hex digits and \uXXXX escape sequences.
+share Unicode {
+    share isDigit(ch: text) :: bool {
+        ~> (any[ch == "0", ch == "1", ch == "2", ch == "3", ch == "4", ch == "5", ch == "6", ch == "7", ch == "8", ch == "9"])
+    }
+
+    share isHexDigit(ch: text) :: bool {
+        ~> (any[
+            Unicode.isDigit(ch),
+            any[ch == "a", ch == "b", ch == "c", ch == "d", ch == "e", ch == "f"],
+            any[ch == "A", ch == "B", ch == "C", ch == "D", ch == "E", ch == "F"]
+        ])
+    }
+
+    // Rewrites every \uXXXX escape in raw with Unicode.hex4 digits and checks surrogate pairing.
+    // Returns "" when an escape is malformed, a surrogate is unpaired, or raw ends with a lone backslash.
+    share normalizeEscaped(raw: text) :: text {
+        outText := ""
+        idx: int = 0
+        for i of 0 to raw.length {
+            if idx >= raw.length { break }
+            ch := raw[idx]
+            if ch != "\\" {
+                outText = outText + ch
+                idx = idx + 1
+                continue
+            }
+
+            if idx + 1 >= raw.length {
+                ~> ("")
+            }
+
+            if raw[idx + 1] != "u" {
+                outText = outText + ch + raw[idx + 1]
+                idx = idx + 2
+                continue
+            }
+
+            if idx + 5 >= raw.length { ~> ("") }
+            first := Unicode.parseHex4(raw, idx + 2)
+            if first < 0 { ~> ("") }
+            firstText := "\\u" + Unicode.hex4(first)
+            idx = idx + 6
+
+            if Unicode.isHighSurrogate(first) {
+                if idx + 5 >= raw.length { ~> ("") }
+                if any[raw[idx] != "\\", raw[idx + 1] != "u"] { ~> ("") }
+                second := Unicode.parseHex4(raw, idx + 2)
+                if any[second < 0, !Unicode.isLowSurrogate(second)] { ~> ("") }
+                outText = outText + firstText + "\\u" + Unicode.hex4(second)
+                idx = idx + 6
+                continue
+            }
+
+            if Unicode.isLowSurrogate(first) { ~> ("") }
+            outText = outText + firstText
+        }
+        ~> (outText)
+    }
+
+    // True when raw is empty or normalizeEscaped returns a non-empty result for it.
+    share isValidEscaped(raw: text) :: bool {
+        ~> (any[raw.isEmpty(), !Unicode.normalizeEscaped(raw).isEmpty()])
+    }
+
+    // Value of the four hex digits at raw[start .. start + 3], or -1 when they are missing or invalid.
+    share parseHex4(raw: text, start: int) :: int {
+        if start + 3 >= raw.length { ~> (-1) }
+        value: int = 0
+        for j of 0 to 3 {
+            digit := Unicode.hexValue(raw[start + j])
+            if digit < 0 { ~> (-1) }
+            value = value * 16 + digit
+        }
+        ~> (value)
+    }
+
+    // Numeric value of one hex digit (either case), or -1 for any other character.
+    share hexValue(ch: text) :: int {
+        if ch == "0" { ~> (0) }
+        if ch == "1" { ~> (1) }
+        if ch == "2" { ~> (2) }
+        if ch == "3" { ~> (3) }
+        if ch == "4" { ~> (4) }
+        if ch == "5" { ~> (5) }
+        if ch == "6" { ~> (6) }
+        if ch == "7" { ~> (7) }
+        if ch == "8" { ~> (8) }
+        if ch == "9" { ~> (9) }
+        if any[ch == "a", ch == "A"] { ~> (10) }
+        if any[ch == "b", ch == "B"] { ~> (11) }
+        if any[ch == "c", ch == "C"] { ~> (12) }
+        if any[ch == "d", ch == "D"] { ~> (13) }
+        if any[ch == "e", ch == "E"] { ~> (14) }
+        if any[ch == "f", ch == "F"] { ~> (15) }
+        ~> (-1)
+    }
+
+    // Four upper-case hex digits for value; negative values become 0 and values above 65535 wrap modulo 65536.
+    share hex4(value: int) :: text {
+        x: int = value
+        if x < 0 { x = 0 }
+        if x > 65535 { x = x % 65536 }
+        d0 := (x / 4096) % 16
+        d1 := (x / 256) % 16
+        d2 := (x / 16) % 16
+        d3 := x % 16
+        ~> (Unicode.hexDigit(d0) + Unicode.hexDigit(d1) + Unicode.hexDigit(d2) + Unicode.hexDigit(d3))
+    }
+
+    // Upper-case hex digit for 0..14; every other value yields "F".
+    share hexDigit(value: int) :: text {
+        if value == 0 { ~> ("0") }
+        if value == 1 { ~> ("1") }
+        if value == 2 { ~> ("2") }
+        if value == 3 { ~> ("3") }
+        if value == 4 { ~> ("4") }
+        if value == 5 { ~> ("5") }
+        if value == 6 { ~> ("6") }
+        if value == 7 { ~> ("7") }
+        if value == 8 { ~> ("8") }
+        if value == 9 { ~> ("9") }
+        if value == 10 { ~> ("A") }
+        if value == 11 { ~> ("B") }
+        if value == 12 { ~> ("C") }
+        if value == 13 { ~> ("D") }
+        if value == 14 { ~> ("E") }
+        ~> ("F")
+    }
+
+    // Surrogate ranges: high is 0xD800-0xDBFF (55296-56319), low is 0xDC00-0xDFFF (56320-57343).
+    share isHighSurrogate(unit: int) :: bool {
+        ~> (all[unit >= 55296, unit <= 56319])
+    }
+
+    share isLowSurrogate(unit: int) :: bool {
+        ~> (all[unit >= 56320, unit <= 57343])
+    }
+}
diff --git a/src/main/java/cod/interpreter/registry/LiteralRegistry.java b/src/main/java/cod/interpreter/registry/LiteralRegistry.java index dcfada66..eab75733 100644 --- a/src/main/java/cod/interpreter/registry/LiteralRegistry.java +++ b/src/main/java/cod/interpreter/registry/LiteralRegistry.java @@ -269,6 +269,7 @@ public Object handle(Object literal, List arguments, ExecutionContext ct }, String.class ); + // Future definitions: // define("isEmpty", isEmptyHandler, String.class, List.class, NaturalArray.class); @@ -625,6 +626,7 @@ private Object handleStringIsUpper(Object literal, List arguments) { } return seenLetter; } + @SuppressWarnings("unchecked") private List asConcreteList(Object literal) {