From fc1e56fff89b06242eb2e2a67d9ab444ad062560 Mon Sep 17 00:00:00 2001 From: CodeAnarchist Date: Wed, 10 Dec 2025 01:48:26 +0100 Subject: [PATCH 1/6] [DATA] json --- shared/data/json.c | 732 +++++++++++++++++++++++++++++++++++++++++++++ shared/data/json.h | 99 ++++++ 2 files changed, 831 insertions(+) create mode 100644 shared/data/json.c create mode 100644 shared/data/json.h diff --git a/shared/data/json.c b/shared/data/json.c new file mode 100644 index 00000000..c43b579a --- /dev/null +++ b/shared/data/json.c @@ -0,0 +1,732 @@ +#include "json.h" +#include "std/std.h" +#include "syscalls/syscalls.h" + +JsonError parse_value(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out); + +static void json_skip_whitespace(const char *buf, uint32_t len, uint32_t *pos) { + while (*pos < len) { + char c = buf[*pos]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') (*pos)++; + else break; + } +} + +JsonError parse_string(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out) { + if (!(*pos < len && buf[*pos] == '"')) return JSON_ERR_INVALID; + (*pos)++; + string s = string_repeat('\0', 0); + + while (*pos < len) { + char c = buf[(*pos)++]; + if (c == '"') { + JsonValue *v = malloc(sizeof(JsonValue)); + if (!v) { + free_sized(s.data, s.mem_length); + return JSON_ERR_OOM; + } + v->kind = JSON_STRING; + v->u.string = s; + *out = v; + return JSON_OK; + } + if (c == '\\') { + if (*pos >= len) { + free_sized(s.data, s.mem_length); + return JSON_ERR_INVALID; + } + char e = buf[(*pos)++]; + char r = e; + if (e == 'b') r = '\b'; + else if (e == 'f') r = '\f'; + else if (e == 'n') r = '\n'; + else if (e == 'r') r = '\r'; + else if (e == 't') r = '\t'; + else if (!(e == '"' || e == '\\' || e == '/')) { + free_sized(s.data, s.mem_length); + return JSON_ERR_INVALID; + } + string_append_bytes(&s, &r, 1); + continue; + } + string_append_bytes(&s, &c, 1); + } + + free_sized(s.data, s.mem_length); + return JSON_ERR_INVALID; +} + +JsonError parse_number(const char *buf, 
uint32_t len, uint32_t *pos, JsonValue **out) { + uint32_t start = *pos; + bool neg = false; + + if (*pos < len && buf[*pos] == '-') { + neg = true; + (*pos)++; + } + + if (!(*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9')) return JSON_ERR_INVALID; + while (*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9') (*pos)++; + + bool has_frac = false; + uint32_t frac_start = 0; + + if (*pos < len && buf[*pos] == '.') { + has_frac = true; + (*pos)++; + if (!(*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9')) return JSON_ERR_INVALID; + frac_start = *pos; + while (*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9') (*pos)++; + } + + bool has_exp = false; + bool exp_neg = false; + int exp_val = 0; + + if (*pos < len && (buf[*pos] == 'e' || buf[*pos] == 'E')) { + has_exp = true; + (*pos)++; + if (*pos < len && (buf[*pos] == '+' || buf[*pos] == '-')) { + if (buf[*pos] == '-') exp_neg = true; + (*pos)++; + } + if (!(*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9')) return JSON_ERR_INVALID; + while (*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9') { + exp_val = exp_val * 10 + (buf[*pos] - '0'); + (*pos)++; + } + if (exp_neg) exp_val = -exp_val; + } + + uint32_t end = *pos; + if (end <= start) return JSON_ERR_INVALID; + + if (!has_frac && !has_exp) { + int64_t x = 0; + uint32_t i = start + (neg ? 1u : 0u); + for (; i < end; i++) x = x * 10 + (buf[i] - '0'); + if (neg) x = -x; + + JsonValue *v = malloc(sizeof(JsonValue)); + if (!v) return JSON_ERR_OOM; + v->kind = JSON_INT; + v->u.integer = x; + *out = v; + return JSON_OK; + } + + double ip = 0.0; + uint32_t i = start + (neg ? 1u : 0u); + for (; i < end; i++) { + char c = buf[i]; + if (c == '.' 
|| c == 'e' || c == 'E') break; + ip = ip * 10.0 + (double)(c - '0'); + } + + double fp = 0.0; + if (has_frac) { + double base = 0.1; + for (i = frac_start; i < end; i++) { + char c = buf[i]; + if (c == 'e' || c == 'E') break; + fp += (double)(c - '0') * base; + base *= 0.1; + } + } + + double val = ip + fp; + if (neg) val = -val; + + if (has_exp) { + double y = 1.0; + if (exp_val > 0) while (exp_val--) y *= 10.0; + else while (exp_val++) y /= 10.0; + val *= y; + } + + JsonValue *v = malloc(sizeof(JsonValue)); + if (!v) return JSON_ERR_OOM; + v->kind = JSON_DOUBLE; + v->u.real = val; + *out = v; + return JSON_OK; +} + +JsonError parse_array(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out) { + if (!(*pos < len && buf[*pos] == '[')) return JSON_ERR_INVALID; + (*pos)++; + json_skip_whitespace(buf, len, pos); + + JsonValue *arr = malloc(sizeof(JsonValue)); + if (!arr) return JSON_ERR_OOM; + arr->kind = JSON_ARRAY; + arr->u.array.items = 0; + arr->u.array.count = 0; + + if (*pos < len && buf[*pos] == ']') { + (*pos)++; + *out = arr; + return JSON_OK; + } + + for (;;) { + JsonValue *elem = 0; + JsonError e = parse_value(buf, len, pos, &elem); + if (e != JSON_OK) { + json_free(arr); + return e; + } + + uint32_t n = arr->u.array.count; + JsonValue **tmp = malloc((n + 1) * sizeof(JsonValue *)); + if (!tmp) { + json_free(elem); + json_free(arr); + return JSON_ERR_OOM; + } + + for (uint32_t i = 0; i < n; i++) tmp[i] = arr->u.array.items[i]; + tmp[n] = elem; + + if (arr->u.array.items) free_sized(arr->u.array.items, n * sizeof(JsonValue *)); + arr->u.array.items = tmp; + arr->u.array.count = n + 1; + + json_skip_whitespace(buf, len, pos); + + if (*pos < len && buf[*pos] == ']') { + (*pos)++; + break; + } + + if (!(*pos < len && buf[*pos] == ',')) { + json_free(arr); + return JSON_ERR_INVALID; + } + + (*pos)++; + json_skip_whitespace(buf, len, pos); + } + + *out = arr; + return JSON_OK; +} + +JsonError parse_object(const char *buf, uint32_t len, uint32_t *pos, 
JsonValue **out) { + if (!(*pos < len && buf[*pos] == '{')) return JSON_ERR_INVALID; + (*pos)++; + json_skip_whitespace(buf, len, pos); + + JsonValue *obj = malloc(sizeof(JsonValue)); + if (!obj) return JSON_ERR_OOM; + obj->kind = JSON_OBJECT; + obj->u.object.pairs = 0; + obj->u.object.count = 0; + + if (*pos < len && buf[*pos] == '}') { + (*pos)++; + *out = obj; + return JSON_OK; + } + + for (;;) { + JsonValue *ks = 0; + JsonError e = parse_string(buf, len, pos, &ks); + if (e != JSON_OK) { + if (ks) json_free(ks); + json_free(obj); + return JSON_ERR_INVALID; + } + + string key = ks->u.string; + free_sized(ks, sizeof(JsonValue)); + + json_skip_whitespace(buf, len, pos); + + if (!(*pos < len && buf[*pos] == ':')) { + free_sized(key.data, key.mem_length); + json_free(obj); + return JSON_ERR_INVALID; + } + + (*pos)++; + json_skip_whitespace(buf, len, pos); + + JsonValue *val = 0; + e = parse_value(buf, len, pos, &val); + if (e != JSON_OK) { + free_sized(key.data, key.mem_length); + json_free(obj); + return e; + } + + uint32_t n = obj->u.object.count; + JsonPair *tmp = malloc((n + 1) * sizeof(JsonPair)); + if (!tmp) { + free_sized(key.data, key.mem_length); + json_free(val); + json_free(obj); + return JSON_ERR_OOM; + } + + for (uint32_t i = 0; i < n; i++) tmp[i] = obj->u.object.pairs[i]; + tmp[n].key = key; + tmp[n].value = val; + + if (obj->u.object.pairs) free_sized(obj->u.object.pairs, n * sizeof(JsonPair)); + obj->u.object.pairs = tmp; + obj->u.object.count = n + 1; + + json_skip_whitespace(buf, len, pos); + + if (*pos < len && buf[*pos] == '}') { + (*pos)++; + break; + } + + if (!(*pos < len && buf[*pos] == ',')) { + json_free(obj); + return JSON_ERR_INVALID; + } + + (*pos)++; + json_skip_whitespace(buf, len, pos); + } + + *out = obj; + return JSON_OK; +} + +JsonError parse_value(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out) { + json_skip_whitespace(buf, len, pos); + if (*pos >= len) return JSON_ERR_INVALID; + + char c = buf[*pos]; + + if (c == 
'"') return parse_string(buf, len, pos, out); + if (c == '{') return parse_object(buf, len, pos, out); + if (c == '[') return parse_array(buf, len, pos, out); + if (c == '-' || (c >= '0' && c <= '9')) return parse_number(buf, len, pos, out); + + if (c == 't' && *pos + 4 <= len && + buf[*pos] == 't' && buf[*pos+1] == 'r' && buf[*pos+2] == 'u' && buf[*pos+3] == 'e') { + *pos += 4; + JsonValue *v = malloc(sizeof(JsonValue)); + if (!v) return JSON_ERR_OOM; + v->kind = JSON_BOOL; + v->u.boolean = true; + *out = v; + return JSON_OK; + } + + if (c == 'f' && *pos + 5 <= len && + buf[*pos] == 'f' && buf[*pos+1] == 'a' && buf[*pos+2] == 'l' && buf[*pos+3] == 's' && buf[*pos+4] == 'e') { + *pos += 5; + JsonValue *v = malloc(sizeof(JsonValue)); + if (!v) return JSON_ERR_OOM; + v->kind = JSON_BOOL; + v->u.boolean = false; + *out = v; + return JSON_OK; + } + + if (c == 'n' && *pos + 4 <= len && + buf[*pos] == 'n' && buf[*pos+1] == 'u' && buf[*pos+2] == 'l' && buf[*pos+3] == 'l') { + *pos += 4; + JsonValue *v = malloc(sizeof(JsonValue)); + if (!v) return JSON_ERR_OOM; + v->kind = JSON_NULL; + *out = v; + return JSON_OK; + } + + return JSON_ERR_INVALID; +} + +JsonError json_parse(const char *buf, uint32_t len, JsonValue **out) { + uint32_t pos = 0; + JsonError e = parse_value(buf, len, &pos, out); + if (e != JSON_OK) return e; + json_skip_whitespace(buf, len, &pos); + if (pos != len) { + json_free(*out); + return JSON_ERR_INVALID; + } + return JSON_OK; +} + +void json_free(JsonValue *v) { + if (!v) return; + + if (v->kind == JSON_STRING) { + free_sized(v->u.string.data, v->u.string.mem_length); + } + + else if (v->kind == JSON_ARRAY) { + for (uint32_t i = 0; i < v->u.array.count; i++) json_free(v->u.array.items[i]); + if (v->u.array.items) + free_sized(v->u.array.items, v->u.array.count * sizeof(JsonValue *)); + } + + else if (v->kind == JSON_OBJECT) { + for (uint32_t i = 0; i < v->u.object.count; i++) { + free_sized(v->u.object.pairs[i].key.data, 
v->u.object.pairs[i].key.mem_length); + json_free(v->u.object.pairs[i].value); + } + if (v->u.object.pairs) + free_sized(v->u.object.pairs, v->u.object.count * sizeof(JsonPair)); + } + + free_sized(v, sizeof(JsonValue)); +} + +bool json_get_bool(const JsonValue *v, bool *out) { + if (!v || v->kind != JSON_BOOL) return false; + *out = v->u.boolean; + return true; +} + +bool json_get_int(const JsonValue *v, int64_t *out) { + if (!v || v->kind != JSON_INT) return false; + *out = v->u.integer; + return true; +} + +bool json_get_double(const JsonValue *v, double *out) { + if (!v || v->kind != JSON_DOUBLE) return false; + *out = v->u.real; + return true; +} + +bool json_get_string(const JsonValue *v, string *out) { + if (!v || v->kind != JSON_STRING) return false; + *out = v->u.string; + return true; +} + +bool json_get_number_as_double(const JsonValue *v, double *out) { + if (!v) return false; + if (v->kind == JSON_DOUBLE) { + *out = v->u.real; + return true; + } + if (v->kind == JSON_INT) { + *out = (double)v->u.integer; + return true; + } + return false; +} + +uint32_t json_array_size(const JsonValue *v) { + if (!v || v->kind != JSON_ARRAY) return 0; + return v->u.array.count; +} + +JsonValue *json_array_get(const JsonValue *v, uint32_t index) { + if (!v || v->kind != JSON_ARRAY) return 0; + if (index >= v->u.array.count) return 0; + return v->u.array.items[index]; +} + +JsonValue *json_obj_get(const JsonValue *obj, const char *key) { + if (!obj || obj->kind != JSON_OBJECT) return 0; + for (uint32_t i = 0; i < obj->u.object.count; i++) { + if (strcmp(obj->u.object.pairs[i].key.data, key) == 0) + return obj->u.object.pairs[i].value; + } + return 0; +} + +bool json_obj_get_bool(const JsonValue *obj, const char *key, bool *out) { + return json_get_bool(json_obj_get(obj, key), out); +} + +bool json_obj_get_int(const JsonValue *obj, const char *key, int64_t *out) { + return json_get_int(json_obj_get(obj, key), out); +} + +bool json_obj_get_double(const JsonValue *obj, 
const char *key, double *out) { + return json_get_double(json_obj_get(obj, key), out); +} + +bool json_obj_get_string(const JsonValue *obj, const char *key, string *out) { + return json_get_string(json_obj_get(obj, key), out); +} + +JsonValue *json_new_null() { + JsonValue *x = malloc(sizeof(JsonValue)); + if (!x) return 0; + x->kind = JSON_NULL; + return x; +} + +JsonValue *json_new_bool(bool v) { + JsonValue *x = malloc(sizeof(JsonValue)); + if (!x) return 0; + x->kind = JSON_BOOL; + x->u.boolean = v; + return x; +} + +JsonValue *json_new_int(int64_t v) { + JsonValue *x = malloc(sizeof(JsonValue)); + if (!x) return 0; + x->kind = JSON_INT; + x->u.integer = v; + return x; +} + +JsonValue *json_new_double(double v) { + JsonValue *x = malloc(sizeof(JsonValue)); + if (!x) return 0; + x->kind = JSON_DOUBLE; + x->u.real = v; + return x; +} + +JsonValue *json_new_string(const char *data, uint32_t len) { + JsonValue *x = malloc(sizeof(JsonValue)); + if (!x) return 0; + x->kind = JSON_STRING; + x->u.string = string_from_literal_length((char *)data, len); + return x; +} + +JsonValue *json_new_array() { + JsonValue *x = malloc(sizeof(JsonValue)); + if (!x) return 0; + x->kind = JSON_ARRAY; + x->u.array.items = 0; + x->u.array.count = 0; + return x; +} + +JsonValue *json_new_object() { + JsonValue *x = malloc(sizeof(JsonValue)); + if (!x) return 0; + x->kind = JSON_OBJECT; + x->u.object.pairs = 0; + x->u.object.count = 0; + return x; +} + +bool json_array_push(JsonValue *arr, JsonValue *elem) { + if (!arr || arr->kind != JSON_ARRAY) return false; + uint32_t n = arr->u.array.count; + JsonValue **tmp = malloc((n + 1) * sizeof(JsonValue *)); + if (!tmp) return false; + for (uint32_t i = 0; i < n; i++) tmp[i] = arr->u.array.items[i]; + tmp[n] = elem; + if (arr->u.array.items) free_sized(arr->u.array.items, n * sizeof(JsonValue *)); + arr->u.array.items = tmp; + arr-> u.array.count = n + 1; + return true; +} + +bool json_obj_set(JsonValue *obj, const char *key, JsonValue *value) 
{ + if (!obj || obj->kind != JSON_OBJECT) return false; + + uint32_t klen = strlen(key); + + for (uint32_t i = 0; i < obj->u.object.count; i++) { + if (strcmp(obj->u.object.pairs[i].key.data, key) == 0) { + free_sized(obj->u.object.pairs[i].key.data, obj->u.object.pairs[i].key.mem_length); + obj->u.object.pairs[i].key = string_from_literal_length((char *)key, klen); + json_free(obj->u.object.pairs[i].value); + obj->u.object.pairs[i ].value = value; + return true; + } + } + + string sk = string_from_literal_length((char *)key, klen); + uint32_t n = obj->u.object.count; + + JsonPair *tmp = malloc((n + 1) * sizeof(JsonPair)); + if (!tmp) { + free_sized(sk.data, sk.mem_length); + return false; + } + + for (uint32_t i = 0; i < n; i++) tmp[i] = obj->u.object.pairs[i]; + tmp[n].key = sk; + tmp[n].value = value; + + if (obj-> u.object.pairs) free_sized(obj->u.object.pairs, n * sizeof(JsonPair)); + obj->u.object.pairs = tmp; + obj->u.object.count = n + 1; + + return true; +} + +JsonValue *json_clone(const JsonValue *src) { + if (!src) return 0; + + if (src->kind == JSON_NULL) return json_new_null(); + if (src->kind == JSON_BOOL) return json_new_bool(src->u.boolean); + if (src->kind == JSON_INT) return json_new_int(src->u.integer); + if (src->kind == JSON_DOUBLE) return json_new_double(src->u.real); + if (src->kind == JSON_STRING) return json_new_string(src->u.string.data, src->u.string.length); + + if (src->kind == JSON_ARRAY) { + JsonValue *a = json_new_array(); + if (!a) return 0; + for (uint32_t i = 0; i < src->u.array.count; i++) { + JsonValue *c = json_clone(src->u.array.items[i]); + if (!c) { + json_free(a); + return 0; + } + json_array_push(a, c); + } + return a; + } + + if (src->kind == JSON_OBJECT) { + JsonValue *o = json_new_object(); + if (!o) return 0; + for (uint32_t i = 0; i < src->u.object.count; i++) { + JsonPair *p = &src->u.object.pairs[i]; + JsonValue *c = json_clone(p->value); + if (!c) { + json_free(o); + return 0; + } + json_obj_set(o, p->key.data, c); 
+ } + return o; + } + + return 0; +} + +void serialize_string(const string *s, string *out) { + string_append_bytes(out, "\"", 1); + for (uint32_t i = 0; i < s->length; i++) { + char c = s->data[i]; + if (c == '"' || c == '\\') { + char b[2] = {'\\', c}; + string_append_bytes(out, b, 2); + } else if (c == '\b') string_append_bytes(out, "\\b", 2); + else if (c == '\f') string_append_bytes(out, "\\f", 2); + else if (c == '\n') string_append_bytes(out, "\\n", 2); + else if (c == '\r') string_append_bytes(out, "\\r", 2); + else if (c == '\t') string_append_bytes(out, "\\t", 2); + else string_append_bytes(out, &c, 1); + } + string_append_bytes(out, "\"", 1); +} + +void serialize_value(const JsonValue *v, string *out, uint32_t indent, uint32_t level); + +void serialize_array(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { + string_append_bytes(out, "[", 1); + + uint32_t n = v->u.array.count; + if (n == 0) { + string_append_bytes(out, "]", 1); + return; + } + + if (indent) string_append_bytes(out, "\n", 1); + + for (uint32_t i = 0; i < n; i++) { + if (indent) { + for (uint32_t k = 0; k < (level + 1) * indent; k++) string_append_bytes(out, " ", 1); + } + + serialize_value(v->u.array.items[i], out, indent, level + 1); + + if (i + 1 < n) string_append_bytes(out, ",", 1); + if (indent) string_append_bytes(out, "\n", 1); + } + + if (indent) { + for (uint32_t k = 0; k < level * indent; k++) string_append_bytes(out, " ", 1); + } + + string_append_bytes(out, "]", 1); +} + +void serialize_object(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { + string_append_bytes(out, "{", 1); + + uint32_t n = v->u.object.count; + if (n == 0) { + string_append_bytes(out, "}", 1); + return; + } + + if (indent) string_append_bytes(out, "\n", 1); + + for (uint32_t i = 0; i < n; i++) { + JsonPair *p = &v->u.object.pairs[i]; + + if (indent) { + for (uint32_t k = 0; k < (level + 1) * indent; k++) string_append_bytes(out, " ", 1); + } + + serialize_string(&p->key, 
out); + string_append_bytes(out, ":", 1); + if (indent) string_append_bytes(out, " ", 1); + + serialize_value(p->value, out, indent, level + 1); + + if (i + 1 < n) string_append_bytes(out, ",", 1); + if (indent) string_append_bytes(out, "\n", 1); + } + + if (indent) { + for (uint32_t k = 0; k < level * indent; k++) string_append_bytes(out, " ", 1); + } + + string_append_bytes(out, "}", 1); +} + +void serialize_value(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { + if (v->kind == JSON_NULL) { + string_append_bytes(out, "null", 4); + return; + } + + if (v->kind == JSON_BOOL) { + if (v->u.boolean) string_append_bytes(out, "true", 4); + else string_append_bytes(out, "false", 5); + return; + } + + if (v->kind == JSON_INT) { + string s = string_format("%lli", (long long)v->u.integer); + string_append_bytes(out, s.data, s.length); + free_sized(s.data, s.mem_length); + return; + } + + if (v->kind == JSON_DOUBLE) { + string s = string_format("%.17g", v->u.real); + string_append_bytes(out, s.data, s.length); + free_sized(s.data, s.mem_length); + return; + } + + if (v->kind == JSON_STRING) { + serialize_string(&v->u.string, out); + return; + } + + if (v->kind == JSON_ARRAY) { + serialize_array(v, out, indent, level); + return; + } + + if (v->kind == JSON_OBJECT) { + serialize_object(v, out, indent, level); + return; + } +} + +JsonError json_serialize(const JsonValue *value, string *out, uint32_t indent) { + if (!value || !out) return JSON_ERR_INVALID; + *out = string_repeat('\0', 0); + serialize_value(value, out, indent, 0); + return JSON_OK; +} \ No newline at end of file diff --git a/shared/data/json.h b/shared/data/json.h new file mode 100644 index 00000000..37aecafa --- /dev/null +++ b/shared/data/json.h @@ -0,0 +1,99 @@ +#pragma once + +#include "types.h" +#include "std/string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + JSON_NULL = 0, + JSON_BOOL, + JSON_INT, + JSON_DOUBLE, + JSON_STRING, + JSON_ARRAY, + JSON_OBJECT +} 
JsonKind; + +struct JsonValue; + +typedef struct { + uint32_t count; + struct JsonValue **items; +} JsonArray; + +typedef struct { + string key; + struct JsonValue *value; +} JsonPair; + +typedef struct { + uint32_t count; + JsonPair *pairs; +} JsonObject; + +typedef struct JsonValue { + JsonKind kind; + union { + bool boolean; + int64_t integer; + double real; + string string; + JsonArray array; + JsonObject object; + } u; +} JsonValue; + +typedef enum { + JSON_OK = 0, + JSON_ERR_INVALID, + JSON_ERR_OOM, + JSON_ERR_TYPE +} JsonError; + +JsonError json_parse(const char *buf, uint32_t len, JsonValue **out); +JsonError json_serialize(const JsonValue *value, string *out, uint32_t indent); +void json_free(JsonValue *v); + +static inline bool json_is_null(const JsonValue *v) { return v && v->kind == JSON_NULL; } +static inline bool json_is_bool(const JsonValue *v) { return v && v->kind == JSON_BOOL; } +static inline bool json_is_int(const JsonValue *v) { return v && v->kind == JSON_INT; } +static inline bool json_is_double(const JsonValue *v) { return v && v->kind == JSON_DOUBLE; } +static inline bool json_is_number(const JsonValue *v) { return v && (v->kind == JSON_INT || v->kind == JSON_DOUBLE); } +static inline bool json_is_string(const JsonValue *v) { return v && v->kind == JSON_STRING; } +static inline bool json_is_array(const JsonValue *v) { return v && v->kind == JSON_ARRAY; } +static inline bool json_is_object(const JsonValue *v) { return v && v->kind == JSON_OBJECT; } + +bool json_get_bool(const JsonValue *v, bool *out); +bool json_get_int(const JsonValue *v, int64_t *out); +bool json_get_double(const JsonValue *v, double *out); +bool json_get_string(const JsonValue *v, string *out); +bool json_get_number_as_double(const JsonValue *v, double *out); + +uint32_t json_array_size(const JsonValue *v); +JsonValue *json_array_get(const JsonValue *v, uint32_t index); + +JsonValue *json_obj_get(const JsonValue *obj, const char *key); +bool json_obj_get_bool(const 
/**
 * Convert a complete decimal number token to a double.
 *
 * The token must be exactly: optional '-', one or more digits, an optional
 * '.' followed by one or more digits, and an optional 'e'/'E' exponent with
 * optional sign and one or more digits. Any leftover character makes the
 * conversion fail.
 *
 * @param buf  token text (not required to be NUL-terminated)
 * @param len  token length in bytes
 * @param out  receives the converted value on success
 * @return true on success; false on null/empty input or a malformed token
 *         (in which case *out is not written).
 */
static bool parse_number_double_token(const char *buf, uint32_t len, double *out) {
    /* A double saturates to 0 or infinity long before this magnitude, so the
       exponent is clamped here. That keeps the accumulator from overflowing
       `int` and bounds the scaling loop for tokens such as "1e2000000000". */
    const int exp_limit = 400;

    if (!buf || !len || !out) return false;

    uint32_t pos = 0;
    bool neg = false;

    if (buf[pos] == '-') {
        neg = true;
        pos++;
        if (pos >= len) return false;
    }

    if (buf[pos] < '0' || buf[pos] > '9') return false;

    double ip = 0.0;
    while (pos < len) {
        char c = buf[pos];
        if (c < '0' || c > '9') break;
        ip = ip * 10.0 + (double)(c - '0');
        pos++;
    }

    double fp = 0.0;
    if (pos < len && buf[pos] == '.') {
        pos++;
        if (pos >= len) return false;
        if (buf[pos] < '0' || buf[pos] > '9') return false;
        double base = 0.1;
        while (pos < len) {
            char c = buf[pos];
            if (c < '0' || c > '9') break;
            fp += (double)(c - '0') * base;
            base *= 0.1;
            pos++;
        }
    }

    int exp_val = 0;
    if (pos < len && (buf[pos] == 'e' || buf[pos] == 'E')) {
        pos++;
        if (pos >= len) return false;

        bool exp_neg = false;
        if (buf[pos] == '+' || buf[pos] == '-') {
            if (buf[pos] == '-') exp_neg = true;
            pos++;
            if (pos >= len) return false;
        }
        if (buf[pos] < '0' || buf[pos] > '9') return false;

        while (pos < len) {
            char c = buf[pos];
            if (c < '0' || c > '9') break;
            /* Keep consuming digits, but stop accumulating once saturated. */
            if (exp_val < exp_limit) exp_val = exp_val * 10 + (c - '0');
            pos++;
        }
        if (exp_val > exp_limit) exp_val = exp_limit;
        if (exp_neg) exp_val = -exp_val;
    }

    if (pos != len) return false;

    double val = ip + fp;
    if (neg) val = -val;

    /* Skip scaling for a zero mantissa: 0 * inf would otherwise yield NaN. */
    if (exp_val != 0 && val != 0.0) {
        double y = 1.0;
        if (exp_val > 0) {
            while (exp_val--) y *= 10.0;
        } else {
            while (exp_val++) y /= 10.0;
        }
        val *= y;
    }

    *out = val;
    return true;
}
return scan_eof(p->s); +} + +void parser_skip_ws(Parser *p) { + scan_skip_ws(p->s); +} + +bool parser_expect_char(Parser *p, char c, const char *msg) { + if (p->failed) return false; + + if (!scan_match(p->s, c)) { + parser_fail(p, scan_eof(p->s) ? PARSER_ERR_EOF : PARSER_ERR_UNEXPECTED_CHAR, msg); + return false; + } + return true; +} + +bool parser_expect_string(Parser *p, const char *lit, const char *msg) { + if (p->failed) return false; + + if (!scan_match_string(p->s, lit)) { + parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, msg); + return false; + } + return true; +} + +bool parser_read_identifier(Parser *p, string *out) { + if (p->failed) return false; + + if (parser_eof(p)) { + parser_fail(p, PARSER_ERR_EOF, 0); + return false; + } + uint32_t start = p->s->pos; + char c = parser_peek(p); + if (!(is_alpha(c) || c == '_')) { + parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, 0); + return false; + } + parser_next(p); + while (!parser_eof(p)) { + char d = parser_peek(p); + if (!(is_alnum(d) || d == '_')) break; + parser_next(p); + } + uint32_t end = p->s->pos; + *out = string_from_literal_length(p->s->buf + start, end - start); + return true; +} + +bool parser_read_number_string(Parser *p, string *out) { + if (p->failed) return false; + + if (!scan_read_number_token(p->s, out)) { + parser_fail(p, PARSER_ERR_INVALID_NUMBER, 0); + return false; + } + return true; +} + +bool parser_read_quoted_string(Parser *p, string *out) { + if (p->failed) return false; + + if (!scan_read_string_token(p->s, out)) { + parser_fail(p, PARSER_ERR_INVALID_STRING, 0); + return false; + } + return true; +} + +bool parser_read_int64(Parser *p, int64_t *out) { + if (p->failed) return false; + + string tmp; + if (!parser_read_number_string(p, &tmp)) return false; + + bool ok = true; + for (uint32_t i = 0; i < tmp.length; i++) { + char c = tmp.data[i]; + if (i == 0 && c == '-') continue; + if (c < '0' || c > '9') { + ok = false; + break; + } + } + + if (!ok) { + string_free(tmp); + parser_fail(p, 
PARSER_ERR_INVALID_NUMBER, 0); + return false; + } + + int64_t v = parse_int64(tmp.data, tmp.length); + string_free(tmp); + *out = v; + return true; +} + +bool parser_read_uint64(Parser *p, uint64_t *out) { + if (p->failed) return false; + + string tmp; + if (!parser_read_number_string(p, &tmp)) return false; + + bool ok = true; + for (uint32_t i = 0; i < tmp.length; i++) { + char c = tmp.data[i]; + if (c < '0' || c > '9') { + ok = false; + break; + } + } + + if (!ok) { + string_free(tmp); + parser_fail(p, PARSER_ERR_INVALID_NUMBER, 0); + return false; + } + + uint64_t v = parse_int_u64(tmp.data, tmp.length); + string_free(tmp); + *out = v; + return true; +} + +bool parser_read_double(Parser *p, double *out) { + if (p->failed) return false; + + string tmp; + if (!parser_read_number_string(p, &tmp)) return false; + + double v = 0.0; + bool ok = parse_number_double_token(tmp.data, tmp.length, &v); + string_free(tmp); + + if (!ok) { + parser_fail(p, PARSER_ERR_INVALID_NUMBER, 0); + return false; + } + + *out = v; + return true; +} + +bool parser_span(Parser *p, ParserMark m, string *out) { + if (p->failed) return false; + + if (p->s->pos < m.pos) { + parser_fail(p, PARSER_ERR_GENERIC, 0); + return false; + } + uint32_t start = m.pos; + uint32_t end = p->s->pos; + *out = string_from_literal_length(p->s->buf + start, end - start); + return true; +} + +bool parser_read_until(Parser *p, char stop, string *out) { + if (p->failed) return false; + + if (!scan_read_until(p->s, stop, out)) { + parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, 0); + return false; + } + return true; +} + +bool parser_read_operator(Parser *p, string *out) { + if (p->failed) return false; + + if (!scan_read_operator(p->s, out)) { + parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, 0); + return false; + } + return true; +} + +bool parser_expect_operator(Parser *p, const char *op, const char *msg) { + + if (p->failed) return false; + + ParserMark m = parser_mark(p); + + string tmp; + if 
(!scan_read_operator(p->s, &tmp)) { + parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, msg); + return false; + } + + if (strcmp(tmp.data, op) != 0) { + parser_reset(p, m); + string_free(tmp); + parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, msg); + return false; + } + + string_free(tmp); + return true; +} \ No newline at end of file diff --git a/shared/data/parser/parser.h b/shared/data/parser/parser.h new file mode 100644 index 00000000..150ee4d4 --- /dev/null +++ b/shared/data/parser/parser.h @@ -0,0 +1,68 @@ +#pragma once + +#include "types.h" +#include "data/scanner/scanner.h" +#include "std/string.h" + +typedef enum { + PARSER_ERR_NONE = 0, + PARSER_ERR_GENERIC, + PARSER_ERR_EOF, + PARSER_ERR_UNEXPECTED_CHAR, + PARSER_ERR_UNEXPECTED_TOKEN, + PARSER_ERR_INVALID_NUMBER, + PARSER_ERR_INVALID_STRING +} ParserError; + +typedef struct { + Scanner *s; + bool failed; + uint32_t err_pos; + const char *err_msg; + ParserError err; +} Parser; + +typedef struct { + uint32_t pos; +} ParserMark; + +static inline Parser parser_make(Scanner *s) { + Parser p; + p.s = s; + p.failed = false; + p.err_pos = 0; + p.err_msg = 0; + p.err = PARSER_ERR_NONE; + return p; +} + +static inline bool parser_ok(const Parser *p) { + return !p->failed; +} + +ParserMark parser_mark(Parser *p); +void parser_reset(Parser *p, ParserMark m); + +void parser_fail(Parser *p, ParserError err, const char *msg); + +char parser_peek(Parser *p); +char parser_next(Parser *p); +bool parser_eof(Parser *p); + +void parser_skip_ws(Parser *p); + +bool parser_expect_char(Parser *p, char c, const char *msg); +bool parser_expect_string(Parser *p, const char *lit, const char *msg); + +bool parser_read_identifier(Parser *p, string *out); +bool parser_read_number_string(Parser *p, string *out); +bool parser_read_quoted_string(Parser *p, string *out); + +bool parser_read_int64(Parser *p, int64_t *out); +bool parser_read_uint64(Parser *p, uint64_t *out); +bool parser_read_double(Parser *p, double *out); + +bool parser_span(Parser 
*p, ParserMark m, string *out); +bool parser_read_until(Parser *p, char stop, string *out); +bool parser_read_operator(Parser *p, string *out); +bool parser_expect_operator(Parser *p, const char *op, const char *msg); diff --git a/shared/data/scanner/scanner.c b/shared/data/scanner/scanner.c new file mode 100644 index 00000000..9141227f --- /dev/null +++ b/shared/data/scanner/scanner.c @@ -0,0 +1,236 @@ +#include "scanner.h" +#include "std/string.h" + +static const char *ops3[] = {">>>", "<<=", ">>=", "===", 0}; + +static const char *ops2[] = {"==","!=", "<=", ">=", "&&", "||", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "::", "->", 0}; + +static const char ops1[] = "+-*/%<>=!&|^~?:;.,(){}[]"; + +bool scan_eof(Scanner *s) { + return s->pos >= s->len; +} + +char scan_peek(Scanner *s) { + if (s->pos >= s->len) return 0; + return s->buf[s->pos]; +} + +char scan_next(Scanner *s) { + if (s->pos >= s->len) return 0; + return s->buf[s->pos++]; +} + +bool scan_match(Scanner *s, char c) { + if (scan_eof(s)) return false; + if (s->buf[s->pos] != c) return false; + s->pos++; + return true; +} + +bool scan_match_string(Scanner *s, const char *str) { + uint32_t i = 0; + while (str[i]) { + if (s->pos + i >= s->len) return false; + if (s->buf[s->pos + i] != str[i]) return false; + i++; + } + s->pos += i; + return true; +} + +void scan_skip_ws(Scanner *s) { + while (!scan_eof(s)) { + char c = s->buf[s->pos]; + if (c==' '||c=='\n'||c=='\t'||c=='\r') s->pos++; + else break; + } +} + +static bool is_digit(char c) { + return c >= '0' && c <= '9'; +} + +bool scan_read_until(Scanner *s, char stop, string *out) { + uint32_t start = s->pos; + while (!scan_eof(s) && s->buf[s->pos] != stop) s->pos++; + if (s->pos == start) return false; + *out = string_from_literal_length(s->buf + start, s->pos - start); + return true; +} + +bool scan_read_string_token(Scanner *s, string *out) { + if (!scan_match(s, '"')) return false; + + string tmp = string_repeat('\0', 0); + while 
(!scan_eof(s)) { + char c = scan_next(s); + if (c == '"') { + *out = tmp; + return true; + } + + if (c == '\\') { + if (scan_eof(s)) { string_free(tmp); return false; } + char e = scan_next(s); + if (e == 'u') { + if (s->pos + 4 > s->len) { string_free(tmp); return false; } + string_append_bytes(&tmp, "\\u", 2); + for (int i=0;i<4;i++) { + char h = scan_next(s); + string_append_bytes(&tmp, &h, 1); + } + continue; + } + + char r = e; + if (e=='b') r='\b'; + else if (e=='f') r='\f'; + else if (e=='n') r='\n'; + else if (e=='r') r='\r'; + else if (e=='t') r='\t'; + else if (!(e=='"'||e=='\\'||e=='/')) { string_free(tmp); return false; } + string_append_bytes(&tmp, &r, 1); + } else { + string_append_bytes(&tmp, &c, 1); + } + } + + string_free(tmp); + return false; +} + +bool scan_read_number_token(Scanner *s, string *out) { + uint32_t start = s->pos; + const char *buf = s->buf; + uint32_t len = s->len; + uint32_t pos = start; + + if (pos >= len) return false; + + if (buf[pos] == '-') { + pos++; + if (pos >= len) return false; + } + + if (pos < len && buf[pos] == '0' && pos + 1 < len) { + char p = buf[pos + 1]; + + if (p == 'x' || p == 'X') { + pos += 2; + if (pos >= len) return false; + + int ok = 0; + while (pos < len) { + char c = buf[pos]; + if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + pos++; + ok = 1; + } else break; + } + + if (!ok) return false; + + s->pos = pos; + *out = string_from_literal_length(buf + start, pos - start); + return true; + } + + if (p == 'b' || p == 'B') { + pos += 2; + if (pos >= len) return false; + int ok = 0; + while (pos < len) { + char c = buf[pos]; + if (c == '0' || c == '1') { + pos++; + ok = 1; + } else break; + } + if (!ok) return false; + s->pos = pos; + *out = string_from_literal_length(buf + start, pos - start); + return true; + } + + if (p == 'o' || p == 'O') { + pos += 2; + if (pos >= len) return false; + int ok = 0; + while (pos < len) { + char c = buf[pos]; + if (c >= '0' && c <= '7') { + 
pos++; + ok = 1; + } else break; + } + if (!ok) return false; + s->pos = pos; + *out = string_from_literal_length(buf + start, pos - start); + return true; + } + } + + if (pos >= len || !(buf[pos] >= '0' && buf[pos] <= '9')) return false; + + while (pos < len && buf[pos] >= '0' && buf[pos] <= '9') pos++; + + uint32_t int_end = pos; + + if (pos < len && buf[pos] == '.') { + pos++; + if (pos < len && buf[pos] >= '0' && buf[pos] <= '9') { + while (pos < len && buf[pos] >= '0' &&buf[pos] <= '9') pos++; + } else { + pos = int_end; + } + } + + uint32_t mant_end = pos; + + if (pos < len && (buf[pos] == 'e' || buf[pos] == 'E')) { + uint32_t exp_start = pos; + pos++; + if (pos < len && (buf[pos] == '+' || buf[pos] == '-')) pos++; + + if (pos < len && (buf[pos] >= '0' && buf[pos] <= '9')) { + while (pos < len && (buf[pos] >= '0' && buf[pos] <= '9')) pos++; + mant_end = pos; + } else { + pos =exp_start; + } + } + + s->pos = mant_end; + *out = string_from_literal_length(buf + start, mant_end - start); + return true; +} + +bool scan_read_operator(Scanner *s, string *out) { + if (scan_eof(s)) return false; + + for (int i=0; ops3[i]; i++) { + if (scan_match_string(s, ops3[i])) { + *out = string_from_literal(ops3[i]); + return true; + } + } + + for (int i=0; ops2[i]; i++) { + if (scan_match_string(s, ops2[i])) { + *out = string_from_literal(ops2[ i]); + return true; + } + } + + char c = scan_peek(s); + for (int i=0; ops1[i]; i++) { + if (c == ops1[i]) { + scan_next(s); + *out = string_from_char(c); + return true; + } + } + + return false; +} \ No newline at end of file diff --git a/shared/data/scanner/scanner.h b/shared/data/scanner/scanner.h new file mode 100644 index 00000000..7b2e82c2 --- /dev/null +++ b/shared/data/scanner/scanner.h @@ -0,0 +1,27 @@ +#pragma once +#include "types.h" +#include "std/std.h" + +typedef struct { + const char *buf; + uint32_t len; + uint32_t pos; +} Scanner; + +static inline Scanner scanner_make(const char *buf, uint32_t len) { + return 
(Scanner){buf, len, 0}; +} + +bool scan_eof(Scanner *s); +char scan_peek(Scanner *s); +char scan_next(Scanner *s); + +bool scan_match(Scanner *s, char c); +bool scan_match_string(Scanner *s, const char *str); + +void scan_skip_ws(Scanner *s); + +bool scan_read_until(Scanner *s, char stop, string *out); +bool scan_read_string_token(Scanner *s, string *out); +bool scan_read_number_token(Scanner *s, string *out); +bool scan_read_operator(Scanner *s, string *out); \ No newline at end of file diff --git a/shared/std/string.c b/shared/std/string.c index 1bf3bf8c..2ac6e5fd 100644 --- a/shared/std/string.c +++ b/shared/std/string.c @@ -103,7 +103,7 @@ static inline uint32_t u64_to_dec(char *tmp, uint64_t v) { return n; } -static inline uint32_t u64_to_base(char *tmp, uint64_t v, unsigned base, int upper) { +uint32_t u64_to_base(char *tmp, uint64_t v, unsigned base, int upper) { const char *hx = upper ? "0123456789ABCDEF" : "0123456789abcdef"; uint32_t n = 0; diff --git a/shared/std/string.h b/shared/std/string.h index e81d6c00..ec8b3e85 100644 --- a/shared/std/string.h +++ b/shared/std/string.h @@ -22,6 +22,7 @@ typedef struct string_list { extern void free_sized(void*,size_t); +uint32_t u64_to_base(char *tmp, uint64_t v, unsigned base, int upper); size_t strlen_max(const char *s, uint32_t max_length); static inline size_t strlen(const char *s) { return strlen_max(s,0); } string string_from_literal(const char *literal); From e6b18e370b6aac039d9240b9f3a588247abc3563 Mon Sep 17 00:00:00 2001 From: CodeAnarchist Date: Thu, 11 Dec 2025 02:11:58 +0100 Subject: [PATCH 3/6] [DATA] add tokenizer, migrate json parser to it, introduce token utilities --- shared/data/helpers/token_stream.c | 75 ++++++ shared/data/helpers/token_stream.h | 22 ++ shared/data/helpers/token_utils.c | 127 +++++++++ shared/data/helpers/token_utils.h | 14 + shared/data/json.c | 335 +++++++++++------------- shared/data/json.h | 7 +- shared/data/parser/parser.c | 321 ----------------------- 
shared/data/parser/parser.h | 68 ----- shared/data/scanner/scanner.c | 197 +------------- shared/data/scanner/scanner.h | 13 +- shared/data/tokenizer/tokenizer.c | 401 +++++++++++++++++++++++++++++ shared/data/tokenizer/tokenizer.h | 63 +++++ 12 files changed, 865 insertions(+), 778 deletions(-) create mode 100644 shared/data/helpers/token_stream.c create mode 100644 shared/data/helpers/token_stream.h create mode 100644 shared/data/helpers/token_utils.c create mode 100644 shared/data/helpers/token_utils.h delete mode 100644 shared/data/parser/parser.c delete mode 100644 shared/data/parser/parser.h create mode 100644 shared/data/tokenizer/tokenizer.c create mode 100644 shared/data/tokenizer/tokenizer.h diff --git a/shared/data/helpers/token_stream.c b/shared/data/helpers/token_stream.c new file mode 100644 index 00000000..bbb1ad17 --- /dev/null +++ b/shared/data/helpers/token_stream.c @@ -0,0 +1,75 @@ +#include "token_stream.h" + +void ts_init(TokenStream *ts, Tokenizer *tz) { + ts->tz = tz; + ts->has_cur = false; +} + +bool ts_peek(TokenStream *ts, Token *t) { + if (!ts->has_cur) { + if (!tokenizer_next(ts->tz,&ts->cur)) return false; + ts->has_cur = true; + } + if (t) *t = ts->cur; + return true; +} + +bool ts_next(TokenStream *ts, Token *t) { + if (ts->has_cur) { + if (t) *t = ts->cur; + ts->has_cur = false; + return true; + } + return tokenizer_next(ts->tz, t); +} + +bool ts_expect(TokenStream *ts, TokenKind k, Token *out) { + Token t; + if (!ts_next(ts, &t)) return false; + if (t.kind != k) return false; + if (out) *out = t; + return true; +} + +bool ts_expect_operator(TokenStream *ts, const char *op) { + Token t; + if (!ts_next(ts, &t)) return false; + return token_is_operator_token(&t, op); +} + +bool ts_expect_identifier(TokenStream *ts, string *out) { + Token t; + if (!ts_next(ts, &t)) return false; + if (t.kind != TOK_IDENTIFIER) return false; + if (out) *out = string_from_literal_length(t.start, t.length); + return true; +} + +bool 
ts_expect_number(TokenStream *ts,double *out_double) { + Token a, b; + if (!ts_peek(ts, &a))return false; + + if (a.kind == TOK_OPERATOR && token_is_operator_token(&a, "-")) { + ts_next(ts, &a); + if (!ts_peek(ts, &b)) return false; + if (!token_is_number(&b)) return false; + ts_next(ts, &b); + + string merged; + if (!token_merge_negative_number(&a, &b, &merged)) return false; + + Token tmp; + tmp.kind = TOK_NUMBER; + tmp.start=merged.data; + tmp.length = merged.length; + tmp.pos = 0; + + bool ok = token_to_double(&tmp, out_double); + string_free(merged); + return ok; + } + + ts_next(ts, &a); + if (!token_is_number(&a)) return false; + return token_to_double(&a, out_double); +} \ No newline at end of file diff --git a/shared/data/helpers/token_stream.h b/shared/data/helpers/token_stream.h new file mode 100644 index 00000000..6a198136 --- /dev/null +++ b/shared/data/helpers/token_stream.h @@ -0,0 +1,22 @@ +#pragma once + +#include "types.h" +#include "std/string.h" +#include "std/std.h" +#include "data/tokenizer/tokenizer.h" +#include "token_utils.h" + +typedef struct { + Tokenizer *tz; + Token cur; + bool has_cur; +} TokenStream; + +void ts_init(TokenStream *ts, Tokenizer *tz); +bool ts_peek(TokenStream *ts, Token *t); +bool ts_next(TokenStream *ts, Token *t); + +bool ts_expect(TokenStream *ts, TokenKind k, Token *out); +bool ts_expect_operator(TokenStream *ts, const char *op); +bool ts_expect_identifier(TokenStream *ts, string *out); +bool ts_expect_number(TokenStream *ts, double *out_double); \ No newline at end of file diff --git a/shared/data/helpers/token_utils.c b/shared/data/helpers/token_utils.c new file mode 100644 index 00000000..40044c93 --- /dev/null +++ b/shared/data/helpers/token_utils.c @@ -0,0 +1,127 @@ +#include "token_utils.h" +#include "std/std.h" + +bool token_is_number(const Token *t) { + return t && t->kind == TOK_NUMBER; +} + +bool token_is_operator_token(const Token *t, const char *op) { + if (!t || t->kind != TOK_OPERATOR) return false; + 
uint32_t n = (uint32_t)strlen(op); + if (t->length != n) return false; + return strncmp(t->start, op, n) == 0; +} + +bool token_is_negative_number(const Token *op, const Token *num) { + if (!op || !num) return false; + if (!token_is_operator_token(op, "-")) return false; + return token_is_number(num); +} + +bool token_merge_negative_number(const Token *op, const Token *num, string *out) { + if (!token_is_negative_number(op, num)) return false; + *out = string_repeat('\0', 0); + string_append_bytes(out, "-", 1); + string_append_bytes(out, num->start,num->length); + return true; +} + +bool token_to_int64(const Token *t, int64_t *out) { + if (!t || !token_is_number(t)) return false; + string tmp = string_from_literal_length(t->start, t->length); + int64_t v = parse_int64(tmp.data, tmp.length); + string_free(tmp); + *out = v; + return true; +} + +bool token_to_uint64(const Token *t, uint64_t *out) { + if (!t || !token_is_number(t)) return false; + string tmp = string_from_literal_length(t->start, t->length); + uint64_t v= parse_int_u64(tmp.data, tmp.length); + string_free(tmp); + *out = v; + return true; +} + +static bool parse_double_literal(const char *buf, uint32_t len, double *out) { + if (!buf || !len) return false; + + uint32_t pos = 0; + if (buf[pos] < '0' || buf[pos] > '9') return false; + + double ip = 0.0; + while (pos < len) { + char c = buf[pos]; + if (c < '0' || c > '9') break; + ip = ip * 10.0 + (double)(c - '0'); + pos++; + } + + double fp = 0.0; + if (pos < len && buf[pos] == '.') { + uint32_t p2 = pos + 1; + if (p2 < len && buf[p2] >= '0' && buf[p2] <= '9') { + pos = p2; + double base = 0.1; + while (pos < len) { + char c = buf[pos]; + if (c < '0' || c > '9') break; + + fp += (double)(c - '0') * base; + base *= 0.1; + pos++; + } + } + } + + int exp_val = 0; + if (pos < len && (buf[pos] == 'e' || buf[pos] == 'E')) { + pos++; + if (pos >= len) return false; + + bool exp_neg = false; + if (buf[pos] == '+' || buf[pos] == '-') { + if (buf[pos] == '-') 
exp_neg = true; + pos++; + if (pos >= len) return false; + } + + if (buf[pos] < '0' || buf[pos] > '9') return false; + + while (pos < len) { + char c = buf[pos]; + if (c < '0' || c > '9') break; + exp_val = exp_val * 10 + (c - '0'); + pos++; + } + if (exp_neg) exp_val = -exp_val; + } + + if (pos != len) return false; + + double val = ip + fp; + if (exp_val != 0) { + double y = 1.0; + if (exp_val > 0) { + while (exp_val--) y *= 10.0; + } else { + while (exp_val++) y /= 10.0; + } + val *= y; + } + + *out = val; + return true; +} + +bool token_to_double(const Token *t, double *out) { + if (!t || !token_is_number(t)) return false; + string tmp = string_from_literal_length(t->start, t->length); + double v = 0.0; + bool ok = parse_double_literal(tmp.data, tmp.length, &v); + string_free(tmp); + if (!ok) return false; + *out = v; + return true; +} \ No newline at end of file diff --git a/shared/data/helpers/token_utils.h b/shared/data/helpers/token_utils.h new file mode 100644 index 00000000..143b0853 --- /dev/null +++ b/shared/data/helpers/token_utils.h @@ -0,0 +1,14 @@ +#pragma once + +#include "types.h" +#include "std/string.h" +#include "data/tokenizer/tokenizer.h" + +bool token_is_number(const Token *t); +bool token_is_operator_token(const Token *t, const char *op); +bool token_is_negative_number(const Token *op, const Token *num); +bool token_merge_negative_number(const Token *op, const Token *num, string *out); + +bool token_to_int64(const Token *t, int64_t *out); +bool token_to_uint64(const Token *t, uint64_t *out); +bool token_to_double(const Token *t, double *out); \ No newline at end of file diff --git a/shared/data/json.c b/shared/data/json.c index c43b579a..d962ed49 100644 --- a/shared/data/json.c +++ b/shared/data/json.c @@ -1,24 +1,24 @@ #include "json.h" #include "std/std.h" +#include "std/string.h" #include "syscalls/syscalls.h" +#include "data/scanner/scanner.h" +#include "data/tokenizer/tokenizer.h" +#include "helpers/token_stream.h" +#include 
"helpers/token_utils.h" -JsonError parse_value(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out); +static JsonError json_parse_value(TokenStream *ts, JsonValue **out); -static void json_skip_whitespace(const char *buf, uint32_t len, uint32_t *pos) { - while (*pos < len) { - char c = buf[*pos]; - if (c == ' ' || c == '\t' || c == '\n' || c == '\r') (*pos)++; - else break; - } -} +static JsonError json_parse_string_token(Token *tok, JsonValue **out) { + const char *buf = tok->start; + uint32_t len = tok->length; + if (len < 2 || buf[0] != '"' || buf[len - 1] != '"') return JSON_ERR_INVALID; -JsonError parse_string(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out) { - if (!(*pos < len && buf[*pos] == '"')) return JSON_ERR_INVALID; - (*pos)++; + uint32_t pos = 1; string s = string_repeat('\0', 0); - while (*pos < len) { - char c = buf[(*pos)++]; + while (pos < len) { + char c = buf[pos++]; if (c == '"') { JsonValue *v = malloc(sizeof(JsonValue)); if (!v) { @@ -31,11 +31,11 @@ JsonError parse_string(const char *buf, uint32_t len, uint32_t *pos, JsonValue * return JSON_OK; } if (c == '\\') { - if (*pos >= len) { + if (pos >= len) { free_sized(s.data, s.mem_length); return JSON_ERR_INVALID; } - char e = buf[(*pos)++]; + char e = buf[pos++]; char r = e; if (e == 'b') r = '\b'; else if (e == 'f') r = '\f'; @@ -56,106 +56,69 @@ JsonError parse_string(const char *buf, uint32_t len, uint32_t *pos, JsonValue * return JSON_ERR_INVALID; } -JsonError parse_number(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out) { - uint32_t start = *pos; - bool neg = false; - - if (*pos < len && buf[*pos] == '-') { - neg = true; - (*pos)++; - } - - if (!(*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9')) return JSON_ERR_INVALID; - while (*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9') (*pos)++; - - bool has_frac = false; - uint32_t frac_start = 0; - - if (*pos < len && buf[*pos] == '.') { - has_frac = true; - (*pos)++; - if (!(*pos < len && buf[*pos] 
>= '0' && buf[*pos] <= '9')) return JSON_ERR_INVALID; - frac_start = *pos; - while (*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9') (*pos)++; +static JsonError json_parse_number_tokens(TokenStream *ts, JsonValue **out){ + Token a, b; + if (!ts_peek(ts, &a)) { + if (ts->tz->failed) return JSON_ERR_INVALID; + return JSON_ERR_INVALID; } - bool has_exp = false; - bool exp_neg = false; - int exp_val = 0; + bool negative = false; + Token num; - if (*pos < len && (buf[*pos] == 'e' || buf[*pos] == 'E')) { - has_exp = true; - (*pos)++; - if (*pos < len && (buf[*pos] == '+' || buf[*pos] == '-')) { - if (buf[*pos] == '-') exp_neg = true; - (*pos)++; + if (a.kind == TOK_OPERATOR && token_is_operator_token(&a, "-")) { + ts_next(ts, &a); + if (!ts_next(ts, &b)) { + if (ts->tz->failed)return JSON_ERR_INVALID; + return JSON_ERR_INVALID; } - if (!(*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9')) return JSON_ERR_INVALID; - while (*pos < len && buf[*pos] >= '0' && buf[*pos] <= '9') { - exp_val = exp_val * 10 + (buf[*pos] - '0'); - (*pos)++; + if (!token_is_number(&b)) return JSON_ERR_INVALID; + negative = true; + num = b; + } else { + if (!ts_next(ts, &a)) { + if (ts->tz->failed) return JSON_ERR_INVALID; + return JSON_ERR_INVALID; } - if (exp_neg) exp_val = -exp_val; + if (!token_is_number(&a)) return JSON_ERR_INVALID; + num = a; } - uint32_t end = *pos; - if (end <= start) return JSON_ERR_INVALID; - - if (!has_frac && !has_exp) { - int64_t x = 0; - uint32_t i = start + (neg ? 1u : 0u); - for (; i < end; i++) x = x * 10 + (buf[i] - '0'); - if (neg) x = -x; + bool is_int = true; + for (uint32_t i = 0; i < num.length; i++) { + char c = num.start[i]; + if (c == '.' 
|| c == 'e' || c == 'E') { + is_int = false; + break; + } + } + if (is_int) { + int64_t iv; + if (!token_to_int64(&num, &iv)) return JSON_ERR_INVALID; + if (negative) iv =-iv; JsonValue *v = malloc(sizeof(JsonValue)); if (!v) return JSON_ERR_OOM; v->kind = JSON_INT; - v->u.integer = x; + v->u.integer = iv; *out = v; return JSON_OK; } - double ip = 0.0; - uint32_t i = start + (neg ? 1u : 0u); - for (; i < end; i++) { - char c = buf[i]; - if (c == '.' || c == 'e' || c == 'E') break; - ip = ip * 10.0 + (double)(c - '0'); - } - - double fp = 0.0; - if (has_frac) { - double base = 0.1; - for (i = frac_start; i < end; i++) { - char c = buf[i]; - if (c == 'e' || c == 'E') break; - fp += (double)(c - '0') * base; - base *= 0.1; - } - } - - double val = ip + fp; - if (neg) val = -val; - - if (has_exp) { - double y = 1.0; - if (exp_val > 0) while (exp_val--) y *= 10.0; - else while (exp_val++) y /= 10.0; - val *= y; - } - + double d; + if (!token_to_double(&num, &d)) return JSON_ERR_INVALID; + if (negative) d = -d; JsonValue *v = malloc(sizeof(JsonValue)); if (!v) return JSON_ERR_OOM; v->kind = JSON_DOUBLE; - v->u.real = val; + v->u.real = d; *out = v; return JSON_OK; } -JsonError parse_array(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out) { - if (!(*pos < len && buf[*pos] == '[')) return JSON_ERR_INVALID; - (*pos)++; - json_skip_whitespace(buf, len, pos); +static JsonError json_parse_array(TokenStream *ts, JsonValue **out) { + Token t; + if (!ts_expect(ts, TOK_LBRACKET, &t)) return JSON_ERR_INVALID; JsonValue *arr = malloc(sizeof(JsonValue)); if (!arr) return JSON_ERR_OOM; @@ -163,15 +126,22 @@ JsonError parse_array(const char *buf, uint32_t len, uint32_t *pos, JsonValue ** arr->u.array.items = 0; arr->u.array.count = 0; - if (*pos < len && buf[*pos] == ']') { - (*pos)++; + Token p; + if (!ts_peek(ts, &p)) { + json_free(arr); + if (ts->tz->failed) return JSON_ERR_INVALID; + return JSON_ERR_INVALID; + } + + if (p.kind == TOK_RBRACKET) { + ts_next(ts, &p); *out 
= arr; return JSON_OK; } for (;;) { JsonValue *elem = 0; - JsonError e = parse_value(buf, len, pos, &elem); + JsonError e = json_parse_value(ts, &elem); if (e != JSON_OK) { json_free(arr); return e; @@ -180,8 +150,8 @@ JsonError parse_array(const char *buf, uint32_t len, uint32_t *pos, JsonValue ** uint32_t n = arr->u.array.count; JsonValue **tmp = malloc((n + 1) * sizeof(JsonValue *)); if (!tmp) { - json_free(elem); json_free(arr); + json_free(elem); return JSON_ERR_OOM; } @@ -192,68 +162,75 @@ JsonError parse_array(const char *buf, uint32_t len, uint32_t *pos, JsonValue ** arr->u.array.items = tmp; arr->u.array.count = n + 1; - json_skip_whitespace(buf, len, pos); + if (!ts_peek(ts, &p)) { + json_free(arr); + if (ts->tz->failed) return JSON_ERR_INVALID; + return JSON_ERR_INVALID; + } - if (*pos < len && buf[*pos] == ']') { - (*pos)++; + if (p.kind == TOK_RBRACKET) { + ts_next(ts, &p); break; } - if (!(*pos < len && buf[*pos] == ',')) { + if (!ts_expect(ts, TOK_COMMA, &t)) { json_free(arr); + if (ts->tz->failed) return JSON_ERR_INVALID; return JSON_ERR_INVALID; } - - (*pos)++; - json_skip_whitespace(buf, len, pos); } *out = arr; return JSON_OK; } -JsonError parse_object(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out) { - if (!(*pos < len && buf[*pos] == '{')) return JSON_ERR_INVALID; - (*pos)++; - json_skip_whitespace(buf, len, pos); +static JsonError json_parse_object(TokenStream *ts, JsonValue **out) { + Token t; + if (!ts_expect(ts, TOK_LBRACE, &t)) return JSON_ERR_INVALID; - JsonValue *obj = malloc(sizeof(JsonValue)); + JsonValue *obj =malloc(sizeof(JsonValue)); if (!obj) return JSON_ERR_OOM; obj->kind = JSON_OBJECT; obj->u.object.pairs = 0; obj->u.object.count = 0; - if (*pos < len && buf[*pos] == '}') { - (*pos)++; + Token p; + if (!ts_peek(ts, &p)) { + json_free(obj); + if (ts->tz->failed) return JSON_ERR_INVALID; + return JSON_ERR_INVALID; + } + + if (p.kind == TOK_RBRACE) { + ts_next(ts, &p); *out = obj; return JSON_OK; } for (;;) { - 
JsonValue *ks = 0; - JsonError e = parse_string(buf, len, pos, &ks); - if (e != JSON_OK) { - if (ks) json_free(ks); + Token keytok; + if (!ts_expect(ts, TOK_STRING, &keytok)) { json_free(obj); + if (ts->tz->failed) return JSON_ERR_INVALID; return JSON_ERR_INVALID; } - string key = ks->u.string; - free_sized(ks, sizeof(JsonValue)); + if (keytok.length < 2) { + json_free(obj); + return JSON_ERR_INVALID; + } - json_skip_whitespace(buf, len, pos); + string key = string_from_literal_length(keytok.start + 1, keytok.length - 2); - if (!(*pos < len && buf[*pos] == ':')) { + if (!ts_expect(ts, TOK_COLON, &t)) { free_sized(key.data, key.mem_length); json_free(obj); + if (ts->tz->failed) return JSON_ERR_INVALID; return JSON_ERR_INVALID; } - (*pos)++; - json_skip_whitespace(buf, len, pos); - JsonValue *val = 0; - e = parse_value(buf, len, pos, &val); + JsonError e = json_parse_value(ts, &val); if (e != JSON_OK) { free_sized(key.data, key.mem_length); json_free(obj); @@ -277,40 +254,49 @@ JsonError parse_object(const char *buf, uint32_t len, uint32_t *pos, JsonValue * obj->u.object.pairs = tmp; obj->u.object.count = n + 1; - json_skip_whitespace(buf, len, pos); + if (!ts_peek(ts, &p)) { + json_free(obj); + if (ts->tz->failed) return JSON_ERR_INVALID; + return JSON_ERR_INVALID; + } - if (*pos < len && buf[*pos] == '}') { - (*pos)++; + if (p.kind == TOK_RBRACE) { + ts_next(ts, &p); break; } - if (!(*pos < len && buf[*pos] == ',')) { + if (!ts_expect(ts, TOK_COMMA, &t)) { json_free(obj); + if (ts->tz->failed) return JSON_ERR_INVALID; return JSON_ERR_INVALID; } - - (*pos)++; - json_skip_whitespace(buf, len, pos); } *out = obj; return JSON_OK; } -JsonError parse_value(const char *buf, uint32_t len, uint32_t *pos, JsonValue **out) { - json_skip_whitespace(buf, len, pos); - if (*pos >= len) return JSON_ERR_INVALID; +static JsonError json_parse_value(TokenStream *ts, JsonValue **out) { + Token t; + if (!ts_peek(ts, &t)) { + if (ts->tz->failed) return JSON_ERR_INVALID; + return 
JSON_ERR_INVALID; + } + + if (t.kind == TOK_STRING) { + ts_next(ts,&t); + return json_parse_string_token(&t, out); + } - char c = buf[*pos]; + if (t.kind == TOK_NUMBER || (t.kind == TOK_OPERATOR && token_is_operator_token(&t, "-"))) { + return json_parse_number_tokens(ts, out); + } - if (c == '"') return parse_string(buf, len, pos, out); - if (c == '{') return parse_object(buf, len, pos, out); - if (c == '[') return parse_array(buf, len, pos, out); - if (c == '-' || (c >= '0' && c <= '9')) return parse_number(buf, len, pos, out); + if (t.kind == TOK_LBRACE) return json_parse_object(ts, out); + if (t.kind == TOK_LBRACKET) return json_parse_array(ts, out); - if (c == 't' && *pos + 4 <= len && - buf[*pos] == 't' && buf[*pos+1] == 'r' && buf[*pos+2] == 'u' && buf[*pos+3] == 'e') { - *pos += 4; + if (t.kind == TOK_IDENTIFIER && t.length == 4 && strncmp(t.start, "true", 4) == 0) { + ts_next(ts, &t); JsonValue *v = malloc(sizeof(JsonValue)); if (!v) return JSON_ERR_OOM; v->kind = JSON_BOOL; @@ -319,9 +305,8 @@ JsonError parse_value(const char *buf, uint32_t len, uint32_t *pos, JsonValue ** return JSON_OK; } - if (c == 'f' && *pos + 5 <= len && - buf[*pos] == 'f' && buf[*pos+1] == 'a' && buf[*pos+2] == 'l' && buf[*pos+3] == 's' && buf[*pos+4] == 'e') { - *pos += 5; + if (t.kind == TOK_IDENTIFIER && t.length ==5 &&strncmp(t.start, "false", 5) == 0) { + ts_next(ts, &t); JsonValue *v = malloc(sizeof(JsonValue)); if (!v) return JSON_ERR_OOM; v->kind = JSON_BOOL; @@ -330,9 +315,9 @@ JsonError parse_value(const char *buf, uint32_t len, uint32_t *pos, JsonValue ** return JSON_OK; } - if (c == 'n' && *pos + 4 <= len && - buf[*pos] == 'n' && buf[*pos+1] == 'u' && buf[*pos+2] == 'l' && buf[*pos+3] == 'l') { - *pos += 4; + if (t.kind == TOK_IDENTIFIER && t.length == 4 && + strncmp(t.start, "null", 4) == 0) { + ts_next(ts, &t); JsonValue *v = malloc(sizeof(JsonValue)); if (!v) return JSON_ERR_OOM; v->kind = JSON_NULL; @@ -344,14 +329,24 @@ JsonError parse_value(const char *buf, 
uint32_t len, uint32_t *pos, JsonValue ** } JsonError json_parse(const char *buf, uint32_t len, JsonValue **out) { - uint32_t pos = 0; - JsonError e = parse_value(buf, len, &pos, out); + Scanner s = scanner_make(buf, len); + Tokenizer tz = tokenizer_make(&s); + TokenStream ts; + ts_init(&ts, &tz); + + JsonError e =json_parse_value(&ts, out); if (e != JSON_OK) return e; - json_skip_whitespace(buf, len, &pos); - if (pos != len) { + + Token t; + if (!ts_peek(&ts, &t)) { json_free(*out); return JSON_ERR_INVALID; } + if (t.kind != TOK_EOF) { + json_free(*out); + return JSON_ERR_INVALID; + } + return JSON_OK; } @@ -360,15 +355,11 @@ void json_free(JsonValue *v) { if (v->kind == JSON_STRING) { free_sized(v->u.string.data, v->u.string.mem_length); - } - - else if (v->kind == JSON_ARRAY) { + } else if (v->kind == JSON_ARRAY) { for (uint32_t i = 0; i < v->u.array.count; i++) json_free(v->u.array.items[i]); if (v->u.array.items) free_sized(v->u.array.items, v->u.array.count * sizeof(JsonValue *)); - } - - else if (v->kind == JSON_OBJECT) { + } else if (v->kind == JSON_OBJECT) { for (uint32_t i = 0; i < v->u.object.count; i++) { free_sized(v->u.object.pairs[i].key.data, v->u.object.pairs[i].key.mem_length); json_free(v->u.object.pairs[i].value); @@ -437,22 +428,6 @@ JsonValue *json_obj_get(const JsonValue *obj, const char *key) { return 0; } -bool json_obj_get_bool(const JsonValue *obj, const char *key, bool *out) { - return json_get_bool(json_obj_get(obj, key), out); -} - -bool json_obj_get_int(const JsonValue *obj, const char *key, int64_t *out) { - return json_get_int(json_obj_get(obj, key), out); -} - -bool json_obj_get_double(const JsonValue *obj, const char *key, double *out) { - return json_get_double(json_obj_get(obj, key), out); -} - -bool json_obj_get_string(const JsonValue *obj, const char *key, string *out) { - return json_get_string(json_obj_get(obj, key), out); -} - JsonValue *json_new_null() { JsonValue *x = malloc(sizeof(JsonValue)); if (!x) return 0; @@ -519,7 
+494,7 @@ bool json_array_push(JsonValue *arr, JsonValue *elem) { tmp[n] = elem; if (arr->u.array.items) free_sized(arr->u.array.items, n * sizeof(JsonValue *)); arr->u.array.items = tmp; - arr-> u.array.count = n + 1; + arr->u.array.count = n + 1; return true; } @@ -533,7 +508,7 @@ bool json_obj_set(JsonValue *obj, const char *key, JsonValue *value) { free_sized(obj->u.object.pairs[i].key.data, obj->u.object.pairs[i].key.mem_length); obj->u.object.pairs[i].key = string_from_literal_length((char *)key, klen); json_free(obj->u.object.pairs[i].value); - obj->u.object.pairs[i ].value = value; + obj->u.object.pairs[i].value = value; return true; } } @@ -599,7 +574,7 @@ JsonValue *json_clone(const JsonValue *src) { return 0; } -void serialize_string(const string *s, string *out) { +static void serialize_string(const string *s, string *out) { string_append_bytes(out, "\"", 1); for (uint32_t i = 0; i < s->length; i++) { char c = s->data[i]; @@ -616,9 +591,9 @@ void serialize_string(const string *s, string *out) { string_append_bytes(out, "\"", 1); } -void serialize_value(const JsonValue *v, string *out, uint32_t indent, uint32_t level); +static void serialize_value(const JsonValue *v, string *out, uint32_t indent, uint32_t level); -void serialize_array(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { +static void serialize_array(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { string_append_bytes(out, "[", 1); uint32_t n = v->u.array.count; @@ -647,7 +622,7 @@ void serialize_array(const JsonValue *v, string *out, uint32_t indent, uint32_t string_append_bytes(out, "]", 1); } -void serialize_object(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { +static void serialize_object(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { string_append_bytes(out, "{", 1); uint32_t n = v->u.object.count; @@ -682,7 +657,7 @@ void serialize_object(const JsonValue *v, string *out, uint32_t indent, uint32_t 
string_append_bytes(out, "}", 1); } -void serialize_value(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { +static void serialize_value(const JsonValue *v, string *out, uint32_t indent, uint32_t level) { if (v->kind == JSON_NULL) { string_append_bytes(out, "null", 4); return; diff --git a/shared/data/json.h b/shared/data/json.h index 37aecafa..2deb3e8d 100644 --- a/shared/data/json.h +++ b/shared/data/json.h @@ -57,6 +57,7 @@ JsonError json_parse(const char *buf, uint32_t len, JsonValue **out); JsonError json_serialize(const JsonValue *value, string *out, uint32_t indent); void json_free(JsonValue *v); +JsonValue *json_obj_get(const JsonValue *obj, const char *key); static inline bool json_is_null(const JsonValue *v) { return v && v->kind == JSON_NULL; } static inline bool json_is_bool(const JsonValue *v) { return v && v->kind == JSON_BOOL; } static inline bool json_is_int(const JsonValue *v) { return v && v->kind == JSON_INT; } @@ -75,12 +76,6 @@ bool json_get_number_as_double(const JsonValue *v, double *out); uint32_t json_array_size(const JsonValue *v); JsonValue *json_array_get(const JsonValue *v, uint32_t index); -JsonValue *json_obj_get(const JsonValue *obj, const char *key); -bool json_obj_get_bool(const JsonValue *obj, const char *key, bool *out); -bool json_obj_get_int(const JsonValue *obj, const char *key, int64_t *out); -bool json_obj_get_double(const JsonValue *obj, const char *key, double *out); -bool json_obj_get_string(const JsonValue *obj, const char *key, string *out); - JsonValue *json_new_null(); JsonValue *json_new_bool(bool v); JsonValue *json_new_int(int64_t v); diff --git a/shared/data/parser/parser.c b/shared/data/parser/parser.c deleted file mode 100644 index c736b3c2..00000000 --- a/shared/data/parser/parser.c +++ /dev/null @@ -1,321 +0,0 @@ -#include "data/parser/parser.h" -#include "std/string.h" - -static bool is_alpha(char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); -} - -static bool is_alnum(char 
c) { - return is_alpha(c) || (c >= '0' && c <= '9'); -} - -static bool parse_number_double_token(const char *buf, uint32_t len, double *out) { - if (!buf || !len) return false; - - uint32_t pos = 0; - bool neg = false; - - if (buf[pos] == '-') { - neg = true; - pos++; - if (pos >= len) return false; - } - - if (buf[pos] < '0' || buf[pos] > '9') return false; - - double ip = 0.0; - while (pos < len) { - char c = buf[pos]; - if (c < '0' || c > '9') break; - ip = ip * 10.0 + (double)(c - '0'); - pos++; - } - - double fp = 0.0; - if (pos < len && buf[pos] == '.') { - pos++; - if (pos >= len) return false; - if (buf[pos] < '0' || buf[pos] > '9') return false; - double base = 0.1; - while (pos < len) { - char c = buf[pos]; - if (c < '0' || c > '9') break; - fp += (double)(c - '0') * base; - base *= 0.1; - pos++; - } - } - - int exp_val = 0; - if (pos < len && (buf[pos] == 'e' || buf[pos] == 'E')) { - pos++; - if (pos >= len) return false; - - bool exp_neg = false; - if (buf[pos] == '+' || buf[pos] == '-') { - if (buf[pos] == '-') exp_neg = true; - pos++; - if (pos >= len) return false; - } - if (buf[pos] < '0' || buf[pos] > '9') return false; - - while (pos < len) { - char c = buf[pos]; - if (c < '0' || c > '9') break; - exp_val = exp_val * 10 + (c - '0'); - pos++; - } - if (exp_neg) exp_val = -exp_val; - } - - if (pos != len) return false; - - double val = ip + fp; - if (neg) val = -val; - - if (exp_val != 0) { - double y = 1.0; - if (exp_val > 0) { - while (exp_val--) y *= 10.0; - } else { - while (exp_val++) y /= 10.0; - } - val *= y; - } - - *out = val; - return true; -} - -ParserMark parser_mark(Parser *p) { - ParserMark m; - m.pos = p->s->pos; - return m; -} - -void parser_reset(Parser *p, ParserMark m) { - p->s->pos = m.pos; - p->failed = false; - p->err = PARSER_ERR_NONE; - p->err_msg = 0; - p->err_pos = m.pos; -} - -void parser_fail(Parser *p, ParserError err, const char *msg) { - if (p->failed) return; - - p->failed = true; - p->err = err; - p->err_msg = msg; - 
p->err_pos = p->s->pos; -} - -char parser_peek(Parser *p) { - return scan_peek(p->s); -} - -char parser_next(Parser *p) { - return scan_next(p->s); -} - -bool parser_eof(Parser *p) { - return scan_eof(p->s); -} - -void parser_skip_ws(Parser *p) { - scan_skip_ws(p->s); -} - -bool parser_expect_char(Parser *p, char c, const char *msg) { - if (p->failed) return false; - - if (!scan_match(p->s, c)) { - parser_fail(p, scan_eof(p->s) ? PARSER_ERR_EOF : PARSER_ERR_UNEXPECTED_CHAR, msg); - return false; - } - return true; -} - -bool parser_expect_string(Parser *p, const char *lit, const char *msg) { - if (p->failed) return false; - - if (!scan_match_string(p->s, lit)) { - parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, msg); - return false; - } - return true; -} - -bool parser_read_identifier(Parser *p, string *out) { - if (p->failed) return false; - - if (parser_eof(p)) { - parser_fail(p, PARSER_ERR_EOF, 0); - return false; - } - uint32_t start = p->s->pos; - char c = parser_peek(p); - if (!(is_alpha(c) || c == '_')) { - parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, 0); - return false; - } - parser_next(p); - while (!parser_eof(p)) { - char d = parser_peek(p); - if (!(is_alnum(d) || d == '_')) break; - parser_next(p); - } - uint32_t end = p->s->pos; - *out = string_from_literal_length(p->s->buf + start, end - start); - return true; -} - -bool parser_read_number_string(Parser *p, string *out) { - if (p->failed) return false; - - if (!scan_read_number_token(p->s, out)) { - parser_fail(p, PARSER_ERR_INVALID_NUMBER, 0); - return false; - } - return true; -} - -bool parser_read_quoted_string(Parser *p, string *out) { - if (p->failed) return false; - - if (!scan_read_string_token(p->s, out)) { - parser_fail(p, PARSER_ERR_INVALID_STRING, 0); - return false; - } - return true; -} - -bool parser_read_int64(Parser *p, int64_t *out) { - if (p->failed) return false; - - string tmp; - if (!parser_read_number_string(p, &tmp)) return false; - - bool ok = true; - for (uint32_t i = 0; i < 
tmp.length; i++) { - char c = tmp.data[i]; - if (i == 0 && c == '-') continue; - if (c < '0' || c > '9') { - ok = false; - break; - } - } - - if (!ok) { - string_free(tmp); - parser_fail(p, PARSER_ERR_INVALID_NUMBER, 0); - return false; - } - - int64_t v = parse_int64(tmp.data, tmp.length); - string_free(tmp); - *out = v; - return true; -} - -bool parser_read_uint64(Parser *p, uint64_t *out) { - if (p->failed) return false; - - string tmp; - if (!parser_read_number_string(p, &tmp)) return false; - - bool ok = true; - for (uint32_t i = 0; i < tmp.length; i++) { - char c = tmp.data[i]; - if (c < '0' || c > '9') { - ok = false; - break; - } - } - - if (!ok) { - string_free(tmp); - parser_fail(p, PARSER_ERR_INVALID_NUMBER, 0); - return false; - } - - uint64_t v = parse_int_u64(tmp.data, tmp.length); - string_free(tmp); - *out = v; - return true; -} - -bool parser_read_double(Parser *p, double *out) { - if (p->failed) return false; - - string tmp; - if (!parser_read_number_string(p, &tmp)) return false; - - double v = 0.0; - bool ok = parse_number_double_token(tmp.data, tmp.length, &v); - string_free(tmp); - - if (!ok) { - parser_fail(p, PARSER_ERR_INVALID_NUMBER, 0); - return false; - } - - *out = v; - return true; -} - -bool parser_span(Parser *p, ParserMark m, string *out) { - if (p->failed) return false; - - if (p->s->pos < m.pos) { - parser_fail(p, PARSER_ERR_GENERIC, 0); - return false; - } - uint32_t start = m.pos; - uint32_t end = p->s->pos; - *out = string_from_literal_length(p->s->buf + start, end - start); - return true; -} - -bool parser_read_until(Parser *p, char stop, string *out) { - if (p->failed) return false; - - if (!scan_read_until(p->s, stop, out)) { - parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, 0); - return false; - } - return true; -} - -bool parser_read_operator(Parser *p, string *out) { - if (p->failed) return false; - - if (!scan_read_operator(p->s, out)) { - parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, 0); - return false; - } - return true; -} 
- -bool parser_expect_operator(Parser *p, const char *op, const char *msg) { - - if (p->failed) return false; - - ParserMark m = parser_mark(p); - - string tmp; - if (!scan_read_operator(p->s, &tmp)) { - parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, msg); - return false; - } - - if (strcmp(tmp.data, op) != 0) { - parser_reset(p, m); - string_free(tmp); - parser_fail(p, PARSER_ERR_UNEXPECTED_TOKEN, msg); - return false; - } - - string_free(tmp); - return true; -} \ No newline at end of file diff --git a/shared/data/parser/parser.h b/shared/data/parser/parser.h deleted file mode 100644 index 150ee4d4..00000000 --- a/shared/data/parser/parser.h +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once - -#include "types.h" -#include "data/scanner/scanner.h" -#include "std/string.h" - -typedef enum { - PARSER_ERR_NONE = 0, - PARSER_ERR_GENERIC, - PARSER_ERR_EOF, - PARSER_ERR_UNEXPECTED_CHAR, - PARSER_ERR_UNEXPECTED_TOKEN, - PARSER_ERR_INVALID_NUMBER, - PARSER_ERR_INVALID_STRING -} ParserError; - -typedef struct { - Scanner *s; - bool failed; - uint32_t err_pos; - const char *err_msg; - ParserError err; -} Parser; - -typedef struct { - uint32_t pos; -} ParserMark; - -static inline Parser parser_make(Scanner *s) { - Parser p; - p.s = s; - p.failed = false; - p.err_pos = 0; - p.err_msg = 0; - p.err = PARSER_ERR_NONE; - return p; -} - -static inline bool parser_ok(const Parser *p) { - return !p->failed; -} - -ParserMark parser_mark(Parser *p); -void parser_reset(Parser *p, ParserMark m); - -void parser_fail(Parser *p, ParserError err, const char *msg); - -char parser_peek(Parser *p); -char parser_next(Parser *p); -bool parser_eof(Parser *p); - -void parser_skip_ws(Parser *p); - -bool parser_expect_char(Parser *p, char c, const char *msg); -bool parser_expect_string(Parser *p, const char *lit, const char *msg); - -bool parser_read_identifier(Parser *p, string *out); -bool parser_read_number_string(Parser *p, string *out); -bool parser_read_quoted_string(Parser *p, string *out); - -bool 
parser_read_int64(Parser *p, int64_t *out); -bool parser_read_uint64(Parser *p, uint64_t *out); -bool parser_read_double(Parser *p, double *out); - -bool parser_span(Parser *p, ParserMark m, string *out); -bool parser_read_until(Parser *p, char stop, string *out); -bool parser_read_operator(Parser *p, string *out); -bool parser_expect_operator(Parser *p, const char *op, const char *msg); diff --git a/shared/data/scanner/scanner.c b/shared/data/scanner/scanner.c index 9141227f..6bbdc28e 100644 --- a/shared/data/scanner/scanner.c +++ b/shared/data/scanner/scanner.c @@ -1,11 +1,4 @@ -#include "scanner.h" -#include "std/string.h" - -static const char *ops3[] = {">>>", "<<=", ">>=", "===", 0}; - -static const char *ops2[] = {"==","!=", "<=", ">=", "&&", "||", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "::", "->", 0}; - -static const char ops1[] = "+-*/%<>=!&|^~?:;.,(){}[]"; +#include "data/scanner/scanner.h" bool scan_eof(Scanner *s) { return s->pos >= s->len; @@ -45,192 +38,4 @@ void scan_skip_ws(Scanner *s) { if (c==' '||c=='\n'||c=='\t'||c=='\r') s->pos++; else break; } -} - -static bool is_digit(char c) { - return c >= '0' && c <= '9'; -} - -bool scan_read_until(Scanner *s, char stop, string *out) { - uint32_t start = s->pos; - while (!scan_eof(s) && s->buf[s->pos] != stop) s->pos++; - if (s->pos == start) return false; - *out = string_from_literal_length(s->buf + start, s->pos - start); - return true; -} - -bool scan_read_string_token(Scanner *s, string *out) { - if (!scan_match(s, '"')) return false; - - string tmp = string_repeat('\0', 0); - while (!scan_eof(s)) { - char c = scan_next(s); - if (c == '"') { - *out = tmp; - return true; - } - - if (c == '\\') { - if (scan_eof(s)) { string_free(tmp); return false; } - char e = scan_next(s); - if (e == 'u') { - if (s->pos + 4 > s->len) { string_free(tmp); return false; } - string_append_bytes(&tmp, "\\u", 2); - for (int i=0;i<4;i++) { - char h = scan_next(s); - string_append_bytes(&tmp, &h, 1); - } - 
continue; - } - - char r = e; - if (e=='b') r='\b'; - else if (e=='f') r='\f'; - else if (e=='n') r='\n'; - else if (e=='r') r='\r'; - else if (e=='t') r='\t'; - else if (!(e=='"'||e=='\\'||e=='/')) { string_free(tmp); return false; } - string_append_bytes(&tmp, &r, 1); - } else { - string_append_bytes(&tmp, &c, 1); - } - } - - string_free(tmp); - return false; -} - -bool scan_read_number_token(Scanner *s, string *out) { - uint32_t start = s->pos; - const char *buf = s->buf; - uint32_t len = s->len; - uint32_t pos = start; - - if (pos >= len) return false; - - if (buf[pos] == '-') { - pos++; - if (pos >= len) return false; - } - - if (pos < len && buf[pos] == '0' && pos + 1 < len) { - char p = buf[pos + 1]; - - if (p == 'x' || p == 'X') { - pos += 2; - if (pos >= len) return false; - - int ok = 0; - while (pos < len) { - char c = buf[pos]; - if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { - pos++; - ok = 1; - } else break; - } - - if (!ok) return false; - - s->pos = pos; - *out = string_from_literal_length(buf + start, pos - start); - return true; - } - - if (p == 'b' || p == 'B') { - pos += 2; - if (pos >= len) return false; - int ok = 0; - while (pos < len) { - char c = buf[pos]; - if (c == '0' || c == '1') { - pos++; - ok = 1; - } else break; - } - if (!ok) return false; - s->pos = pos; - *out = string_from_literal_length(buf + start, pos - start); - return true; - } - - if (p == 'o' || p == 'O') { - pos += 2; - if (pos >= len) return false; - int ok = 0; - while (pos < len) { - char c = buf[pos]; - if (c >= '0' && c <= '7') { - pos++; - ok = 1; - } else break; - } - if (!ok) return false; - s->pos = pos; - *out = string_from_literal_length(buf + start, pos - start); - return true; - } - } - - if (pos >= len || !(buf[pos] >= '0' && buf[pos] <= '9')) return false; - - while (pos < len && buf[pos] >= '0' && buf[pos] <= '9') pos++; - - uint32_t int_end = pos; - - if (pos < len && buf[pos] == '.') { - pos++; - if (pos < len && 
buf[pos] >= '0' && buf[pos] <= '9') { - while (pos < len && buf[pos] >= '0' &&buf[pos] <= '9') pos++; - } else { - pos = int_end; - } - } - - uint32_t mant_end = pos; - - if (pos < len && (buf[pos] == 'e' || buf[pos] == 'E')) { - uint32_t exp_start = pos; - pos++; - if (pos < len && (buf[pos] == '+' || buf[pos] == '-')) pos++; - - if (pos < len && (buf[pos] >= '0' && buf[pos] <= '9')) { - while (pos < len && (buf[pos] >= '0' && buf[pos] <= '9')) pos++; - mant_end = pos; - } else { - pos =exp_start; - } - } - - s->pos = mant_end; - *out = string_from_literal_length(buf + start, mant_end - start); - return true; -} - -bool scan_read_operator(Scanner *s, string *out) { - if (scan_eof(s)) return false; - - for (int i=0; ops3[i]; i++) { - if (scan_match_string(s, ops3[i])) { - *out = string_from_literal(ops3[i]); - return true; - } - } - - for (int i=0; ops2[i]; i++) { - if (scan_match_string(s, ops2[i])) { - *out = string_from_literal(ops2[ i]); - return true; - } - } - - char c = scan_peek(s); - for (int i=0; ops1[i]; i++) { - if (c == ops1[i]) { - scan_next(s); - *out = string_from_char(c); - return true; - } - } - - return false; } \ No newline at end of file diff --git a/shared/data/scanner/scanner.h b/shared/data/scanner/scanner.h index 7b2e82c2..8befe44f 100644 --- a/shared/data/scanner/scanner.h +++ b/shared/data/scanner/scanner.h @@ -9,7 +9,11 @@ typedef struct { } Scanner; static inline Scanner scanner_make(const char *buf, uint32_t len) { - return (Scanner){buf, len, 0}; + Scanner s; + s.buf = buf; + s.len = len; + s.pos = 0; + return s; } bool scan_eof(Scanner *s); @@ -19,9 +23,4 @@ char scan_next(Scanner *s); bool scan_match(Scanner *s, char c); bool scan_match_string(Scanner *s, const char *str); -void scan_skip_ws(Scanner *s); - -bool scan_read_until(Scanner *s, char stop, string *out); -bool scan_read_string_token(Scanner *s, string *out); -bool scan_read_number_token(Scanner *s, string *out); -bool scan_read_operator(Scanner *s, string *out); \ No 
newline at end of file +void scan_skip_ws(Scanner *s); \ No newline at end of file diff --git a/shared/data/tokenizer/tokenizer.c b/shared/data/tokenizer/tokenizer.c new file mode 100644 index 00000000..009ea103 --- /dev/null +++ b/shared/data/tokenizer/tokenizer.c @@ -0,0 +1,401 @@ +#include "data/tokenizer/tokenizer.h" + +static bool is_alpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static bool is_alnum(char c) { + return is_alpha(c) || (c >= '0' && c <= '9'); +} + +static void tokenizer_fail(Tokenizer *t, TokenizerError err) { + if (t->failed) return; + + t->failed = true; + t->err = err; + t->err_pos = t->s->pos; +} + +static void skip_ws_and_comments(Tokenizer *t) { + Scanner *s = t-> s; + for (;;) { + scan_skip_ws(s); + if (scan_eof(s)) return; + + uint32_t pos =s->pos; + + if (scan_match(s, '/')) { + if (scan_match(s, '/')) { + while (!scan_eof(s)) { + char c = scan_next(s); + if (c == '\n' || c == '\r') break; + } + continue; + } else if (scan_match(s, '*')) { + int found = 0; + while (!scan_eof(s)) { + char c = scan_next(s); + if (c == '*' && !scan_eof(s) && scan_peek(s) == '/') { + scan_next(s); + found = 1; + break; + } + } + if (!found) { + tokenizer_fail(t, TOKENIZER_ERR_UNTERMINATED_COMMENT); + return; + } + continue; + } else { + s->pos = pos; + } + } + + break; + } +} + +static void read_identifier(Scanner *s, Token *tok) { + uint32_t start = s->pos; + scan_next(s); + + while (!scan_eof(s)) { + char c = scan_peek(s); + if (!(is_alnum(c) || c == '_')) break; + scan_next(s); + } + + tok->kind = TOK_IDENTIFIER; + tok->start = s->buf + start; + tok->length = s->pos - start; + tok->pos = start; +} + +static bool read_number(Scanner *s, Token *tok) { + uint32_t start = s->pos; + const char *buf = s->buf; + uint32_t len = s->len; + uint32_t pos = start; + + if (pos >= len || !(buf[pos] >= '0' && buf[pos] <= '9')) return false; + + if (buf[pos] == '0' && pos + 1 < len) { + char p = buf[pos + 1]; + + if (p == 'x' || p == 
'X') { + pos += 2; + if (pos >= len) return false; + + int ok = 0; + while (pos < len) { + char c = buf[pos]; + if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + pos++; + ok = 1; + } else break; + } + if (!ok) return false; + + s->pos = pos; + tok->kind = TOK_NUMBER; + tok->start = buf + start; + tok->length = pos - start; + tok->pos = start; + + return true; + } + + if (p == 'b' || p == 'B') { + pos += 2; + if (pos >= len) return false; + + int ok = 0; + while (pos < len) { + char c = buf[pos]; + if (c == '0' || c == '1') { + pos++; + ok = 1; + } else break; + } + if (!ok) return false; + + s->pos = pos; + tok->kind = TOK_NUMBER; + tok->start = buf + start; + tok->length = pos - start; + tok->pos = start; + + return true; + } + + if (p == 'o' || p == 'O') { + pos += 2; + if (pos >= len) return false; + + int ok = 0; + while (pos < len) { + + char c = buf[pos]; + if (c >= '0' && c <= '7') { + pos++; + ok = 1; + } else break; + } + if (!ok) return false; + + s->pos = pos; + tok->kind = TOK_NUMBER; + tok->start = buf + start; + tok->length = pos - start; + tok->pos= start; + + return true; + } + } + + while (pos < len && buf[pos] >= '0' && buf[pos] <= '9') pos++; + + uint32_t int_end = pos; + + if (pos < len && buf[pos] == '.') { + uint32_t p2 = pos + 1; + if (p2 < len && buf[p2] >= '0' && buf[p2] <= '9') { + pos = p2; + while (pos < len && buf[pos] >= '0' && buf[pos] <= '9') pos++; + } else { + pos = int_end; + } + } + + uint32_t mant_end = pos; + + if (pos < len && (buf[pos] == 'e' || buf[pos] == 'E')) { + uint32_t exp_start = pos; + pos++; + if (pos < len && (buf[pos] == '+' || buf[pos] == '-')) pos++; + if (pos < len && (buf[pos] >= '0' && buf[pos] <= '9')) { + while (pos < len && (buf[pos] >= '0' && buf[pos] <= '9')) pos++; + mant_end = pos; + } else { + pos = exp_start; + } + } + + s->pos = mant_end; + tok->kind = TOK_NUMBER; + tok->start = buf + start; + tok->length = mant_end - start; + tok->pos = start; + return true; +} + +static bool 
read_string(Tokenizer *t, Token *tok) { + Scanner *s = t->s; + uint32_t start = s->pos; + + if (!scan_match(s, '"')) return false; + + while (!scan_eof(s)) { + char c = scan_next(s); + if (c == '"') { + tok->kind = TOK_STRING; + tok->start = s->buf + start; + tok->length = s->pos - start; + tok->pos = start; + return true; + } + + if (c == '\\') { + if (scan_eof(s)) { + tokenizer_fail(t, TOKENIZER_ERR_UNTERMINATED_STRING); + return false; + } + char e = scan_next(s); + if (e == 'u') { + for (int i = 0; i < 4; i++) { + if (scan_eof(s)) { + tokenizer_fail(t, TOKENIZER_ERR_UNTERMINATED_STRING); + return false; + } + char h = scan_next(s); + if (!((h >= '0' && h <= '9') || + (h >= 'a' && h <= 'f') || + (h >= 'A' && h <= 'F'))) { + tokenizer_fail(t, TOKENIZER_ERR_UNTERMINATED_STRING); + return false; + } + } + } else if (!(e == '"' || e == '\\' || e == '/' || + e == 'b' || e == 'f' || e == 'n' || + e == 'r' || e == 't')) { + tokenizer_fail(t, TOKENIZER_ERR_UNTERMINATED_STRING); + return false; + } + } + } + + tokenizer_fail(t, TOKENIZER_ERR_UNTERMINATED_STRING); + return false; +} + +static const char *ops3[] = {">>>", "<<=", ">>=", "===", 0}; +static const char *ops2[] = {"==", "!=", "<=", ">=", "&&", "||", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "::", "->", 0}; +static const char ops1[] = "+-*/%<>=!&|^~?"; + +static bool read_operator(Scanner *s, Token *tok) { + const char *buf = s->buf; + uint32_t len = s->len; + uint32_t pos = s->pos; + + for (int i = 0; ops3[i]; i++) { + const char *op = ops3[i]; + uint32_t n = 3; + if (pos + n <= len) { + uint32_t k = 0; + while (k < n && buf[pos + k] == op[k]) k++; + + if (k == n) { + s->pos = pos + n; + tok->kind = TOK_OPERATOR; + tok->start = buf + pos; + tok->length = n; + tok->pos = pos; + + return true; + } + } + } + + for (int i = 0; ops2[i]; i++) { + const char *op = ops2[i]; + uint32_t n = 0; + while (op[n]) n++; + + if (pos + n <= len) { + uint32_t k = 0; + while (k < n && buf[pos + k] == op[k]) k++; 
+ if (k == n) { + s->pos = pos + n; + tok->kind = TOK_OPERATOR; + tok->start = buf + pos; + tok->length = n; + tok->pos = pos; + + return true; + } + } + } + + if (pos >= len) return false; + char c = buf[pos]; + + for (int i = 0; ops1[i]; i++) { + if (c == ops1[i]) { + s->pos = pos + 1; + tok->kind = TOK_OPERATOR; + tok->start = buf + pos; + tok->length = 1; + tok->pos = pos; + + return true; + } + } + + return false; +} + +static bool read_delim(Scanner *s, Token *tok) { + if (scan_eof(s))return false; + + uint32_t pos = s->pos; + char c = scan_peek(s); + + if (c == '(') { scan_next(s); tok->kind = TOK_LPAREN; } + else if (c == ')'){ scan_next(s); tok->kind = TOK_RPAREN;} + else if (c == '{'){ scan_next(s); tok->kind = TOK_LBRACE; } + else if (c == '}'){ scan_next(s); tok->kind = TOK_RBRACE; } + else if (c == '['){ scan_next(s); tok->kind = TOK_LBRACKET; } + else if (c == ']'){ scan_next(s); tok->kind = TOK_RBRACKET; } + else if (c == ','){ scan_next(s); tok->kind = TOK_COMMA; } + else if (c == ':'){ scan_next(s); tok->kind = TOK_COLON; } + else if (c == ';'){ scan_next(s); tok->kind = TOK_SEMICOLON; } + else if (c == '.'){ scan_next(s); tok->kind = TOK_DOT; } + else return false; + + tok->start = s->buf + pos; + tok->length = 1; + tok->pos =pos; + + return true; +} + +bool tokenizer_next(Tokenizer *t, Token *out) { + if (t->failed) { + out->kind = TOK_INVALID; + out->start = 0; + out->length = 0; + out->pos = t->s->pos; + return false; + } + + skip_ws_and_comments(t); + if (t->failed) { + out->kind = TOK_INVALID; + out->start = 0; + out->length = 0; + out->pos = t->err_pos; + return false; + } + + Scanner *s = t->s; + + if (scan_eof(s)) { + out->kind = TOK_EOF; + out->start = s->buf + s->pos; + out->length = 0; + out->pos = s->pos; + return true; + } + + char c = scan_peek(s); + + if (is_alpha(c) || c == '_') { + read_identifier(s, out); + return true; + } + + if (c >= '0' && c <= '9') { + uint32_t pos_before = s->pos; + if (read_number(s, out)) return true; + + 
tokenizer_fail(t, TOKENIZER_ERR_INVALID_NUMBER); + out->kind = TOK_INVALID; + out->start = s->buf + pos_before; + out->length = 0; + out->pos = pos_before; + return false; + } + + if (c == '"') { + uint32_t pos_before = s->pos; + if (read_string(t, out)) return true; + + tokenizer_fail(t, TOKENIZER_ERR_UNTERMINATED_STRING); + out->kind = TOK_INVALID; + out->start = s->buf + pos_before; + out->length = 0; + out->pos = pos_before; + return false; + } + + if (read_delim(s, out)) return true; + + if (read_operator(s, out)) return true; + + tokenizer_fail(t, TOKENIZER_ERR_INVALID_CHAR); + out->kind = TOK_INVALID; + out->start = s->buf + s->pos; + out->length = 0; + out->pos = s->pos; + return false; +} \ No newline at end of file diff --git a/shared/data/tokenizer/tokenizer.h b/shared/data/tokenizer/tokenizer.h new file mode 100644 index 00000000..b171c254 --- /dev/null +++ b/shared/data/tokenizer/tokenizer.h @@ -0,0 +1,63 @@ +#pragma once +#include "types.h" +#include "data/scanner/scanner.h" + +typedef enum { + TOK_EOF = 0, + TOK_INVALID, + + TOK_IDENTIFIER, + TOK_NUMBER, + TOK_STRING, + + TOK_OPERATOR, + + TOK_LPAREN, + TOK_RPAREN, + TOK_LBRACE, + TOK_RBRACE, + TOK_LBRACKET, + TOK_RBRACKET, + + TOK_COMMA, + TOK_COLON, + TOK_SEMICOLON, + TOK_DOT +} TokenKind; + +typedef struct { + TokenKind kind; + const char *start; + uint32_t length; + uint32_t pos; +} Token; + +typedef enum { + TOKENIZER_ERR_NONE = 0, + TOKENIZER_ERR_INVALID_CHAR, + TOKENIZER_ERR_INVALID_NUMBER, + TOKENIZER_ERR_UNTERMINATED_STRING, + TOKENIZER_ERR_UNTERMINATED_COMMENT +} TokenizerError; + +typedef struct { + Scanner *s; + bool failed; + TokenizerError err; + uint32_t err_pos; +} Tokenizer; + +static inline Tokenizer tokenizer_make(Scanner *s) { + Tokenizer t; + t.s = s; + t.failed = false; + t.err = TOKENIZER_ERR_NONE; + t.err_pos = 0; + return t; +} + +static inline bool tokenizer_ok(const Tokenizer *t) { + return !t->failed; +} + +bool tokenizer_next(Tokenizer *t, Token *out); \ No newline at end 
of file From 3de1153e046d2a25474b55d49a835b5361c989b1 Mon Sep 17 00:00:00 2001 From: CodeAnarchist Date: Fri, 12 Dec 2025 01:25:48 +0100 Subject: [PATCH 4/6] [DATA] add url and query string parser --- shared/data/query_string.c | 66 +++++++++++++ shared/data/query_string.h | 19 ++++ shared/data/tokenizer/tokenizer.c | 8 -- shared/data/url.c | 150 ++++++++++++++++++++++++++++++ shared/data/url.h | 25 +++++ shared/std/string.h | 12 +++ 6 files changed, 272 insertions(+), 8 deletions(-) create mode 100644 shared/data/query_string.c create mode 100644 shared/data/query_string.h create mode 100644 shared/data/url.c create mode 100644 shared/data/url.h diff --git a/shared/data/query_string.c b/shared/data/query_string.c new file mode 100644 index 00000000..ec261ac1 --- /dev/null +++ b/shared/data/query_string.c @@ -0,0 +1,66 @@ +#include "query_string.h" +#include "std/string.h" +#include "syscalls/syscalls.h" + +void query_parse(const char *buf, uint32_t len, QueryParam **out_params, uint32_t *out_count) { + *out_params = 0; + *out_count = 0; + if (!buf || !len) return; + + uint32_t max_params = 1; //b query_parse + + for (uint32_t i = 0; i < len; i++) if (buf[i] == '&') max_params++; + + QueryParam *params = (QueryParam*)malloc(sizeof(QueryParam) * max_params); + if (!params) return; + + uint32_t count = 0; + uint32_t pos = 0; + + while (pos < len) { + uint32_t seg_start = pos; + while (pos < len && buf[pos] != '&') pos++; + uint32_t seg_end = pos; + + if (seg_end > seg_start) { + uint32_t eq_pos = seg_end; + for (uint32_t i = seg_start; i < seg_end; i++) { + if (buf[i] =='=') { + eq_pos = i; + break; + } + } + + QueryParam *qp = &params[count]; + + if (eq_pos < seg_end) { + qp->key.ptr = (uintptr_t)(buf + seg_start); + qp->key.size = eq_pos - seg_start; + qp->value.ptr = (uintptr_t)(buf + eq_pos + 1); + qp->value.size = seg_end - (eq_pos + 1); + } else { + qp->key.ptr = (uintptr_t)(buf + seg_start); + qp->key.size = seg_end - seg_start; + qp->value.ptr = (uintptr_t)(buf + 
seg_end); + qp->value.size = 0; + } + + count++; + } + + if (pos < len && buf[pos] == '&') pos++; + } + + *out_params = params; + *out_count = count; +} + +void query_parse_z(const char *buf, QueryParam **out_params, uint32_t *out_count) { + if (!buf) { + *out_params = 0; + *out_count = 0; + return; + } + size_t n = strlen(buf); + query_parse(buf, (uint32_t)n, out_params, out_count); +} diff --git a/shared/data/query_string.h b/shared/data/query_string.h new file mode 100644 index 00000000..a6703169 --- /dev/null +++ b/shared/data/query_string.h @@ -0,0 +1,19 @@ +#pragma once + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + sizedptr key; + sizedptr value; +} QueryParam; + +void query_parse(const char *buf, uint32_t len, QueryParam **out_params, uint32_t *out_count); +void query_parse_z(const char *buf, QueryParam **out_params, uint32_t *out_count); + +#ifdef __cplusplus +} +#endif diff --git a/shared/data/tokenizer/tokenizer.c b/shared/data/tokenizer/tokenizer.c index 009ea103..c69a615e 100644 --- a/shared/data/tokenizer/tokenizer.c +++ b/shared/data/tokenizer/tokenizer.c @@ -1,13 +1,5 @@ #include "data/tokenizer/tokenizer.h" -static bool is_alpha(char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); -} - -static bool is_alnum(char c) { - return is_alpha(c) || (c >= '0' && c <= '9'); -} - static void tokenizer_fail(Tokenizer *t, TokenizerError err) { if (t->failed) return; diff --git a/shared/data/url.c b/shared/data/url.c new file mode 100644 index 00000000..568f491e --- /dev/null +++ b/shared/data/url.c @@ -0,0 +1,150 @@ +#include "url.h" +#include "std/string.h" + +ParsedURL parse_url(const char *buf, uint32_t len) { + ParsedURL r = {0}; + if (!buf || !len) return r; + + uint32_t i = 0; + + if (!is_alpha(buf[i]))return r; + i++; + while (i < len) { + char c= buf[i]; + if (c == ':') break; + + if (!(is_alnum(c) || c == '+' || c == '-' || c == '.')) return r; + i++; + } + + if (i >= len || buf[i] != ':') return 
r; + + r.scheme.ptr = (uintptr_t)buf; + r.scheme.size = i; + i++; + + if (i + 1 < len && buf[i] =='/' &&buf[i + 1] == '/') { + i += 2; + uint32_t auth_start = i; + uint32_t auth_end = auth_start; + while (auth_end < len) { + char c = buf[auth_end]; + if (c == '/' || c == '?' || c == '#') break; + auth_end++; + } + + if (auth_end > auth_start) { + uint32_t user_end = auth_start; + while (user_end < auth_end && buf[user_end] != '@') user_end++; + + uint32_t host_start = auth_start; + if (user_end < auth_end) { + r.userinfo.ptr = (uintptr_t)(buf + auth_start); + r.userinfo.size = user_end - auth_start; + host_start = user_end + 1; + } + + if (host_start >= auth_end) return r; + + if (buf[host_start] == '[') { + uint32_t p = host_start + 1; + while (p < auth_end && buf[p] != ']') p++; + + if (p >= auth_end) return r; + + r.host.ptr = (uintptr_t)(buf + host_start + 1); + r.host.size = p -(host_start + 1); + + p++; + if (p < auth_end && buf[p] == ':') { + p++; + uint32_t v = 0; + int any = 0; + while (p < auth_end) { + char d = buf[p]; + if (!is_digit(d)) return r; + + any = 1; + v = v * 10 + (uint32_t)(d - '0'); + if (v > 65535) return r; + p++; + } + if (!any) return r; + r.port = (uint16_t)v; + } + } else { + uint32_t p = host_start; + while (p < auth_end && buf[p] != ':') p++; + + r.host.ptr = (uintptr_t)(buf + host_start); + r.host.size = p -host_start; + + if (p < auth_end && buf[p] == ':') { + p++; + uint32_t port = 0; + int any = 0; + while (p < auth_end) { + char d = buf[p]; + if (!is_digit(d)) return r; + + any = 1; + port = port * 10 + (uint32_t)(d -'0'); + if (port > 65535) return r; + p++; + } + if (!any) return r; + r.port = (uint16_t)port; + } + } + } + + i = auth_end; + } + + uint32_t path_start = i; + uint32_t path_end = path_start; + while (path_end < len) { + char c = buf[path_end]; + if (c == '?' 
|| c == '#') break; + path_end++; + } + + if (path_end > path_start) { + r.path.ptr = (uintptr_t)(buf + path_start); + r.path.size = path_end - path_start; + } + + i = path_end; + + if (i < len && buf[i] == '?') { + uint32_t qs = i + 1; + uint32_t qe = qs; + while (qe < len && buf[qe] != '#') qe++; + + if (qe > qs) { + r.query.ptr = (uintptr_t)(buf + qs); + r.query.size = qe - qs; + } + i = qe; + } + + if (i < len && buf[i] == '#') { + uint32_t fs = i + 1; + if (fs < len) { + r.fragment.ptr = (uintptr_t)(buf + fs); + r.fragment.size = len - fs; + } + } + + r.ok = true; + return r; +} + +ParsedURL parse_url_z(const char *buf) { + if (!buf) { + ParsedURL r = {0}; + return r; + } + size_t n = strlen(buf); + return parse_url(buf, (uint32_t)n); +} diff --git a/shared/data/url.h b/shared/data/url.h new file mode 100644 index 00000000..db093cf4 --- /dev/null +++ b/shared/data/url.h @@ -0,0 +1,25 @@ +#pragma once + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + sizedptr scheme; + sizedptr userinfo; + sizedptr host; + uint16_t port; + sizedptr path; + sizedptr query; + sizedptr fragment; + bool ok; +} ParsedURL; + +ParsedURL parse_url(const char *buf, uint32_t len); +ParsedURL parse_url_z(const char *buf); + +#ifdef __cplusplus +} +#endif diff --git a/shared/std/string.h b/shared/std/string.h index ec8b3e85..48faf5ff 100644 --- a/shared/std/string.h +++ b/shared/std/string.h @@ -22,6 +22,18 @@ typedef struct string_list { extern void free_sized(void*,size_t); +static inline bool is_alpha(char c) { + return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); +} + +static inline bool is_digit(char c) { + return (c >= '0' && c <= '9'); +} + +static inline bool is_alnum(char c) { + return is_alpha(c) || is_digit(c); +} + uint32_t u64_to_base(char *tmp, uint64_t v, unsigned base, int upper); size_t strlen_max(const char *s, uint32_t max_length); static inline size_t strlen(const char *s) { return strlen_max(s,0); } From 
dcc7e297da5102c1bb0ef1cb82a0399cd8401a24 Mon Sep 17 00:00:00 2001 From: CodeAnarchist Date: Fri, 12 Dec 2025 12:06:21 +0100 Subject: [PATCH 5/6] [DATA] add percent format and user info parser --- shared/data/percent.c | 36 ++++++++++++++++++++++++++++++++++++ shared/data/percent.h | 14 ++++++++++++++ shared/data/userinfo.c | 29 +++++++++++++++++++++++++++++ shared/data/userinfo.h | 19 +++++++++++++++++++ shared/std/string.h | 7 +++++++ 5 files changed, 105 insertions(+) create mode 100644 shared/data/percent.c create mode 100644 shared/data/percent.h create mode 100644 shared/data/userinfo.c create mode 100644 shared/data/userinfo.h diff --git a/shared/data/percent.c b/shared/data/percent.c new file mode 100644 index 00000000..08b65587 --- /dev/null +++ b/shared/data/percent.c @@ -0,0 +1,36 @@ +#include "percent.h" +#include "std/string.h" + +bool percent_decode(sizedptr in, string *out) { + *out = string_repeat('\0', 0); + const char *buf = (const char*)in.ptr; + uint32_t len = in.size; + + uint32_t i = 0; + while (i < len) { + char c = buf[i]; + + if (c == '%' && i + 2 < len) { + int h1 = hex_val(buf[i+1]); + int h2 = hex_val(buf[i+2]); + if (h1 < 0 || h2 < 0) return false; + + char decoded =(char)((h1 << 4) | h2); + string_append_bytes(out, &decoded, 1); + i += 3; + continue; + } + + if (c == '+') { + char sp = ' '; + string_append_bytes(out, &sp, 1); + i++; + continue; + } + + string_append_bytes(out, &c, 1); + i++; + } + + return true; +} diff --git a/shared/data/percent.h b/shared/data/percent.h new file mode 100644 index 00000000..b061a40a --- /dev/null +++ b/shared/data/percent.h @@ -0,0 +1,14 @@ +#pragma once + +#include "types.h" +#include "std/string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bool percent_decode(sizedptr in, string *out); + +#ifdef __cplusplus +} +#endif diff --git a/shared/data/userinfo.c b/shared/data/userinfo.c new file mode 100644 index 00000000..f889018e --- /dev/null +++ b/shared/data/userinfo.c @@ -0,0 +1,29 @@ +#include 
"userinfo.h" +#include "std/string.h" + +ParsedUserinfo parse_userinfo(sizedptr u) { + ParsedUserinfo r = {0}; + if (!u.ptr || u.size == 0) return r; + + const char *buf = (const char*)u.ptr; + uint32_t len = u.size; + + uint32_t sep = len; + for (uint32_t i = 0; i < len; i++) { + if (buf[i] == ':') { + sep = i; + break; + } + } + + if (sep == len) return r; + + r.username.ptr = u.ptr; + r.username.size = sep; + + r.password.ptr = (uintptr_t)(buf + sep+ 1); + r.password.size = len - (sep+ 1); + + r.ok = true; + return r; +} diff --git a/shared/data/userinfo.h b/shared/data/userinfo.h new file mode 100644 index 00000000..da22f4c0 --- /dev/null +++ b/shared/data/userinfo.h @@ -0,0 +1,19 @@ +#pragma once + +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + sizedptr username; + sizedptr password; + bool ok; +} ParsedUserinfo; + +ParsedUserinfo parse_userinfo(sizedptr u); + +#ifdef __cplusplus +} +#endif diff --git a/shared/std/string.h b/shared/std/string.h index 48faf5ff..ae02b372 100644 --- a/shared/std/string.h +++ b/shared/std/string.h @@ -34,6 +34,13 @@ static inline bool is_alnum(char c) { return is_alpha(c) || is_digit(c); } +static inline int hex_val(char c) { + if (is_digit(c)) return c - '0'; + if (c >= 'a' && c <= 'f') return 10 + (c - 'a'); + if (c >= 'A' && c <= 'F') return 10 + (c - 'A'); + return -1; +} + uint32_t u64_to_base(char *tmp, uint64_t v, unsigned base, int upper); size_t strlen_max(const char *s, uint32_t max_length); static inline size_t strlen(const char *s) { return strlen_max(s,0); } From ccc1e2691ca5af624a463501256f7f67eca24302 Mon Sep 17 00:00:00 2001 From: CodeAnarchist Date: Fri, 12 Dec 2025 19:53:30 +0100 Subject: [PATCH 6/6] [DATA] add helper --- shared/data/helpers/token_stream.c | 45 +++++++++++++++++++++++++++++- shared/data/helpers/token_stream.h | 3 +- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/shared/data/helpers/token_stream.c b/shared/data/helpers/token_stream.c index 
bbb1ad17..2d6c3c77 100644 --- a/shared/data/helpers/token_stream.c +++ b/shared/data/helpers/token_stream.c @@ -45,7 +45,7 @@ bool ts_expect_identifier(TokenStream *ts, string *out) { return true; } -bool ts_expect_number(TokenStream *ts,double *out_double) { +bool ts_expect_double(TokenStream *ts,double *out_double) { Token a, b; if (!ts_peek(ts, &a))return false; @@ -72,4 +72,47 @@ bool ts_expect_number(TokenStream *ts,double *out_double) { ts_next(ts, &a); if (!token_is_number(&a)) return false; return token_to_double(&a, out_double); +} + +bool ts_expect_int(TokenStream *ts, int64_t *out_int) { + Token a, b; + + if (!ts_peek(ts, &a)) return false; + + if (a.kind == TOK_OPERATOR && token_is_operator_token(&a, "-")) { + ts_next(ts, &a); + + if (!ts_peek(ts, &b)) return false; + if (!token_is_number(&b)) return false; + + ts_next(ts, &b); + + string merged; + if (!token_merge_negative_number(&a, &b, &merged)) return false; + + Token tmp; + tmp.kind = TOK_NUMBER; + tmp.start = merged.data; + tmp.length = merged.length; + tmp.pos = a.pos; + + int64_t iv; + bool ok = token_to_int64(&tmp, &iv); + + string_free(merged); + if (!ok) return false; + + *out_int = iv; + return true; + } + + ts_next(ts, &a); + if (!token_is_number(&a)) return false; + + for (uint32_t i = 0; i < a.length; i++) { + char c = a.start[i]; + if (c == '.' 
|| c == 'e' || c == 'E') return false; + } + + return token_to_int64(&a, out_int); } \ No newline at end of file diff --git a/shared/data/helpers/token_stream.h b/shared/data/helpers/token_stream.h index 6a198136..016fc5ff 100644 --- a/shared/data/helpers/token_stream.h +++ b/shared/data/helpers/token_stream.h @@ -19,4 +19,5 @@ bool ts_next(TokenStream *ts, Token *t); bool ts_expect(TokenStream *ts, TokenKind k, Token *out); bool ts_expect_operator(TokenStream *ts, const char *op); bool ts_expect_identifier(TokenStream *ts, string *out); -bool ts_expect_number(TokenStream *ts, double *out_double); \ No newline at end of file +bool ts_expect_double(TokenStream *ts, double *out_double); +bool ts_expect_int(TokenStream *ts, int64_t *out_int); \ No newline at end of file