| 1 | /* Copyright (C) 2010-2021 The RetroArch team |
| 2 | * |
| 3 | * --------------------------------------------------------------------------------------- |
| 4 | * The following license statement only applies to this file (rjson.c). |
| 5 | * --------------------------------------------------------------------------------------- |
| 6 | * |
| 7 | * Permission is hereby granted, free of charge, |
| 8 | * to any person obtaining a copy of this software and associated documentation files (the "Software"), |
| 9 | * to deal in the Software without restriction, including without limitation the rights to |
| 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, |
| 11 | * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, |
| 16 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| 18 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 19 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 21 | */ |
| 22 | |
| 23 | /* The parser is based on Public Domain JSON Parser for C by Christopher Wellons - https://github.com/skeeto/pdjson */ |
| 24 | |
| 25 | #include <stdio.h> /* snprintf, vsnprintf */ |
| 26 | #include <stdarg.h> /* va_list */ |
| 27 | #include <string.h> /* memcpy */ |
| 28 | #include <stdint.h> /* int64_t */ |
| 29 | #include <stdlib.h> /* malloc, realloc, atof, atoi */ |
| 30 | |
| 31 | #include <formats/rjson.h> |
| 32 | #include <compat/posix_string.h> |
| 33 | #include <streams/interface_stream.h> |
| 34 | #include <streams/file_stream.h> |
| 35 | |
/* One level of nesting tracked by the parser.
 * type  - RJSON_OBJECT or RJSON_ARRAY for containers, RJSON_DONE for the
 *         root level, or RJSON_ERROR once an error has been recorded.
 * count - number of tokens read at this level so far. Inside an object
 *         both member names and member values are counted, so an odd
 *         count means a name was just read and a ':' must follow. */
struct _rjson_stack { enum rjson_type type; size_t count; };
| 37 | |
/* Complete state of one streaming JSON parser instance. */
struct rjson
{
   /* The order of the first few members has an impact on performance,
    * the most frequently accessed members are placed on top. */
   const unsigned char *input_p;          /* next unread byte in input_buf */
   struct _rjson_stack *stack_top;        /* current (innermost) nesting level */
   const unsigned char *input_end;        /* one past the last valid byte in input_buf */
   const unsigned char* source_column_p;  /* marks the start of the current line; shifted on buffer refill */
   size_t source_line;                    /* current line number, starts at 1 */

   /* string: buffer for unescaped strings and numbers (inline or heap).
    * string_pass_through: when not NULL, the last string lies unmodified
    * inside input_buf and this points at it. */
   char *string, *string_pass_through;
   size_t string_len, string_cap;         /* used and allocated bytes of the string */

   struct _rjson_stack inline_stack[10];  /* nesting stack storage used until it must grow */
   struct _rjson_stack *stack;            /* inline_stack or a heap allocation */

   rjson_io_t io;                         /* callback that fills input_buf */
   void *user_data;                       /* passed through to the io callback */

   unsigned int stack_cap, stack_max;     /* allocated entries / maximum allowed entries */
   int input_len;                         /* number of bytes requested from io per refill */

   char option_flags;                     /* combination of RJSON_OPTION_* bits */
   char decimal_sep;                      /* initialized to 0; its use is outside this part of the file */
   char error_text[80];                   /* message of the first recorded error */
   char inline_string[512];               /* initial storage for string */

   /* Must be at the end of the struct, it can be allocated with a custom
    * size (see rjson_open_user). */
   unsigned char input_buf[512];
};
| 68 | |
/* Token classes stored in the look-up-table of rjson_next.
 * The order matters: the first three entries are the ones that are skipped
 * (or optionally skipped), rjson_next treats everything greater than
 * _rJSON_TOK_OPTIONAL_SKIP as an actual JSON token. */
enum _rjson_token
{
   _rJSON_TOK_WHITESPACE, _rJSON_TOK_NEWLINE, _rJSON_TOK_OPTIONAL_SKIP,
   _rJSON_TOK_OBJECT, _rJSON_TOK_ARRAY, _rJSON_TOK_STRING, _rJSON_TOK_NUMBER,
   _rJSON_TOK_TRUE, _rJSON_TOK_FALSE, _rJSON_TOK_NULL,
   _rJSON_TOK_OBJECT_END, _rJSON_TOK_ARRAY_END, _rJSON_TOK_COLON,
   _rJSON_TOK_COMMA, _rJSON_TOK_ERROR, _rJSON_TOK_EOF, _rJSON_TOKCOUNT
};
| 77 | |
/* Type holding one input byte (0-255) or the end-of-stream marker.
 * It is an int and not a short for better performance. */
typedef unsigned int _rjson_char_t;
/* Value outside of the byte range that signals the end of the input */
#define _rJSON_EOF ((_rjson_char_t)256)
| 81 | |
/* Compiler branching hint for an expression that is true with high
 * probability. There is intentionally only a 'likely' (and no 'unlikely')
 * variant, because compilers that don't support the hint expect the likely
 * branch to come first. On such compilers the macro is just the expression. */
#if defined(__GNUC__) || defined(__clang__)
#define _rJSON_LIKELY(x) __builtin_expect(!!(x), 1)
#else
#define _rJSON_LIKELY(x) (x)
#endif
| 90 | |
| 91 | /* These 3 error functions return RJSON_ERROR for convenience */ |
| 92 | static enum rjson_type _rjson_error(rjson_t *json, const char *fmt, ...) |
| 93 | { |
| 94 | va_list ap; |
| 95 | if (json->stack_top->type == RJSON_ERROR) |
| 96 | return RJSON_ERROR; |
| 97 | json->stack_top->type = RJSON_ERROR; |
| 98 | va_start(ap, fmt); |
| 99 | vsnprintf(json->error_text, sizeof(json->error_text), fmt, ap); |
| 100 | va_end(ap); |
| 101 | return RJSON_ERROR; |
| 102 | } |
| 103 | |
| 104 | static enum rjson_type _rjson_error_char(rjson_t *json, |
| 105 | const char *fmt, _rjson_char_t chr) |
| 106 | { |
| 107 | char buf[16]; |
| 108 | if (json->stack_top->type == RJSON_ERROR) |
| 109 | return RJSON_ERROR; |
| 110 | snprintf(buf, sizeof(buf), |
| 111 | (chr == _rJSON_EOF ? "end of stream" : |
| 112 | (chr >= ' ' && chr <= '~' ? "'%c'" : "byte 0x%02X")), chr); |
| 113 | return _rjson_error(json, fmt, buf); |
| 114 | } |
| 115 | |
| 116 | static enum rjson_type _rjson_error_token(rjson_t *json, |
| 117 | const char *fmt, enum _rjson_token tok) |
| 118 | { |
| 119 | return _rjson_error_char(json, fmt, |
| 120 | (tok == _rJSON_TOK_EOF ? _rJSON_EOF : json->input_p[-1])); |
| 121 | } |
| 122 | |
| 123 | static bool _rjson_io_input(rjson_t *json) |
| 124 | { |
| 125 | if (json->input_end == json->input_buf) |
| 126 | return false; |
| 127 | json->source_column_p -= (json->input_end - json->input_buf); |
| 128 | json->input_p = json->input_buf; |
| 129 | json->input_end = json->input_buf + |
| 130 | json->io(json->input_buf, json->input_len, json->user_data); |
| 131 | if (json->input_end < json->input_buf) |
| 132 | { |
| 133 | _rjson_error(json, "input stream read error"); |
| 134 | json->input_end = json->input_buf; |
| 135 | } |
| 136 | return (json->input_end != json->input_p); |
| 137 | } |
| 138 | |
| 139 | static bool _rjson_grow_string(rjson_t *json) |
| 140 | { |
| 141 | char *string; |
| 142 | size_t new_string_cap = json->string_cap * 2; |
| 143 | if (json->string != json->inline_string) |
| 144 | string = (char*)realloc(json->string, new_string_cap); |
| 145 | else if ((string = (char*)malloc(new_string_cap)) != NULL) |
| 146 | memcpy(string, json->inline_string, sizeof(json->inline_string)); |
| 147 | if (!string) |
| 148 | { |
| 149 | _rjson_error(json, "out of memory"); |
| 150 | return false; |
| 151 | } |
| 152 | json->string_cap = new_string_cap; |
| 153 | json->string = string; |
| 154 | return true; |
| 155 | } |
| 156 | |
| 157 | static INLINE bool _rjson_pushchar(rjson_t *json, _rjson_char_t c) |
| 158 | { |
| 159 | json->string[json->string_len++] = (char)c; |
| 160 | return (json->string_len != json->string_cap || _rjson_grow_string(json)); |
| 161 | } |
| 162 | |
| 163 | static INLINE bool _rjson_pushchars(rjson_t *json, |
| 164 | const unsigned char *from, const unsigned char *to) |
| 165 | { |
| 166 | size_t len = json->string_len, new_len = len + (to - from); |
| 167 | unsigned char* string; |
| 168 | while (new_len >= json->string_cap) |
| 169 | if (!_rjson_grow_string(json)) |
| 170 | return false; |
| 171 | string = (unsigned char *)json->string; |
| 172 | while (len != new_len) |
| 173 | string[len++] = *(from++); |
| 174 | json->string_len = new_len; |
| 175 | return true; |
| 176 | } |
| 177 | |
| 178 | static INLINE _rjson_char_t _rjson_char_get(rjson_t *json) |
| 179 | { |
| 180 | return (json->input_p != json->input_end || _rjson_io_input(json) |
| 181 | ? *json->input_p++ : _rJSON_EOF); |
| 182 | } |
| 183 | |
| 184 | static unsigned int _rjson_get_unicode_cp(rjson_t *json) |
| 185 | { |
| 186 | unsigned int cp = 0, shift = 16; |
| 187 | for (;;) |
| 188 | { |
| 189 | _rjson_char_t c = _rjson_char_get(json); |
| 190 | switch (c) |
| 191 | { |
| 192 | case '0': case '1': case '2': case '3': case '4': |
| 193 | case '5': case '6': case '7': case '8': case '9': |
| 194 | c -= '0'; |
| 195 | break; |
| 196 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
| 197 | c -= ('a' - 10); |
| 198 | break; |
| 199 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
| 200 | c -= ('A' - 10); |
| 201 | break; |
| 202 | case _rJSON_EOF: |
| 203 | _rjson_error(json, "unterminated string literal in Unicode"); |
| 204 | return (unsigned int)-1; |
| 205 | default: |
| 206 | _rjson_error_char(json, "invalid Unicode escape hexadecimal %s", c); |
| 207 | return (unsigned int)-1; |
| 208 | } |
| 209 | shift -= 4; |
| 210 | cp |= ((unsigned int)c << shift); |
| 211 | if (!shift) |
| 212 | return cp; |
| 213 | } |
| 214 | } |
| 215 | |
/* Handles a \uXXXX escape inside a string, the "\u" itself was already
 * consumed by the caller. Reads the code point, combines a high surrogate
 * with the mandatory following \uXXXX low surrogate and appends the result
 * encoded as UTF-8 to the string buffer.
 *
 * Returns false when an error was recorded (invalid escape, end of stream,
 * invalid code point) or when the string buffer could not be grown.
 * With RJSON_OPTION_IGNORE_INVALID_ENCODING an invalid code point is
 * dropped, with RJSON_OPTION_REPLACE_INVALID_ENCODING it is replaced by
 * '?'; in both cases parsing continues. */
static bool _rjson_read_unicode(rjson_t *json)
{
   /* Leaves through the shared replace_or_ignore exit when one of the two
    * lenient encoding options is set, otherwise falls through to the
    * error handling that follows each use of the macro. */
#define _rJSON_READ_UNICODE_REPLACE_OR_IGNORE \
   if (json->option_flags & (RJSON_OPTION_IGNORE_INVALID_ENCODING \
      | RJSON_OPTION_REPLACE_INVALID_ENCODING)) goto replace_or_ignore;

   unsigned int cp;

   if ((cp = _rjson_get_unicode_cp(json)) == (unsigned int)-1)
      return false;

   if (cp >= 0xd800 && cp <= 0xdbff)
   {
      /* This is the high portion of a surrogate pair; we need to read the
       * lower portion to get the codepoint */
      unsigned int l, h = cp;

      _rjson_char_t c = _rjson_char_get(json);
      if (c == _rJSON_EOF)
      {
         _rjson_error(json, "unterminated string literal in Unicode");
         return false;
      }
      /* A missing "\u" after the high surrogate is always an error,
       * the lenient encoding options are not consulted here */
      if (c != '\\')
      {
         _rjson_error_char(json, "invalid continuation %s"
               " for surrogate pair, expected '\\'", c);
         return false;
      }

      c = _rjson_char_get(json);
      if (c == _rJSON_EOF)
      {
         _rjson_error(json, "unterminated string literal in Unicode");
         return false;
      }
      if (c != 'u')
      {
         _rjson_error_char(json, "invalid continuation %s"
               " for surrogate pair, expected 'u'", c);
         return false;
      }
      if ((l = _rjson_get_unicode_cp(json)) == (unsigned int)-1)
         return false;
      if (l < 0xdc00 || l > 0xdfff)
      {
         _rJSON_READ_UNICODE_REPLACE_OR_IGNORE
         _rjson_error(json, "surrogate pair continuation \\u%04x out "
               "of range (dc00-dfff)", l);
         return false;
      }
      /* Combine both halves into a code point in U+10000 - U+10FFFF */
      cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000);
   }
   else if (cp >= 0xdc00 && cp <= 0xdfff)
   {
      /* A low surrogate without a preceding high surrogate */
      _rJSON_READ_UNICODE_REPLACE_OR_IGNORE
      _rjson_error(json, "dangling surrogate \\u%04x", cp);
      return false;
   }

   /* Encode as UTF-8 using 1 to 4 bytes depending on the code point */
   if (cp < 0x80UL)
      return _rjson_pushchar(json, cp);

   if (cp < 0x0800UL)
      return (_rjson_pushchar(json, (cp >> 6 & 0x1F) | 0xC0) &&
            _rjson_pushchar(json, (cp >> 0 & 0x3F) | 0x80));

   if (cp < 0x010000UL)
   {
      /* NOTE(review): both surrogate ranges were already handled above,
       * so this check cannot trigger; it is kept as a safety net */
      if (cp >= 0xd800 && cp <= 0xdfff)
      {
         _rJSON_READ_UNICODE_REPLACE_OR_IGNORE
         _rjson_error(json, "invalid codepoint %04x", cp);
         return false;
      }
      return (_rjson_pushchar(json, (cp >> 12 & 0x0F) | 0xE0) &&
            _rjson_pushchar(json, (cp >> 6 & 0x3F) | 0x80) &&
            _rjson_pushchar(json, (cp >> 0 & 0x3F) | 0x80));
   }
   if (cp < 0x110000UL)
      return (_rjson_pushchar(json, (cp >> 18 & 0x07) | 0xF0) &&
            _rjson_pushchar(json, (cp >> 12 & 0x3F) | 0x80) &&
            _rjson_pushchar(json, (cp >> 6 & 0x3F) | 0x80) &&
            _rjson_pushchar(json, (cp >> 0 & 0x3F) | 0x80));

   _rJSON_READ_UNICODE_REPLACE_OR_IGNORE
   _rjson_error(json, "unable to encode %04x as UTF-8", cp);
   return false;

   /* Shared exit for invalid code points in lenient mode: nothing is
    * appended when ignoring, a '?' is appended when replacing */
replace_or_ignore:
   return ((json->option_flags & RJSON_OPTION_IGNORE_INVALID_ENCODING) ||
         _rjson_pushchar(json, '?'));
#undef _rJSON_READ_UNICODE_REPLACE_OR_IGNORE
}
| 310 | |
/* Validates that the string that was just read is well-formed UTF-8.
 * The string is either the pass-through range inside the input buffer or
 * the content of the string buffer.
 *
 * With RJSON_OPTION_IGNORE_INVALID_ENCODING nothing is checked.
 * With RJSON_OPTION_REPLACE_INVALID_ENCODING the first byte of an invalid
 * sequence and all directly following bytes with the high bit set are
 * overwritten in place with '?'. Otherwise the first invalid sequence
 * records an error and false is returned. */
static bool _rjson_validate_utf8(rjson_t *json)
{
   unsigned char first, c;
   unsigned char *p;
   unsigned char *from = (unsigned char *)
      (json->string_pass_through ? json->string_pass_through : json->string);
   unsigned char *to = from + json->string_len;

   if (json->option_flags & RJSON_OPTION_IGNORE_INVALID_ENCODING)
      return true;

   for (;;)
   {
      if (from == to)
         return true;
      first = *from;
      if (first <= 0x7F) /* ASCII */
      {
         from++;
         continue;
      }
      /* p stays at the lead byte while from is moved past the sequence */
      p = from;
      /* Continuation or overlong encoding of an ASCII byte */
      if (first <= 0xC1)
         goto invalid_utf8;
      if (first <= 0xDF)
      {
         /* 2 byte sequence. The label below is also the final step of the
          * 3 and 4 byte sequences which jump here to check the 2nd byte */
         if ((from = p + 2) > to)
            goto invalid_utf8;
continue_length_2:
         c = p[1];
         /* The valid range of the 2nd byte depends on the lead byte;
          * c is reused as 'is invalid' flag */
         switch (first)
         {
            case 0xE0: /* exclude overlong 3 byte encodings */
               c = (c < 0xA0 || c > 0xBF);
               break;
            case 0xED: /* exclude UTF-16 surrogates U+D800 - U+DFFF */
               c = (c < 0x80 || c > 0x9F);
               break;
            case 0xF0: /* exclude overlong 4 byte encodings */
               c = (c < 0x90 || c > 0xBF);
               break;
            case 0xF4: /* exclude code points above U+10FFFF */
               c = (c < 0x80 || c > 0x8F);
               break;
            default:
               c = (c < 0x80 || c > 0xBF);
               break;
         }
         if (c)
            goto invalid_utf8;
      }
      else if (first <= 0xEF)
      {
         /* 3 byte sequence: check the 3rd byte, then the 2nd */
         if ((from = p + 3) > to)
            goto invalid_utf8;
continue_length_3:
         if ((c = p[2]) < 0x80 || c > 0xBF)
            goto invalid_utf8;
         goto continue_length_2;
      }
      else if (first <= 0xF4)
      {
         /* 4 byte sequence: check the 4th byte, then the 3rd and 2nd */
         if ((from = p + 4) > to)
            goto invalid_utf8;
         if ((c = p[3]) < 0x80 || c > 0xBF)
            goto invalid_utf8;
         goto continue_length_3;
      }
      else
         goto invalid_utf8; /* length 5 or 6 or invalid UTF-8 */
      continue;
invalid_utf8:
      if (!(json->option_flags & RJSON_OPTION_REPLACE_INVALID_ENCODING))
      {
         _rjson_error(json, "invalid UTF-8 character in string");
         return false;
      }
      /* Replace the lead byte and every following non-ASCII byte */
      from = p;
      *from++ = '?';
      while (from != to && (*from & 0x80))
         *from++ = '?';
   }
}
| 395 | |
/* Reads a string literal, the opening '"' was already consumed.
 *
 * A string without escapes that lies completely inside the input buffer is
 * not copied: string_pass_through then points at it inside the buffer.
 * Otherwise the unescaped content is collected in the string buffer and
 * string_pass_through stays NULL. string_len holds the length either way.
 *
 * Returns RJSON_STRING on success or RJSON_ERROR on an unterminated
 * string, an invalid escape, an unescaped control character (unless
 * allowed by option), invalid UTF-8 or when out of memory. */
static enum rjson_type _rjson_read_string(rjson_t *json)
{
   /* raw marks the start of the bytes not yet copied to the string buffer */
   const unsigned char *p = json->input_p, *raw = p;
   const unsigned char *end = json->input_end;
   /* OR of all plain bytes, bit 0x80 ends up set if any non-ASCII byte
    * was seen which means the UTF-8 validation is required */
   unsigned char utf8mask = 0;
   json->string_pass_through = NULL;
   json->string_len = 0;

   for (;;)
   {
      if (_rJSON_LIKELY(p != end))
      {
         unsigned char c = *p;
         if (_rJSON_LIKELY(c != '"' && c != '\\' && c >= 0x20))
         {
            /* handle most common case first, it's faster */
            utf8mask |= c;
            p++;
         }
         else if (c == '"')
         {
            /* End of the string */
            json->input_p = p + 1;
            if (json->string_len == 0 && p + 1 != end)
            {
               /* raw string fully inside input buffer, pass through */
               json->string_len = p - raw;
               json->string_pass_through = (char*)raw;
            }
            else if (raw != p && !_rjson_pushchars(json, raw, p)) /* OOM */
               return RJSON_ERROR;
            /* Contains invalid UTF-8 byte sequences */
            if ((utf8mask & 0x80) && !_rjson_validate_utf8(json))
               return RJSON_ERROR;
            return RJSON_STRING;
         }
         else if (c == '\\')
         {
            _rjson_char_t esc;
            if (raw != p)
            {
               /* Can't pass through string with escapes, use string buffer */
               if (!_rjson_pushchars(json, raw, p))
                  return RJSON_ERROR;
            }
            /* The escape is read through _rjson_char_get which works on
             * json->input_p and may refill the input buffer */
            json->input_p = p + 1;
            esc = _rjson_char_get(json);
            switch (esc)
            {
               case 'u':
                  if (!_rjson_read_unicode(json))
                     return RJSON_ERROR;
                  break;

               case 'b':
                  esc = '\b';
                  goto escape_pushchar;
               case 'f':
                  esc = '\f';
                  goto escape_pushchar;
               case 'n':
                  esc = '\n';
                  goto escape_pushchar;
               case 'r':
                  /* With this option an escaped carriage return is dropped */
                  if (!(json->option_flags & RJSON_OPTION_IGNORE_STRING_CARRIAGE_RETURN))
                  {
                     esc = '\r';
                     goto escape_pushchar;
                  }
                  break;
               case 't':
                  esc = '\t';
                  goto escape_pushchar;
               /* These three characters stand for themselves */
               case '/':
               case '"':
               case '\\':
               escape_pushchar:
                  if (!_rjson_pushchar(json, esc))
                     return RJSON_ERROR;
                  break;

               case _rJSON_EOF:
                  return _rjson_error(json, "unterminated string literal in escape");

               default:
                  return _rjson_error_char(json, "invalid escaped %s", esc);
            }
            /* Resync the local pointers, the buffer may have been refilled */
            raw = p = json->input_p;
            end = json->input_end;
         }
         else if (!(json->option_flags & RJSON_OPTION_ALLOW_UNESCAPED_CONTROL_CHARACTERS))
            return _rjson_error_char(json, "unescaped control character %s in string", c);
         else
            p++; /* control character is allowed and kept as is */
      }
      else
      {
         /* Input buffer exhausted in the middle of the string */
         if (raw != p)
         {
            /* not fully inside input buffer, copy to string buffer */
            if (!_rjson_pushchars(json, raw, p))
               return RJSON_ERROR;
         }
         if (!_rjson_io_input(json))
            return _rjson_error(json, "unterminated string literal");
         raw = p = json->input_p;
         end = json->input_end;
      }
   }
}
| 505 | |
/* Reads a number, the first character of it was already consumed by the
 * caller (which is why reading starts one byte before json->input_p).
 *
 * First all characters that can be part of a number are collected into the
 * string buffer, then the collected text is validated against the JSON
 * number grammar: optional '-', integer part without leading zeros,
 * optional fraction, optional exponent.
 *
 * Returns RJSON_NUMBER on success, or RJSON_ERROR on a malformed number or
 * when out of memory. Reaching the end of the input while collecting is
 * not an error by itself. */
static enum rjson_type _rjson_read_number(rjson_t *json)
{
   const unsigned char *p = json->input_p - 1;
   const unsigned char *end = json->input_end;
   const unsigned char *start = p;

   json->string_len = 0;
   json->string_pass_through = NULL;
   for (;;)
   {
      if (_rJSON_LIKELY(p != end))
      {
         switch (*p++)
         {
            case '+': case '-': case '.':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case 'E': case 'e':
               continue;
         }
         /* Not a number character: un-read it and finish collecting */
         p--;
         json->input_p = p;
         if (!_rjson_pushchars(json, start, p))
            return RJSON_ERROR; /* out of memory */
         break;
      }
      else
      {
         /* number sequences are always copied to the string buffer */
         if (!_rjson_pushchars(json, start, p))
            return RJSON_ERROR;
         if (!_rjson_io_input(json))
         {
            /* EOF here is not an error for a number */
            json->input_p = json->input_end;
            break;
         }
         start = p = json->input_p;
         end = json->input_end;
      }
   }

   /* From here on p and end walk over the collected text in the string
    * buffer and no longer over the input buffer */
   p = (const unsigned char *)json->string;
   end = (p + json->string_len);

   /* validate json number */
   if (*p == '-' && ++p == end)
      goto invalid_number;
   if (*p == '0')
   {
      /* A leading zero must be the only digit of the integer part */
      if (++p == end)
         return RJSON_NUMBER;
   }
   else
   {
      if (*p < '1' || *p > '9')
         goto invalid_number;
      do
      {
         if (++p == end)
            return RJSON_NUMBER;
      }
      while (*p >= '0' && *p <= '9');
   }
   if (*p == '.')
   {
      /* Fraction: at least one digit is required after the dot */
      if (++p == end)
         goto invalid_number;
      if (*p < '0' || *p > '9')
         goto invalid_number;
      do
      {
         if (++p == end)
            return RJSON_NUMBER;
      }
      while (*p >= '0' && *p <= '9');
   }
   /* OR with 0x20 maps 'E' to 'e' */
   if (((*p)|0x20) == 'e')
   {
      /* Exponent: optional sign followed by at least one digit */
      if (++p == end)
         goto invalid_number;
      if ((*p == '-' || *p == '+') && ++p == end)
         goto invalid_number;
      if (*p < '0' || *p > '9')
         goto invalid_number;
      do
      {
         if (++p == end)
            return RJSON_NUMBER;
      }
      while (*p >= '0' && *p <= '9');
   }
   /* Reports the offending character, or the last character when the
    * number ended too early.
    * NOTE(review): p points into the string buffer here, so the comparison
    * with json->input_end (input buffer) presumably never matches and the
    * end of stream variant is not reported - confirm the intent. */
invalid_number:
   return _rjson_error_char(json, "unexpected %s in number",
         (p == json->input_end ? _rJSON_EOF : p[p == end ? -1 : 0]));
}
| 602 | |
| 603 | static enum rjson_type _rjson_push_stack(rjson_t *json, enum _rjson_token t) |
| 604 | { |
| 605 | if (json->stack_top + 1 == json->stack + json->stack_cap) |
| 606 | { |
| 607 | /* reached allocated stack size, either reallocate or abort */ |
| 608 | unsigned int new_stack_cap; |
| 609 | struct _rjson_stack *new_stack; |
| 610 | size_t stack_alloc; |
| 611 | if (json->stack_cap == json->stack_max) |
| 612 | return _rjson_error(json, "maximum depth of nesting reached"); |
| 613 | |
| 614 | new_stack_cap = json->stack_cap + 4; |
| 615 | if (new_stack_cap > json->stack_max) |
| 616 | new_stack_cap = json->stack_max; |
| 617 | stack_alloc = new_stack_cap * sizeof(struct _rjson_stack); |
| 618 | if (json->stack != json->inline_stack) |
| 619 | new_stack = (struct _rjson_stack *)realloc(json->stack, stack_alloc); |
| 620 | else if ((new_stack = (struct _rjson_stack*)malloc(stack_alloc)) != NULL) |
| 621 | memcpy(new_stack, json->inline_stack, sizeof(json->inline_stack)); |
| 622 | if (!new_stack) |
| 623 | return _rjson_error(json, "out of memory"); |
| 624 | |
| 625 | json->stack = new_stack; |
| 626 | json->stack_top = new_stack + json->stack_cap - 1; |
| 627 | json->stack_cap = new_stack_cap; |
| 628 | } |
| 629 | json->stack_top++; |
| 630 | json->stack_top->count = 0; |
| 631 | return (json->stack_top->type = |
| 632 | (t == _rJSON_TOK_ARRAY ? RJSON_ARRAY : RJSON_OBJECT)); |
| 633 | } |
| 634 | |
| 635 | static enum rjson_type _rjson_read_name(rjson_t *json, const char *pattern, enum rjson_type type) |
| 636 | { |
| 637 | _rjson_char_t c; |
| 638 | const char *p; |
| 639 | for (p = pattern; *p; p++) |
| 640 | { |
| 641 | if ((_rjson_char_t)*p != (c = _rjson_char_get(json))) |
| 642 | return _rjson_error_char(json, "unexpected %s in value", c); |
| 643 | } |
| 644 | return type; |
| 645 | } |
| 646 | |
| 647 | static bool _rjson_optional_skip(rjson_t *json, const unsigned char **p, const unsigned char **end) |
| 648 | { |
| 649 | unsigned char c, skip = (*p)[-1]; |
| 650 | int state = 0; |
| 651 | |
| 652 | if (skip == '/' && !(json->option_flags & RJSON_OPTION_ALLOW_COMMENTS)) |
| 653 | return false; |
| 654 | |
| 655 | if ( skip == 0xEF && (!(json->option_flags & RJSON_OPTION_ALLOW_UTF8BOM) |
| 656 | || json->source_line != 1 || json->source_column_p != json->input_p)) |
| 657 | return false; |
| 658 | |
| 659 | for (;;) |
| 660 | { |
| 661 | if (*p == *end) |
| 662 | { |
| 663 | if (!_rjson_io_input(json)) |
| 664 | { |
| 665 | _rjson_error(json, "unfinished %s", |
| 666 | (skip == '/' ? "comment" : "utf8 byte order mark")); |
| 667 | break; |
| 668 | } |
| 669 | *p = json->input_p; |
| 670 | *end = json->input_end; |
| 671 | } |
| 672 | c = *(*p)++; |
| 673 | if (skip == '/') |
| 674 | { |
| 675 | if (state == 0 && c == '/') |
| 676 | state = 1; |
| 677 | else if (state == 0 && c == '*') |
| 678 | state = 2; |
| 679 | else if (state == 0) |
| 680 | break; |
| 681 | else if (state == 1 && c == '\n') |
| 682 | return true; |
| 683 | else if (state == 2 && c == '*') |
| 684 | state = 3; |
| 685 | else if (state == 3 && c == '/') |
| 686 | return true; |
| 687 | else if (state == 3 && c != '*') |
| 688 | state = 2; |
| 689 | } |
| 690 | else if (skip == 0xEF) |
| 691 | { |
| 692 | /* Silence warning - state being set never used */ |
| 693 | if (state == 0 && c == 0xBB) |
| 694 | state = 1; |
| 695 | else if (state == 1 && c == 0xBF) |
| 696 | return true; |
| 697 | else |
| 698 | break; |
| 699 | } |
| 700 | } |
| 701 | return false; |
| 702 | } |
| 703 | |
/* Advances the parser to the next JSON element and returns its type.
 *
 * Returns RJSON_STRING, RJSON_NUMBER, RJSON_TRUE, RJSON_FALSE, RJSON_NULL,
 * RJSON_OBJECT / RJSON_ARRAY when a container is entered,
 * RJSON_OBJECT_END / RJSON_ARRAY_END when one is left, RJSON_DONE when the
 * top level value is complete and the input ended (or trailing data is
 * allowed), and RJSON_ERROR on any error. Once an error was recorded
 * every further call returns RJSON_ERROR.
 *
 * Member names of objects are returned as RJSON_STRING like values, the
 * count of the current stack entry tells names and values apart. */
enum rjson_type rjson_next(rjson_t *json)
{
   unsigned char tok;
   struct _rjson_stack *stack = json->stack_top;
   /* Local copies of the read position, written back to json->input_p
    * before returning or before calling a reader that works on it */
   const unsigned char *p = json->input_p;
   const unsigned char *end = json->input_end;
   /* Set once a ':' or ',' was consumed in this call, the next token then
    * must be a value or a member name */
   unsigned char passed_token = false;

   /* JSON token look-up-table */
   static const unsigned char token_lut[256] =
   {
      #define i _rJSON_TOK_ERROR
      /*   0 | 0x00 |   */ i,i,i,i,i,i,i,i,i,
      /*   9 | 0x09 |\t */ _rJSON_TOK_WHITESPACE,
      /*  10 | 0x0A |\n */ _rJSON_TOK_NEWLINE, i,i,
      /*  13 | 0x0D |\r */ _rJSON_TOK_WHITESPACE, i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,
      /*  32 | 0x20 |   */ _rJSON_TOK_WHITESPACE, i,
      /*  34 | 0x22 | " */ _rJSON_TOK_STRING, i,i,i,i,i,i,i,i,i,
      /*  44 | 0x2C | , */ _rJSON_TOK_COMMA,
      /*  45 | 0x2D | - */ _rJSON_TOK_NUMBER, i,
      /*  47 | 0x2F | / */ _rJSON_TOK_OPTIONAL_SKIP,
      /*  48 | 0x30 | 0 */ _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER,
      /*  53 | 0x35 | 5 */ _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER,
      /*  58 | 0x3A | : */ _rJSON_TOK_COLON, i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,
      /*  91 | 0x5B | [ */ _rJSON_TOK_ARRAY, i,
      /*  93 | 0x5D | ] */ _rJSON_TOK_ARRAY_END, i,i,i,i,i,i,i,i,
      /* 102 | 0x66 | f */ _rJSON_TOK_FALSE, i,i,i,i,i,i,i,
      /* 110 | 0x6E | n */ _rJSON_TOK_NULL, i,i,i,i,i,
      /* 116 | 0x74 | t */ _rJSON_TOK_TRUE, i,i,i,i,i,i,
      /* 123 | 0x7B | { */ _rJSON_TOK_OBJECT, i,
      /* 125 | 0x7D | } */ _rJSON_TOK_OBJECT_END,
      /* 126 | 0x7E | ~ */ i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,
      /* 164 | 0xA4 |   */ i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,
      /* 202 | 0xCA |   */ i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,
      /* 239 | 0xEF |   */ _rJSON_TOK_OPTIONAL_SKIP, i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i
      #undef i
   };

   if (_rJSON_LIKELY(stack->type != RJSON_ERROR))
   {
      for (;;)
      {
         /* Step 1: fetch the next token, skipping whitespace, line feeds
          * and (when enabled) comments and the byte order mark */
         if (_rJSON_LIKELY(p != end))
         {
            tok = token_lut[*p++];
            if (_rJSON_LIKELY(tok > _rJSON_TOK_OPTIONAL_SKIP))
            {
               /* Actual JSON token, process below */
            }
            else if (_rJSON_LIKELY(tok == _rJSON_TOK_WHITESPACE))
               continue;
            else if (tok == _rJSON_TOK_NEWLINE)
            {
               json->source_line++;
               json->source_column_p = p;
               continue;
            }
            else if (tok == _rJSON_TOK_OPTIONAL_SKIP)
            {
               /* When nothing was skipped the token falls through and is
                * reported as unexpected by the code below */
               if (_rjson_optional_skip(json, &p, &end))
                  continue;
            }
         }
         else if (_rJSON_LIKELY(_rjson_io_input(json)))
         {
            p = json->input_p;
            end = json->input_end;
            continue;
         }
         else
         {
            p = json->input_end;
            tok = _rJSON_TOK_EOF;
         }

         /* Step 2: check that the token is allowed at this position */
         if (stack->type == RJSON_OBJECT)
         {
            if (stack->count & 1)
            {
               /* Expecting colon followed by value. */
               if (passed_token)
                  goto read_value;
               if (_rJSON_LIKELY(tok == _rJSON_TOK_COLON))
               {
                  passed_token = true;
                  continue;
               }
               json->input_p = p;
               return _rjson_error_token(json,
                     "expected ':' not %s after member name", (enum _rjson_token)tok);
            }
            if (passed_token)
            {
               /* A comma was consumed, only a member name may follow */
               if (_rJSON_LIKELY(tok == _rJSON_TOK_STRING))
                  goto read_value;
               json->input_p = p;
               return _rjson_error(json, "expected member name after ','");
            }
            if (tok == _rJSON_TOK_OBJECT_END)
            {
               json->input_p = p;
               json->stack_top--;
               return RJSON_OBJECT_END;
            }
            if (stack->count == 0)
            {
               /* No member name/value pairs yet. */
               if (_rJSON_LIKELY(tok == _rJSON_TOK_STRING))
                  goto read_value;
               json->input_p = p;
               return _rjson_error(json, "expected member name or '}'");
            }
            /* Expecting comma followed by member name. */
            if (_rJSON_LIKELY(tok == _rJSON_TOK_COMMA))
            {
               passed_token = true;
               continue;
            }
            json->input_p = p;
            return _rjson_error_token(json,
                  "expected ',' or '}' not %s after member value", (enum _rjson_token)tok);
         }
         else if (stack->type == RJSON_ARRAY)
         {
            if (passed_token)
               goto read_value;
            if (tok == _rJSON_TOK_ARRAY_END)
            {
               json->input_p = p;
               json->stack_top--;
               return RJSON_ARRAY_END;
            }
            /* The first element needs no comma in front of it */
            if (stack->count == 0)
               goto read_value;
            if (_rJSON_LIKELY(tok == _rJSON_TOK_COMMA))
            {
               passed_token = true;
               continue;
            }
            json->input_p = p;
            return _rjson_error_token(json,
                  "expected ',' or ']' not %s in array", (enum _rjson_token)tok);
         }
         else
         {
            /* Top level: exactly one value is expected */
            if (_rJSON_LIKELY(!stack->count && tok != _rJSON_TOK_EOF))
               goto read_value;
            json->input_p = p;
            if (!stack->count)
               return _rjson_error(json, "reached end without any data");
            if (tok == _rJSON_TOK_EOF)
               return RJSON_DONE;
            if (!(json->option_flags & RJSON_OPTION_ALLOW_TRAILING_DATA))
               return _rjson_error_token(json,
                     "expected end of stream instead of %s", (enum _rjson_token)tok);
            /* Trailing data is allowed: un-read its first byte */
            json->input_p--;
            return RJSON_DONE;
         }

         /* read value for current token */
         read_value:
         json->input_p = p;
         stack->count++;
         /* This is optimal when there are many strings, otherwise a switch statement
          * or a function pointer table is better (depending on compiler/cpu) */
         if (tok == _rJSON_TOK_STRING)
            return _rjson_read_string(json);
         else if (tok == _rJSON_TOK_NUMBER)
            return _rjson_read_number(json);
         else if (tok == _rJSON_TOK_OBJECT)
            return _rjson_push_stack(json, _rJSON_TOK_OBJECT);
         else if (tok == _rJSON_TOK_ARRAY)
            return _rjson_push_stack(json, _rJSON_TOK_ARRAY);
         else if (tok == _rJSON_TOK_TRUE)
            return _rjson_read_name(json, "rue", RJSON_TRUE);
         else if (tok == _rJSON_TOK_FALSE)
            return _rjson_read_name(json, "alse", RJSON_FALSE);
         else if (tok == _rJSON_TOK_NULL)
            return _rjson_read_name(json, "ull", RJSON_NULL);
         else return _rjson_error_token(json,
               "unexpected %s in value", (enum _rjson_token)tok);
      }
   }
   return RJSON_ERROR;
}
| 889 | |
| 890 | void _rjson_setup(rjson_t *json, rjson_io_t io, void *user_data, int input_len) |
| 891 | { |
| 892 | json->io = io; |
| 893 | json->user_data = user_data; |
| 894 | json->input_len = input_len; |
| 895 | json->input_p = json->input_end = json->input_buf + input_len; |
| 896 | |
| 897 | json->stack = json->inline_stack; |
| 898 | json->stack_top = json->stack; |
| 899 | json->stack_top->type = RJSON_DONE; |
| 900 | json->stack_top->count = 0; |
| 901 | json->stack_cap = (unsigned int)(sizeof(json->inline_stack) / sizeof(json->inline_stack[0])); |
| 902 | json->stack_max = (unsigned int)50; |
| 903 | |
| 904 | json->string = json->inline_string; |
| 905 | json->string_pass_through = NULL; |
| 906 | json->string_len = 0; |
| 907 | json->string_cap = sizeof(json->inline_string); |
| 908 | |
| 909 | json->source_line = 1; |
| 910 | json->source_column_p = json->input_p; |
| 911 | json->option_flags = 0; |
| 912 | json->decimal_sep = 0; |
| 913 | } |
| 914 | |
| 915 | rjson_t *rjson_open_user(rjson_io_t io, void *user_data, int io_block_size) |
| 916 | { |
| 917 | rjson_t* json = (rjson_t*)malloc( |
| 918 | sizeof(rjson_t) - sizeof(((rjson_t*)0)->input_buf) + io_block_size); |
| 919 | if (json) _rjson_setup(json, io, user_data, io_block_size); |
| 920 | return json; |
| 921 | } |
| 922 | |
/* Input callback that serves bytes from a memory range.
 *
 * user points to two consecutive pointers: [0] is the current read
 * position and [1] is the end of the range. Up to len bytes are copied
 * into buf and the read position is advanced by the amount copied.
 *
 * Returns the number of bytes copied, 0 once the range is exhausted
 * (or when len is not positive). */
static int _rjson_buffer_io(void* buf, int len, void *user)
{
   const char **ud = (const char **)user;

   /* Never hand out more than what is left in the range */
   if (ud[1] - ud[0] < len)
      len = (int)(ud[1] - ud[0]);

   /* Nothing to copy: do not call memcpy at all, the source may be a
    * NULL pointer (empty input) and a negative length would turn into
    * a huge size_t. */
   if (len <= 0)
      return 0;

   memcpy(buf, ud[0], (size_t)len);
   ud[0] += len;
   return len;
}
| 931 | |
| 932 | rjson_t *rjson_open_buffer(const void *buffer, size_t size) |
| 933 | { |
| 934 | rjson_t *json = (rjson_t *)malloc(sizeof(rjson_t) + sizeof(const char *)*2); |
| 935 | const char **ud = (const char **)(json + 1); |
| 936 | if (!json) |
| 937 | return NULL; |
| 938 | ud[0] = (const char *)buffer; |
| 939 | ud[1] = ud[0] + size; |
| 940 | _rjson_setup(json, _rjson_buffer_io, (void*)ud, sizeof(json->input_buf)); |
| 941 | return json; |
| 942 | } |
| 943 | |
| 944 | rjson_t *rjson_open_string(const char *string, size_t len) |
| 945 | { |
| 946 | return rjson_open_buffer(string, len); |
| 947 | } |
| 948 | |
| 949 | static int _rjson_stream_io(void* buf, int len, void *user) |
| 950 | { |
| 951 | return (int)intfstream_read((intfstream_t*)user, buf, (uint64_t)len); |
| 952 | } |
| 953 | |
| 954 | rjson_t *rjson_open_stream(struct intfstream_internal *stream) |
| 955 | { |
| 956 | /* Allocate an input buffer based on the file size */ |
| 957 | int64_t size = intfstream_get_size(stream); |
| 958 | int io_size = |
| 959 | (size > 1024*1024 ? 4096 : |
| 960 | (size > 256*1024 ? 2048 : 1024)); |
| 961 | return rjson_open_user(_rjson_stream_io, stream, io_size); |
| 962 | } |
| 963 | |
| 964 | static int _rjson_rfile_io(void* buf, int len, void *user) |
| 965 | { |
| 966 | return (int)filestream_read((RFILE*)user, buf, (int64_t)len); |
| 967 | } |
| 968 | |
| 969 | rjson_t *rjson_open_rfile(RFILE *rfile) |
| 970 | { |
| 971 | /* Allocate an input buffer based on the file size */ |
| 972 | int64_t size = filestream_get_size(rfile); |
| 973 | int io_size = |
| 974 | (size > 1024*1024 ? 4096 : |
| 975 | (size > 256*1024 ? 2048 : 1024)); |
| 976 | return rjson_open_user(_rjson_rfile_io, rfile, io_size); |
| 977 | } |
| 978 | |
| 979 | void rjson_set_options(rjson_t *json, char rjson_option_flags) |
| 980 | { |
| 981 | json->option_flags = rjson_option_flags; |
| 982 | } |
| 983 | |
| 984 | void rjson_set_max_depth(rjson_t *json, unsigned int max_depth) |
| 985 | { |
| 986 | json->stack_max = max_depth; |
| 987 | } |
| 988 | |
| 989 | const char *rjson_get_string(rjson_t *json, size_t *length) |
| 990 | { |
| 991 | char* str = (json->string_pass_through |
| 992 | ? json->string_pass_through : json->string); |
| 993 | if (length) |
| 994 | *length = json->string_len; |
| 995 | str[json->string_len] = '\0'; |
| 996 | return str; |
| 997 | } |
| 998 | |
| 999 | double rjson_get_double(rjson_t *json) |
| 1000 | { |
| 1001 | char* str = (json->string_pass_through ? json->string_pass_through : json->string); |
| 1002 | str[json->string_len] = '\0'; |
| 1003 | if (json->decimal_sep != '.') |
| 1004 | { |
| 1005 | /* handle locale that uses a non-standard decimal separator */ |
| 1006 | char *p; |
| 1007 | if (json->decimal_sep == 0) |
| 1008 | { |
| 1009 | char test[4]; |
| 1010 | snprintf(test, sizeof(test), "%.1f", 0.0f); |
| 1011 | json->decimal_sep = test[1]; |
| 1012 | } |
| 1013 | if (json->decimal_sep != '.' && (p = strchr(str, '.')) != NULL) |
| 1014 | { |
| 1015 | double res; |
| 1016 | *p = json->decimal_sep; |
| 1017 | res = atof(str); |
| 1018 | *p = '.'; |
| 1019 | return res; |
| 1020 | } |
| 1021 | } |
| 1022 | return atof(str); |
| 1023 | } |
| 1024 | |
| 1025 | int rjson_get_int(rjson_t *json) |
| 1026 | { |
| 1027 | char* str = (json->string_pass_through ? json->string_pass_through : json->string); |
| 1028 | str[json->string_len] = '\0'; |
| 1029 | return atoi(str); |
| 1030 | } |
| 1031 | |
| 1032 | const char *rjson_get_error(rjson_t *json) |
| 1033 | { |
| 1034 | return (json->stack_top->type == RJSON_ERROR ? json->error_text : ""); |
| 1035 | } |
| 1036 | |
| 1037 | void rjson_set_error(rjson_t *json, const char* error) |
| 1038 | { |
| 1039 | _rjson_error(json, "%s", error); |
| 1040 | } |
| 1041 | |
| 1042 | size_t rjson_get_source_line(rjson_t *json) |
| 1043 | { |
| 1044 | return json->source_line; |
| 1045 | } |
| 1046 | |
| 1047 | size_t rjson_get_source_column(rjson_t *json) |
| 1048 | { |
| 1049 | return (json->input_p == json->source_column_p ? 1 : |
| 1050 | json->input_p - json->source_column_p); |
| 1051 | } |
| 1052 | |
| 1053 | int rjson_get_source_context_len(rjson_t *json) |
| 1054 | { |
| 1055 | const unsigned char *from = json->input_buf, *to = json->input_end, *p = json->input_p; |
| 1056 | return (int)(((p + 256 < to ? p + 256 : to) - (p > from + 256 ? p - 256 : from))); |
| 1057 | } |
| 1058 | |
| 1059 | const char* rjson_get_source_context_buf(rjson_t *json) |
| 1060 | { |
| 1061 | /* inside the input buffer, some " may have been replaced with \0. */ |
| 1062 | const unsigned char *p = json->input_p, *from = json->input_buf; |
| 1063 | unsigned char *i = json->input_buf; |
| 1064 | for (; i != json->input_end; i++) |
| 1065 | { |
| 1066 | if (*i == '\0') |
| 1067 | *i = '"'; |
| 1068 | } |
| 1069 | return (const char*)(p > from + 256 ? p - 256 : from); |
| 1070 | } |
| 1071 | |
| 1072 | bool rjson_check_context(rjson_t *json, unsigned int depth, ...) |
| 1073 | { |
| 1074 | va_list ap; |
| 1075 | const struct _rjson_stack *stack = json->stack, *stack_top = json->stack_top; |
| 1076 | if ((unsigned int)(stack_top - stack) != depth) |
| 1077 | return false; |
| 1078 | va_start(ap, depth); |
| 1079 | while (++stack <= stack_top) |
| 1080 | { |
| 1081 | if (va_arg(ap, int) == (int)stack->type) continue; |
| 1082 | va_end(ap); |
| 1083 | return false; |
| 1084 | } |
| 1085 | va_end(ap); |
| 1086 | return true; |
| 1087 | } |
| 1088 | |
| 1089 | unsigned int rjson_get_context_depth(rjson_t *json) |
| 1090 | { |
| 1091 | return (unsigned int)(json->stack_top - json->stack); |
| 1092 | } |
| 1093 | |
| 1094 | size_t rjson_get_context_count(rjson_t *json) |
| 1095 | { |
| 1096 | return json->stack_top->count; |
| 1097 | } |
| 1098 | |
| 1099 | enum rjson_type rjson_get_context_type(rjson_t *json) |
| 1100 | { |
| 1101 | return json->stack_top->type; |
| 1102 | } |
| 1103 | |
| 1104 | void rjson_free(rjson_t *json) |
| 1105 | { |
| 1106 | if (json->stack != json->inline_stack) |
| 1107 | free(json->stack); |
| 1108 | if (json->string != json->inline_string) |
| 1109 | free(json->string); |
| 1110 | free(json); |
| 1111 | } |
| 1112 | |
/* Do-nothing stand-in that rjson_parse uses for argument-less handlers
 * the caller left NULL; always reports success so parsing continues. */
static bool _rjson_nop_default(void *context)
{
   (void)context;
   return true;
}
/* Do-nothing stand-in that rjson_parse uses for text handlers (object
 * member, string, number) the caller left NULL; always reports success
 * so parsing continues. */
static bool _rjson_nop_string(void *context, const char *value, size_t length)
{
   (void)context;
   (void)value;
   (void)length;
   return true;
}
/* Do-nothing stand-in that rjson_parse uses when the caller left the
 * boolean handler NULL; always reports success so parsing continues. */
static bool _rjson_nop_bool(void *context, bool value)
{
   (void)context;
   (void)value;
   return true;
}
| 1116 | |
| 1117 | enum rjson_type rjson_parse(rjson_t *json, void* context, |
| 1118 | bool (*object_member_handler)(void *context, const char *str, size_t len), |
| 1119 | bool (*string_handler )(void *context, const char *str, size_t len), |
| 1120 | bool (*number_handler )(void *context, const char *str, size_t len), |
| 1121 | bool (*start_object_handler )(void *context), |
| 1122 | bool (*end_object_handler )(void *context), |
| 1123 | bool (*start_array_handler )(void *context), |
| 1124 | bool (*end_array_handler )(void *context), |
| 1125 | bool (*boolean_handler )(void *context, bool value), |
| 1126 | bool (*null_handler )(void *context)) |
| 1127 | { |
| 1128 | bool in_object = false; |
| 1129 | size_t len; |
| 1130 | const char* string; |
| 1131 | if (!object_member_handler) object_member_handler = _rjson_nop_string; |
| 1132 | if (!string_handler ) string_handler = _rjson_nop_string; |
| 1133 | if (!number_handler ) number_handler = _rjson_nop_string; |
| 1134 | if (!start_object_handler ) start_object_handler = _rjson_nop_default; |
| 1135 | if (!end_object_handler ) end_object_handler = _rjson_nop_default; |
| 1136 | if (!start_array_handler ) start_array_handler = _rjson_nop_default; |
| 1137 | if (!end_array_handler ) end_array_handler = _rjson_nop_default; |
| 1138 | if (!boolean_handler ) boolean_handler = _rjson_nop_bool; |
| 1139 | if (!null_handler ) null_handler = _rjson_nop_default; |
| 1140 | for (;;) |
| 1141 | { |
| 1142 | switch (rjson_next(json)) |
| 1143 | { |
| 1144 | case RJSON_STRING: |
| 1145 | string = rjson_get_string(json, &len); |
| 1146 | if (_rJSON_LIKELY( |
| 1147 | (in_object && (json->stack_top->count & 1) ? |
| 1148 | object_member_handler : string_handler) |
| 1149 | (context, string, len))) |
| 1150 | continue; |
| 1151 | return RJSON_STRING; |
| 1152 | case RJSON_NUMBER: |
| 1153 | string = rjson_get_string(json, &len); |
| 1154 | if (_rJSON_LIKELY(number_handler(context, string, len))) |
| 1155 | continue; |
| 1156 | return RJSON_NUMBER; |
| 1157 | case RJSON_OBJECT: |
| 1158 | in_object = true; |
| 1159 | if (_rJSON_LIKELY(start_object_handler(context))) |
| 1160 | continue; |
| 1161 | return RJSON_OBJECT; |
| 1162 | case RJSON_ARRAY: |
| 1163 | in_object = false; |
| 1164 | if (_rJSON_LIKELY(start_array_handler(context))) |
| 1165 | continue; |
| 1166 | return RJSON_ARRAY; |
| 1167 | case RJSON_OBJECT_END: |
| 1168 | if (_rJSON_LIKELY(end_object_handler(context))) |
| 1169 | { |
| 1170 | in_object = (json->stack_top->type == RJSON_OBJECT); |
| 1171 | continue; |
| 1172 | } |
| 1173 | return RJSON_OBJECT_END; |
| 1174 | case RJSON_ARRAY_END: |
| 1175 | if (_rJSON_LIKELY(end_array_handler(context))) |
| 1176 | { |
| 1177 | in_object = (json->stack_top->type == RJSON_OBJECT); |
| 1178 | continue; |
| 1179 | } |
| 1180 | return RJSON_ARRAY_END; |
| 1181 | case RJSON_TRUE: |
| 1182 | if (_rJSON_LIKELY(boolean_handler(context, true))) |
| 1183 | continue; |
| 1184 | return RJSON_TRUE; |
| 1185 | case RJSON_FALSE: |
| 1186 | if (_rJSON_LIKELY(boolean_handler(context, false))) |
| 1187 | continue; |
| 1188 | return RJSON_FALSE; |
| 1189 | case RJSON_NULL: |
| 1190 | if (_rJSON_LIKELY(null_handler(context))) |
| 1191 | continue; |
| 1192 | return RJSON_NULL; |
| 1193 | case RJSON_ERROR: |
| 1194 | return RJSON_ERROR; |
| 1195 | case RJSON_DONE: |
| 1196 | return RJSON_DONE; |
| 1197 | } |
| 1198 | } |
| 1199 | } |
| 1200 | |
| 1201 | bool rjson_parse_quick(const char *string, size_t len, void* context, char option_flags, |
| 1202 | bool (*object_member_handler)(void *context, const char *str, size_t len), |
| 1203 | bool (*string_handler )(void *context, const char *str, size_t len), |
| 1204 | bool (*number_handler )(void *context, const char *str, size_t len), |
| 1205 | bool (*start_object_handler )(void *context), |
| 1206 | bool (*end_object_handler )(void *context), |
| 1207 | bool (*start_array_handler )(void *context), |
| 1208 | bool (*end_array_handler )(void *context), |
| 1209 | bool (*boolean_handler )(void *context, bool value), |
| 1210 | bool (*null_handler )(void *context), |
| 1211 | void (*error_handler )(void *context, int line, int col, const char* error)) |
| 1212 | { |
| 1213 | const char *user_data[2]; |
| 1214 | rjson_t json; |
| 1215 | user_data[0] = string; |
| 1216 | user_data[1] = string + len; |
| 1217 | _rjson_setup(&json, _rjson_buffer_io, (void*)user_data, sizeof(json.input_buf)); |
| 1218 | rjson_set_options(&json, option_flags); |
| 1219 | if (rjson_parse(&json, context, |
| 1220 | object_member_handler, string_handler, number_handler, |
| 1221 | start_object_handler, end_object_handler, |
| 1222 | start_array_handler, end_array_handler, |
| 1223 | boolean_handler, null_handler) == RJSON_DONE) |
| 1224 | return true; |
| 1225 | if (error_handler) |
| 1226 | error_handler(context, |
| 1227 | (int)rjson_get_source_line(&json), |
| 1228 | (int)rjson_get_source_column(&json), |
| 1229 | rjson_get_error(&json)); |
| 1230 | return false; |
| 1231 | } |
| 1232 | |
| 1233 | struct rjsonwriter |
| 1234 | { |
| 1235 | char* buf; |
| 1236 | int buf_num, buf_cap; |
| 1237 | |
| 1238 | rjsonwriter_io_t io; |
| 1239 | void *user_data; |
| 1240 | |
| 1241 | const char* error_text; |
| 1242 | char option_flags, decimal_sep; |
| 1243 | bool buf_is_output, final_flush; |
| 1244 | |
| 1245 | char inline_buf[1024]; |
| 1246 | }; |
| 1247 | |
| 1248 | rjsonwriter_t *rjsonwriter_open_user(rjsonwriter_io_t io, void *user_data) |
| 1249 | { |
| 1250 | rjsonwriter_t* writer = (rjsonwriter_t*)malloc(sizeof(rjsonwriter_t)); |
| 1251 | if (!writer) |
| 1252 | return NULL; |
| 1253 | |
| 1254 | writer->buf = writer->inline_buf; |
| 1255 | writer->buf_num = 0; |
| 1256 | writer->buf_cap = sizeof(writer->inline_buf); |
| 1257 | |
| 1258 | writer->error_text = NULL; |
| 1259 | writer->option_flags = writer->decimal_sep = 0; |
| 1260 | writer->buf_is_output = writer->final_flush = false; |
| 1261 | |
| 1262 | writer->io = io; |
| 1263 | writer->user_data = user_data; |
| 1264 | |
| 1265 | return writer; |
| 1266 | } |
| 1267 | |
| 1268 | static int _rjsonwriter_stream_io(const void* buf, int len, void *user) |
| 1269 | { |
| 1270 | return (int)intfstream_write((intfstream_t*)user, buf, (uint64_t)len); |
| 1271 | } |
| 1272 | |
| 1273 | rjsonwriter_t *rjsonwriter_open_stream(struct intfstream_internal *stream) |
| 1274 | { |
| 1275 | return rjsonwriter_open_user(_rjsonwriter_stream_io, stream); |
| 1276 | } |
| 1277 | |
| 1278 | static int _rjsonwriter_rfile_io(const void* buf, int len, void *user) |
| 1279 | { |
| 1280 | return (int)filestream_write((RFILE*)user, buf, (int64_t)len); |
| 1281 | } |
| 1282 | |
| 1283 | rjsonwriter_t *rjsonwriter_open_rfile(RFILE *rfile) |
| 1284 | { |
| 1285 | return rjsonwriter_open_user(_rjsonwriter_rfile_io, rfile); |
| 1286 | } |
| 1287 | |
| 1288 | static int _rjsonwriter_memory_io(const void* buf, int len, void *user) |
| 1289 | { |
| 1290 | rjsonwriter_t *writer = (rjsonwriter_t *)user; |
| 1291 | bool is_append = (buf != writer->buf); |
| 1292 | int new_cap = writer->buf_num + (is_append ? len : 0) + 512; |
| 1293 | if (!writer->final_flush && (is_append || new_cap > writer->buf_cap)) |
| 1294 | { |
| 1295 | bool can_realloc = (writer->buf != writer->inline_buf); |
| 1296 | char* new_buf = (char*)(can_realloc ? realloc(writer->buf, new_cap) : malloc(new_cap)); |
| 1297 | if (!new_buf) |
| 1298 | return 0; |
| 1299 | if (!can_realloc) |
| 1300 | memcpy(new_buf, writer->buf, writer->buf_num); |
| 1301 | if (is_append) |
| 1302 | { |
| 1303 | memcpy(new_buf + writer->buf_num, buf, len); |
| 1304 | writer->buf_num += len; |
| 1305 | } |
| 1306 | writer->buf = new_buf; |
| 1307 | writer->buf_cap = new_cap; |
| 1308 | } |
| 1309 | return len; |
| 1310 | } |
| 1311 | |
| 1312 | rjsonwriter_t *rjsonwriter_open_memory(void) |
| 1313 | { |
| 1314 | rjsonwriter_t *writer = rjsonwriter_open_user(_rjsonwriter_memory_io, NULL); |
| 1315 | if (!writer) |
| 1316 | return NULL; |
| 1317 | writer->user_data = writer; |
| 1318 | writer->buf_is_output = true; |
| 1319 | return writer; |
| 1320 | } |
| 1321 | |
| 1322 | char* rjsonwriter_get_memory_buffer(rjsonwriter_t *writer, int* len) |
| 1323 | { |
| 1324 | if (writer->io != _rjsonwriter_memory_io || writer->error_text) |
| 1325 | return NULL; |
| 1326 | if (writer->buf_num == writer->buf_cap) |
| 1327 | rjsonwriter_flush(writer); |
| 1328 | writer->buf[writer->buf_num] = '\0'; |
| 1329 | if (len) |
| 1330 | *len = writer->buf_num; |
| 1331 | return writer->buf; |
| 1332 | } |
| 1333 | |
| 1334 | int rjsonwriter_count_memory_buffer(rjsonwriter_t *writer) |
| 1335 | { |
| 1336 | return writer->buf_num; |
| 1337 | } |
| 1338 | |
| 1339 | void rjsonwriter_erase_memory_buffer(rjsonwriter_t *writer, int keep_len) |
| 1340 | { |
| 1341 | if (keep_len <= writer->buf_num) |
| 1342 | writer->buf_num = (keep_len < 0 ? 0 : keep_len); |
| 1343 | } |
| 1344 | |
| 1345 | bool rjsonwriter_free(rjsonwriter_t *writer) |
| 1346 | { |
| 1347 | bool res; |
| 1348 | writer->final_flush = true; |
| 1349 | res = rjsonwriter_flush(writer); |
| 1350 | if (writer->buf != writer->inline_buf) |
| 1351 | free(writer->buf); |
| 1352 | free(writer); |
| 1353 | return res; |
| 1354 | } |
| 1355 | |
| 1356 | void rjsonwriter_set_options(rjsonwriter_t *writer, int rjsonwriter_option_flags) |
| 1357 | { |
| 1358 | writer->option_flags = rjsonwriter_option_flags; |
| 1359 | } |
| 1360 | |
| 1361 | bool rjsonwriter_flush(rjsonwriter_t *writer) |
| 1362 | { |
| 1363 | if (writer->buf_num && !writer->error_text && writer->io(writer->buf, |
| 1364 | writer->buf_num, writer->user_data) != writer->buf_num) |
| 1365 | writer->error_text = "output error"; |
| 1366 | if (!writer->buf_is_output || writer->error_text) |
| 1367 | writer->buf_num = 0; |
| 1368 | return !writer->error_text; |
| 1369 | } |
| 1370 | |
| 1371 | const char *rjsonwriter_get_error(rjsonwriter_t *writer) |
| 1372 | { |
| 1373 | return (writer->error_text ? writer->error_text : ""); |
| 1374 | } |
| 1375 | |
| 1376 | void rjsonwriter_raw(rjsonwriter_t *writer, const char *buf, int len) |
| 1377 | { |
| 1378 | if (writer->buf_num + len > writer->buf_cap) |
| 1379 | rjsonwriter_flush(writer); |
| 1380 | if (len == 1) |
| 1381 | { |
| 1382 | if (buf[0] > ' ' || |
| 1383 | !(writer->option_flags & RJSONWRITER_OPTION_SKIP_WHITESPACE)) |
| 1384 | writer->buf[writer->buf_num++] = buf[0]; |
| 1385 | } |
| 1386 | else |
| 1387 | { |
| 1388 | int add = writer->buf_cap - writer->buf_num; |
| 1389 | if (add > len) |
| 1390 | add = len; |
| 1391 | memcpy(writer->buf + writer->buf_num, buf, add); |
| 1392 | writer->buf_num += add; |
| 1393 | if (len == add) |
| 1394 | return; |
| 1395 | rjsonwriter_flush(writer); |
| 1396 | len -= add; |
| 1397 | buf += add; |
| 1398 | if (writer->buf_num + len <= writer->buf_cap) |
| 1399 | { |
| 1400 | memcpy(writer->buf + writer->buf_num, buf, len); |
| 1401 | writer->buf_num += len; |
| 1402 | } |
| 1403 | else if (writer->io(buf, len, writer->user_data) != len) |
| 1404 | writer->error_text = "output error"; |
| 1405 | } |
| 1406 | } |
| 1407 | |
| 1408 | void rjsonwriter_rawf(rjsonwriter_t *writer, const char *fmt, ...) |
| 1409 | { |
| 1410 | int available, need; |
| 1411 | va_list ap, ap2; |
| 1412 | if (writer->buf_num >= writer->buf_cap - 16) |
| 1413 | rjsonwriter_flush(writer); |
| 1414 | available = (writer->buf_cap - writer->buf_num); |
| 1415 | va_start(ap, fmt); |
| 1416 | need = vsnprintf(writer->buf + writer->buf_num, available, fmt, ap); |
| 1417 | va_end(ap); |
| 1418 | if (need <= 0) |
| 1419 | return; |
| 1420 | if (need < available) |
| 1421 | { |
| 1422 | writer->buf_num += need; |
| 1423 | return; |
| 1424 | } |
| 1425 | rjsonwriter_flush(writer); |
| 1426 | if (writer->buf_num + need >= writer->buf_cap) |
| 1427 | { |
| 1428 | int newcap = writer->buf_num + need + 1; |
| 1429 | char* newbuf = (char*)malloc(newcap); |
| 1430 | if (!newbuf) |
| 1431 | { |
| 1432 | if (!writer->error_text) |
| 1433 | writer->error_text = "out of memory"; |
| 1434 | return; |
| 1435 | } |
| 1436 | if (writer->buf_num) |
| 1437 | memcpy(newbuf, writer->buf, writer->buf_num); |
| 1438 | if (writer->buf != writer->inline_buf) |
| 1439 | free(writer->buf); |
| 1440 | writer->buf = newbuf; |
| 1441 | writer->buf_cap = newcap; |
| 1442 | } |
| 1443 | va_start(ap2, fmt); |
| 1444 | vsnprintf(writer->buf + writer->buf_num, writer->buf_cap - writer->buf_num, fmt, ap2); |
| 1445 | va_end(ap2); |
| 1446 | writer->buf_num += need; |
| 1447 | } |
| 1448 | |
| 1449 | void _rjsonwriter_add_escaped(rjsonwriter_t *writer, unsigned char c) |
| 1450 | { |
| 1451 | char esc_buf[8], esc_len = 2; |
| 1452 | const char* esc; |
| 1453 | switch (c) |
| 1454 | { |
| 1455 | case '\b': |
| 1456 | esc = "\\b"; |
| 1457 | break; |
| 1458 | case '\t': |
| 1459 | esc = "\\t"; |
| 1460 | break; |
| 1461 | case '\n': |
| 1462 | esc = "\\n"; |
| 1463 | break; |
| 1464 | case '\f': |
| 1465 | esc = "\\f"; |
| 1466 | break; |
| 1467 | case '\r': |
| 1468 | esc = "\\r"; |
| 1469 | break; |
| 1470 | case '\"': |
| 1471 | esc = "\\\""; |
| 1472 | break; |
| 1473 | case '\\': |
| 1474 | esc = "\\\\"; |
| 1475 | break; |
| 1476 | case '/': |
| 1477 | esc = "\\/"; |
| 1478 | break; |
| 1479 | default: |
| 1480 | snprintf(esc_buf, sizeof(esc_buf), "\\u%04x", c); |
| 1481 | esc = esc_buf; |
| 1482 | esc_len = 6; |
| 1483 | } |
| 1484 | rjsonwriter_raw(writer, esc, esc_len); |
| 1485 | } |
| 1486 | |
| 1487 | void rjsonwriter_add_string(rjsonwriter_t *writer, const char *value) |
| 1488 | { |
| 1489 | const char *p = (const char*)value, *raw = p; |
| 1490 | unsigned char c; |
| 1491 | rjsonwriter_raw(writer, "\"", 1); |
| 1492 | if (!p) |
| 1493 | goto string_end; |
| 1494 | while ((c = (unsigned char)*p++) != '\0') |
| 1495 | { |
| 1496 | /* forward slash is special, it should be escaped if the previous character |
| 1497 | * was a < (intended to avoid having </script> html tags in JSON files) */ |
| 1498 | if ( c >= 0x20 && c != '\"' && c != '\\' && |
| 1499 | (c != '/' || p < value + 2 || p[-2] != '<')) |
| 1500 | continue; |
| 1501 | if (raw != p - 1) |
| 1502 | rjsonwriter_raw(writer, raw, (int)(p - 1 - raw)); |
| 1503 | _rjsonwriter_add_escaped(writer, c); |
| 1504 | raw = p; |
| 1505 | } |
| 1506 | if (raw != p - 1) |
| 1507 | rjsonwriter_raw(writer, raw, (int)(p - 1 - raw)); |
| 1508 | string_end: |
| 1509 | rjsonwriter_raw(writer, "\"", 1); |
| 1510 | } |
| 1511 | |
| 1512 | void rjsonwriter_add_string_len(rjsonwriter_t *writer, const char *value, int len) |
| 1513 | { |
| 1514 | const char *p = (const char*)value, *raw = p, *end = p + len; |
| 1515 | rjsonwriter_raw(writer, "\"", 1); |
| 1516 | while (p != end) |
| 1517 | { |
| 1518 | unsigned char c = (unsigned char)*p++; |
| 1519 | if ( c >= 0x20 && c != '\"' && c != '\\' |
| 1520 | && (c != '/' || p < value + 2 || p[-2] != '<')) |
| 1521 | continue; |
| 1522 | if (raw != p - 1) |
| 1523 | rjsonwriter_raw(writer, raw, (int)(p - 1 - raw)); |
| 1524 | _rjsonwriter_add_escaped(writer, c); |
| 1525 | raw = p; |
| 1526 | } |
| 1527 | if (raw != end) |
| 1528 | rjsonwriter_raw(writer, raw, (int)(end - raw)); |
| 1529 | rjsonwriter_raw(writer, "\"", 1); |
| 1530 | } |
| 1531 | |
| 1532 | void rjsonwriter_add_double(rjsonwriter_t *writer, double value) |
| 1533 | { |
| 1534 | int old_buf_num = writer->buf_num; |
| 1535 | rjsonwriter_rawf(writer, "%G", value); |
| 1536 | if (writer->decimal_sep != '.') |
| 1537 | { |
| 1538 | /* handle locale that uses a non-standard decimal separator */ |
| 1539 | char *p, *str; |
| 1540 | if (writer->decimal_sep == 0) |
| 1541 | { |
| 1542 | char test[4]; |
| 1543 | snprintf(test, sizeof(test), "%.1f", 0.0f); |
| 1544 | if ((writer->decimal_sep = test[1]) == '.') |
| 1545 | return; |
| 1546 | } |
| 1547 | str = writer->buf + (old_buf_num > writer->buf_num ? 0 : old_buf_num); |
| 1548 | if ((p = strchr(str, writer->decimal_sep)) != NULL) |
| 1549 | *p = '.'; |
| 1550 | } |
| 1551 | } |
| 1552 | |
| 1553 | void rjsonwriter_add_spaces(rjsonwriter_t *writer, int count) |
| 1554 | { |
| 1555 | if (!(writer->option_flags & RJSONWRITER_OPTION_SKIP_WHITESPACE)) |
| 1556 | for (; count > 0; count -= 8) |
| 1557 | rjsonwriter_raw(writer, " ", (count > 8 ? 8 : count)); |
| 1558 | } |
| 1559 | |
| 1560 | void rjsonwriter_add_tabs(rjsonwriter_t *writer, int count) |
| 1561 | { |
| 1562 | if (!(writer->option_flags & RJSONWRITER_OPTION_SKIP_WHITESPACE)) |
| 1563 | for (; count > 0; count -= 8) |
| 1564 | rjsonwriter_raw(writer, "\t\t\t\t\t\t\t\t", (count > 8 ? 8 : count)); |
| 1565 | } |
| 1566 | |
| 1567 | #undef _rJSON_EOF |
| 1568 | #undef _rJSON_LIKELY |