Commit | Line | Data |
---|---|---|
3719602c PC |
1 | /* Copyright (C) 2010-2021 The RetroArch team |
2 | * | |
3 | * --------------------------------------------------------------------------------------- | |
4 | * The following license statement only applies to this file (rjson.c). | |
5 | * --------------------------------------------------------------------------------------- | |
6 | * | |
7 | * Permission is hereby granted, free of charge, | |
8 | * to any person obtaining a copy of this software and associated documentation files (the "Software"), | |
9 | * to deal in the Software without restriction, including without limitation the rights to | |
10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
11 | * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
16 | * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
18 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
19 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
23 | /* The parser is based on Public Domain JSON Parser for C by Christopher Wellons - https://github.com/skeeto/pdjson */ | |
24 | ||
25 | #include <stdio.h> /* snprintf, vsnprintf */ | |
26 | #include <stdarg.h> /* va_list */ | |
27 | #include <string.h> /* memcpy */ | |
28 | #include <stdint.h> /* int64_t */ | |
29 | #include <stdlib.h> /* malloc, realloc, atof, atoi */ | |
30 | ||
31 | #include <formats/rjson.h> | |
32 | #include <compat/posix_string.h> | |
33 | #include <streams/interface_stream.h> | |
34 | #include <streams/file_stream.h> | |
35 | ||
36 | struct _rjson_stack { enum rjson_type type; size_t count; }; | |
37 | ||
38 | struct rjson | |
39 | { | |
40 | /* Order of the top few struct elements have an impact on performance */ | |
41 | /* Place most frequently accessed things on top */ | |
42 | const unsigned char *input_p; | |
43 | struct _rjson_stack *stack_top; | |
44 | const unsigned char *input_end; | |
45 | const unsigned char* source_column_p; | |
46 | size_t source_line; | |
47 | ||
48 | char *string, *string_pass_through; | |
49 | size_t string_len, string_cap; | |
50 | ||
51 | struct _rjson_stack inline_stack[10]; | |
52 | struct _rjson_stack *stack; | |
53 | ||
54 | rjson_io_t io; | |
55 | void *user_data; | |
56 | ||
57 | unsigned int stack_cap, stack_max; | |
58 | int input_len; | |
59 | ||
60 | char option_flags; | |
61 | char decimal_sep; | |
62 | char error_text[80]; | |
63 | char inline_string[512]; | |
64 | ||
65 | /* Must be at the end of the struct, can be allocated with custom size */ | |
66 | unsigned char input_buf[512]; | |
67 | }; | |
68 | ||
/* Token classes produced by the look-up-table in rjson_next.
 * The three classes up to _rJSON_TOK_OPTIONAL_SKIP are not JSON tokens
 * themselves; rjson_next relies on them sorting below all real tokens. */
enum _rjson_token
{
   _rJSON_TOK_WHITESPACE,
   _rJSON_TOK_NEWLINE,
   _rJSON_TOK_OPTIONAL_SKIP,
   _rJSON_TOK_OBJECT,
   _rJSON_TOK_ARRAY,
   _rJSON_TOK_STRING,
   _rJSON_TOK_NUMBER,
   _rJSON_TOK_TRUE,
   _rJSON_TOK_FALSE,
   _rJSON_TOK_NULL,
   _rJSON_TOK_OBJECT_END,
   _rJSON_TOK_ARRAY_END,
   _rJSON_TOK_COLON,
   _rJSON_TOK_COMMA,
   _rJSON_TOK_ERROR,
   _rJSON_TOK_EOF,
   _rJSON_TOKCOUNT
};
77 | ||
/* Characters are carried around as int rather than short because that
 * performs better. The value 256 lies outside the byte range and marks
 * the end of the input. */
typedef unsigned int _rjson_char_t;
#define _rJSON_EOF ((_rjson_char_t)256)

/* Branch prediction hint for a condition that is true most of the time.
 * There is deliberately no "unlikely" counterpart: compilers without
 * support for the hint expect the likely branch to come first anyway. */
#if !defined(__GNUC__) && !defined(__clang__)
#define _rJSON_LIKELY(x) (x)
#else
#define _rJSON_LIKELY(x) __builtin_expect(!!(x), 1)
#endif
90 | ||
91 | /* These 3 error functions return RJSON_ERROR for convenience */ | |
92 | static enum rjson_type _rjson_error(rjson_t *json, const char *fmt, ...) | |
93 | { | |
94 | va_list ap; | |
95 | if (json->stack_top->type == RJSON_ERROR) | |
96 | return RJSON_ERROR; | |
97 | json->stack_top->type = RJSON_ERROR; | |
98 | va_start(ap, fmt); | |
99 | vsnprintf(json->error_text, sizeof(json->error_text), fmt, ap); | |
100 | va_end(ap); | |
101 | return RJSON_ERROR; | |
102 | } | |
103 | ||
104 | static enum rjson_type _rjson_error_char(rjson_t *json, | |
105 | const char *fmt, _rjson_char_t chr) | |
106 | { | |
107 | char buf[16]; | |
108 | if (json->stack_top->type == RJSON_ERROR) | |
109 | return RJSON_ERROR; | |
110 | snprintf(buf, sizeof(buf), | |
111 | (chr == _rJSON_EOF ? "end of stream" : | |
112 | (chr >= ' ' && chr <= '~' ? "'%c'" : "byte 0x%02X")), chr); | |
113 | return _rjson_error(json, fmt, buf); | |
114 | } | |
115 | ||
116 | static enum rjson_type _rjson_error_token(rjson_t *json, | |
117 | const char *fmt, enum _rjson_token tok) | |
118 | { | |
119 | return _rjson_error_char(json, fmt, | |
120 | (tok == _rJSON_TOK_EOF ? _rJSON_EOF : json->input_p[-1])); | |
121 | } | |
122 | ||
123 | static bool _rjson_io_input(rjson_t *json) | |
124 | { | |
125 | if (json->input_end == json->input_buf) | |
126 | return false; | |
127 | json->source_column_p -= (json->input_end - json->input_buf); | |
128 | json->input_p = json->input_buf; | |
129 | json->input_end = json->input_buf + | |
130 | json->io(json->input_buf, json->input_len, json->user_data); | |
131 | if (json->input_end < json->input_buf) | |
132 | { | |
133 | _rjson_error(json, "input stream read error"); | |
134 | json->input_end = json->input_buf; | |
135 | } | |
136 | return (json->input_end != json->input_p); | |
137 | } | |
138 | ||
139 | static bool _rjson_grow_string(rjson_t *json) | |
140 | { | |
141 | char *string; | |
142 | size_t new_string_cap = json->string_cap * 2; | |
143 | if (json->string != json->inline_string) | |
144 | string = (char*)realloc(json->string, new_string_cap); | |
145 | else if ((string = (char*)malloc(new_string_cap)) != NULL) | |
146 | memcpy(string, json->inline_string, sizeof(json->inline_string)); | |
147 | if (!string) | |
148 | { | |
149 | _rjson_error(json, "out of memory"); | |
150 | return false; | |
151 | } | |
152 | json->string_cap = new_string_cap; | |
153 | json->string = string; | |
154 | return true; | |
155 | } | |
156 | ||
157 | static INLINE bool _rjson_pushchar(rjson_t *json, _rjson_char_t c) | |
158 | { | |
159 | json->string[json->string_len++] = (char)c; | |
160 | return (json->string_len != json->string_cap || _rjson_grow_string(json)); | |
161 | } | |
162 | ||
163 | static INLINE bool _rjson_pushchars(rjson_t *json, | |
164 | const unsigned char *from, const unsigned char *to) | |
165 | { | |
166 | size_t len = json->string_len, new_len = len + (to - from); | |
167 | unsigned char* string; | |
168 | while (new_len >= json->string_cap) | |
169 | if (!_rjson_grow_string(json)) | |
170 | return false; | |
171 | string = (unsigned char *)json->string; | |
172 | while (len != new_len) | |
173 | string[len++] = *(from++); | |
174 | json->string_len = new_len; | |
175 | return true; | |
176 | } | |
177 | ||
178 | static INLINE _rjson_char_t _rjson_char_get(rjson_t *json) | |
179 | { | |
180 | return (json->input_p != json->input_end || _rjson_io_input(json) | |
181 | ? *json->input_p++ : _rJSON_EOF); | |
182 | } | |
183 | ||
184 | static unsigned int _rjson_get_unicode_cp(rjson_t *json) | |
185 | { | |
186 | unsigned int cp = 0, shift = 16; | |
187 | for (;;) | |
188 | { | |
189 | _rjson_char_t c = _rjson_char_get(json); | |
190 | switch (c) | |
191 | { | |
192 | case '0': case '1': case '2': case '3': case '4': | |
193 | case '5': case '6': case '7': case '8': case '9': | |
194 | c -= '0'; | |
195 | break; | |
196 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
197 | c -= ('a' - 10); | |
198 | break; | |
199 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
200 | c -= ('A' - 10); | |
201 | break; | |
202 | case _rJSON_EOF: | |
203 | _rjson_error(json, "unterminated string literal in Unicode"); | |
204 | return (unsigned int)-1; | |
205 | default: | |
206 | _rjson_error_char(json, "invalid Unicode escape hexadecimal %s", c); | |
207 | return (unsigned int)-1; | |
208 | } | |
209 | shift -= 4; | |
210 | cp |= ((unsigned int)c << shift); | |
211 | if (!shift) | |
212 | return cp; | |
213 | } | |
214 | } | |
215 | ||
216 | static bool _rjson_read_unicode(rjson_t *json) | |
217 | { | |
218 | #define _rJSON_READ_UNICODE_REPLACE_OR_IGNORE \ | |
219 | if (json->option_flags & (RJSON_OPTION_IGNORE_INVALID_ENCODING \ | |
220 | | RJSON_OPTION_REPLACE_INVALID_ENCODING)) goto replace_or_ignore; | |
221 | ||
222 | unsigned int cp; | |
223 | ||
224 | if ((cp = _rjson_get_unicode_cp(json)) == (unsigned int)-1) | |
225 | return false; | |
226 | ||
227 | if (cp >= 0xd800 && cp <= 0xdbff) | |
228 | { | |
229 | /* This is the high portion of a surrogate pair; we need to read the | |
230 | * lower portion to get the codepoint */ | |
231 | unsigned int l, h = cp; | |
232 | ||
233 | _rjson_char_t c = _rjson_char_get(json); | |
234 | if (c == _rJSON_EOF) | |
235 | { | |
236 | _rjson_error(json, "unterminated string literal in Unicode"); | |
237 | return false; | |
238 | } | |
239 | if (c != '\\') | |
240 | { | |
241 | _rjson_error_char(json, "invalid continuation %s" | |
242 | " for surrogate pair, expected '\\'", c); | |
243 | return false; | |
244 | } | |
245 | ||
246 | c = _rjson_char_get(json); | |
247 | if (c == _rJSON_EOF) | |
248 | { | |
249 | _rjson_error(json, "unterminated string literal in Unicode"); | |
250 | return false; | |
251 | } | |
252 | if (c != 'u') | |
253 | { | |
254 | _rjson_error_char(json, "invalid continuation %s" | |
255 | " for surrogate pair, expected 'u'", c); | |
256 | return false; | |
257 | } | |
258 | if ((l = _rjson_get_unicode_cp(json)) == (unsigned int)-1) | |
259 | return false; | |
260 | if (l < 0xdc00 || l > 0xdfff) | |
261 | { | |
262 | _rJSON_READ_UNICODE_REPLACE_OR_IGNORE | |
263 | _rjson_error(json, "surrogate pair continuation \\u%04x out " | |
264 | "of range (dc00-dfff)", l); | |
265 | return false; | |
266 | } | |
267 | cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000); | |
268 | } | |
269 | else if (cp >= 0xdc00 && cp <= 0xdfff) | |
270 | { | |
271 | _rJSON_READ_UNICODE_REPLACE_OR_IGNORE | |
272 | _rjson_error(json, "dangling surrogate \\u%04x", cp); | |
273 | return false; | |
274 | } | |
275 | ||
276 | if (cp < 0x80UL) | |
277 | return _rjson_pushchar(json, cp); | |
278 | ||
279 | if (cp < 0x0800UL) | |
280 | return (_rjson_pushchar(json, (cp >> 6 & 0x1F) | 0xC0) && | |
281 | _rjson_pushchar(json, (cp >> 0 & 0x3F) | 0x80)); | |
282 | ||
283 | if (cp < 0x010000UL) | |
284 | { | |
285 | if (cp >= 0xd800 && cp <= 0xdfff) | |
286 | { | |
287 | _rJSON_READ_UNICODE_REPLACE_OR_IGNORE | |
288 | _rjson_error(json, "invalid codepoint %04x", cp); | |
289 | return false; | |
290 | } | |
291 | return (_rjson_pushchar(json, (cp >> 12 & 0x0F) | 0xE0) && | |
292 | _rjson_pushchar(json, (cp >> 6 & 0x3F) | 0x80) && | |
293 | _rjson_pushchar(json, (cp >> 0 & 0x3F) | 0x80)); | |
294 | } | |
295 | if (cp < 0x110000UL) | |
296 | return (_rjson_pushchar(json, (cp >> 18 & 0x07) | 0xF0) && | |
297 | _rjson_pushchar(json, (cp >> 12 & 0x3F) | 0x80) && | |
298 | _rjson_pushchar(json, (cp >> 6 & 0x3F) | 0x80) && | |
299 | _rjson_pushchar(json, (cp >> 0 & 0x3F) | 0x80)); | |
300 | ||
301 | _rJSON_READ_UNICODE_REPLACE_OR_IGNORE | |
302 | _rjson_error(json, "unable to encode %04x as UTF-8", cp); | |
303 | return false; | |
304 | ||
305 | replace_or_ignore: | |
306 | return ((json->option_flags & RJSON_OPTION_IGNORE_INVALID_ENCODING) || | |
307 | _rjson_pushchar(json, '?')); | |
308 | #undef _rJSON_READ_UNICODE_REPLACE_OR_IGNORE | |
309 | } | |
310 | ||
311 | static bool _rjson_validate_utf8(rjson_t *json) | |
312 | { | |
313 | unsigned char first, c; | |
314 | unsigned char *p; | |
315 | unsigned char *from = (unsigned char *) | |
316 | (json->string_pass_through ? json->string_pass_through : json->string); | |
317 | unsigned char *to = from + json->string_len; | |
318 | ||
319 | if (json->option_flags & RJSON_OPTION_IGNORE_INVALID_ENCODING) | |
320 | return true; | |
321 | ||
322 | for (;;) | |
323 | { | |
324 | if (from == to) | |
325 | return true; | |
326 | first = *from; | |
327 | if (first <= 0x7F) /* ASCII */ | |
328 | { | |
329 | from++; | |
330 | continue; | |
331 | } | |
332 | p = from; | |
333 | /* Continuation or overlong encoding of an ASCII byte */ | |
334 | if (first <= 0xC1) | |
335 | goto invalid_utf8; | |
336 | if (first <= 0xDF) | |
337 | { | |
338 | if ((from = p + 2) > to) | |
339 | goto invalid_utf8; | |
340 | continue_length_2: | |
341 | c = p[1]; | |
342 | switch (first) | |
343 | { | |
344 | case 0xE0: | |
345 | c = (c < 0xA0 || c > 0xBF); | |
346 | break; | |
347 | case 0xED: | |
348 | c = (c < 0x80 || c > 0x9F); | |
349 | break; | |
350 | case 0xF0: | |
351 | c = (c < 0x90 || c > 0xBF); | |
352 | break; | |
353 | case 0xF4: | |
354 | c = (c < 0x80 || c > 0x8F); | |
355 | break; | |
356 | default: | |
357 | c = (c < 0x80 || c > 0xBF); | |
358 | break; | |
359 | } | |
360 | if (c) | |
361 | goto invalid_utf8; | |
362 | } | |
363 | else if (first <= 0xEF) | |
364 | { | |
365 | if ((from = p + 3) > to) | |
366 | goto invalid_utf8; | |
367 | continue_length_3: | |
368 | if ((c = p[2]) < 0x80 || c > 0xBF) | |
369 | goto invalid_utf8; | |
370 | goto continue_length_2; | |
371 | } | |
372 | else if (first <= 0xF4) | |
373 | { | |
374 | if ((from = p + 4) > to) | |
375 | goto invalid_utf8; | |
376 | if ((c = p[3]) < 0x80 || c > 0xBF) | |
377 | goto invalid_utf8; | |
378 | goto continue_length_3; | |
379 | } | |
380 | else | |
381 | goto invalid_utf8; /* length 5 or 6 or invalid UTF-8 */ | |
382 | continue; | |
383 | invalid_utf8: | |
384 | if (!(json->option_flags & RJSON_OPTION_REPLACE_INVALID_ENCODING)) | |
385 | { | |
386 | _rjson_error(json, "invalid UTF-8 character in string"); | |
387 | return false; | |
388 | } | |
389 | from = p; | |
390 | *from++ = '?'; | |
391 | while (from != to && (*from & 0x80)) | |
392 | *from++ = '?'; | |
393 | } | |
394 | } | |
395 | ||
396 | static enum rjson_type _rjson_read_string(rjson_t *json) | |
397 | { | |
398 | const unsigned char *p = json->input_p, *raw = p; | |
399 | const unsigned char *end = json->input_end; | |
400 | unsigned char utf8mask = 0; | |
401 | json->string_pass_through = NULL; | |
402 | json->string_len = 0; | |
403 | ||
404 | for (;;) | |
405 | { | |
406 | if (_rJSON_LIKELY(p != end)) | |
407 | { | |
408 | unsigned char c = *p; | |
409 | if (_rJSON_LIKELY(c != '"' && c != '\\' && c >= 0x20)) | |
410 | { | |
411 | /* handle most common case first, it's faster */ | |
412 | utf8mask |= c; | |
413 | p++; | |
414 | } | |
415 | else if (c == '"') | |
416 | { | |
417 | json->input_p = p + 1; | |
418 | if (json->string_len == 0 && p + 1 != end) | |
419 | { | |
420 | /* raw string fully inside input buffer, pass through */ | |
421 | json->string_len = p - raw; | |
422 | json->string_pass_through = (char*)raw; | |
423 | } | |
424 | else if (raw != p && !_rjson_pushchars(json, raw, p)) /* OOM */ | |
425 | return RJSON_ERROR; | |
426 | /* Contains invalid UTF-8 byte sequences */ | |
427 | if ((utf8mask & 0x80) && !_rjson_validate_utf8(json)) | |
428 | return RJSON_ERROR; | |
429 | return RJSON_STRING; | |
430 | } | |
431 | else if (c == '\\') | |
432 | { | |
433 | _rjson_char_t esc; | |
434 | if (raw != p) | |
435 | { | |
436 | /* Can't pass through string with escapes, use string buffer */ | |
437 | if (!_rjson_pushchars(json, raw, p)) | |
438 | return RJSON_ERROR; | |
439 | } | |
440 | json->input_p = p + 1; | |
441 | esc = _rjson_char_get(json); | |
442 | switch (esc) | |
443 | { | |
444 | case 'u': | |
445 | if (!_rjson_read_unicode(json)) | |
446 | return RJSON_ERROR; | |
447 | break; | |
448 | ||
449 | case 'b': | |
450 | esc = '\b'; | |
451 | goto escape_pushchar; | |
452 | case 'f': | |
453 | esc = '\f'; | |
454 | goto escape_pushchar; | |
455 | case 'n': | |
456 | esc = '\n'; | |
457 | goto escape_pushchar; | |
458 | case 'r': | |
459 | if (!(json->option_flags & RJSON_OPTION_IGNORE_STRING_CARRIAGE_RETURN)) | |
460 | { | |
461 | esc = '\r'; | |
462 | goto escape_pushchar; | |
463 | } | |
464 | break; | |
465 | case 't': | |
466 | esc = '\t'; | |
467 | goto escape_pushchar; | |
468 | case '/': | |
469 | case '"': | |
470 | case '\\': | |
471 | escape_pushchar: | |
472 | if (!_rjson_pushchar(json, esc)) | |
473 | return RJSON_ERROR; | |
474 | break; | |
475 | ||
476 | case _rJSON_EOF: | |
477 | return _rjson_error(json, "unterminated string literal in escape"); | |
478 | ||
479 | default: | |
480 | return _rjson_error_char(json, "invalid escaped %s", esc); | |
481 | } | |
482 | raw = p = json->input_p; | |
483 | end = json->input_end; | |
484 | } | |
485 | else if (!(json->option_flags & RJSON_OPTION_ALLOW_UNESCAPED_CONTROL_CHARACTERS)) | |
486 | return _rjson_error_char(json, "unescaped control character %s in string", c); | |
487 | else | |
488 | p++; | |
489 | } | |
490 | else | |
491 | { | |
492 | if (raw != p) | |
493 | { | |
494 | /* not fully inside input buffer, copy to string buffer */ | |
495 | if (!_rjson_pushchars(json, raw, p)) | |
496 | return RJSON_ERROR; | |
497 | } | |
498 | if (!_rjson_io_input(json)) | |
499 | return _rjson_error(json, "unterminated string literal"); | |
500 | raw = p = json->input_p; | |
501 | end = json->input_end; | |
502 | } | |
503 | } | |
504 | } | |
505 | ||
506 | static enum rjson_type _rjson_read_number(rjson_t *json) | |
507 | { | |
508 | const unsigned char *p = json->input_p - 1; | |
509 | const unsigned char *end = json->input_end; | |
510 | const unsigned char *start = p; | |
511 | ||
512 | json->string_len = 0; | |
513 | json->string_pass_through = NULL; | |
514 | for (;;) | |
515 | { | |
516 | if (_rJSON_LIKELY(p != end)) | |
517 | { | |
518 | switch (*p++) | |
519 | { | |
520 | case '+': case '-': case '.': | |
521 | case '0': case '1': case '2': case '3': case '4': | |
522 | case '5': case '6': case '7': case '8': case '9': | |
523 | case 'E': case 'e': | |
524 | continue; | |
525 | } | |
526 | p--; | |
527 | json->input_p = p; | |
528 | if (!_rjson_pushchars(json, start, p)) | |
529 | return RJSON_ERROR; /* out of memory */ | |
530 | break; | |
531 | } | |
532 | else | |
533 | { | |
534 | /* number sequences are always copied to the string buffer */ | |
535 | if (!_rjson_pushchars(json, start, p)) | |
536 | return RJSON_ERROR; | |
537 | if (!_rjson_io_input(json)) | |
538 | { | |
539 | /* EOF here is not an error for a number */ | |
540 | json->input_p = json->input_end; | |
541 | break; | |
542 | } | |
543 | start = p = json->input_p; | |
544 | end = json->input_end; | |
545 | } | |
546 | } | |
547 | ||
548 | p = (const unsigned char *)json->string; | |
549 | end = (p + json->string_len); | |
550 | ||
551 | /* validate json number */ | |
552 | if (*p == '-' && ++p == end) | |
553 | goto invalid_number; | |
554 | if (*p == '0') | |
555 | { | |
556 | if (++p == end) | |
557 | return RJSON_NUMBER; | |
558 | } | |
559 | else | |
560 | { | |
561 | if (*p < '1' || *p > '9') | |
562 | goto invalid_number; | |
563 | do | |
564 | { | |
565 | if (++p == end) | |
566 | return RJSON_NUMBER; | |
567 | } | |
568 | while (*p >= '0' && *p <= '9'); | |
569 | } | |
570 | if (*p == '.') | |
571 | { | |
572 | if (++p == end) | |
573 | goto invalid_number; | |
574 | if (*p < '0' || *p > '9') | |
575 | goto invalid_number; | |
576 | do | |
577 | { | |
578 | if (++p == end) | |
579 | return RJSON_NUMBER; | |
580 | } | |
581 | while (*p >= '0' && *p <= '9'); | |
582 | } | |
583 | if (((*p)|0x20) == 'e') | |
584 | { | |
585 | if (++p == end) | |
586 | goto invalid_number; | |
587 | if ((*p == '-' || *p == '+') && ++p == end) | |
588 | goto invalid_number; | |
589 | if (*p < '0' || *p > '9') | |
590 | goto invalid_number; | |
591 | do | |
592 | { | |
593 | if (++p == end) | |
594 | return RJSON_NUMBER; | |
595 | } | |
596 | while (*p >= '0' && *p <= '9'); | |
597 | } | |
598 | invalid_number: | |
599 | return _rjson_error_char(json, "unexpected %s in number", | |
600 | (p == json->input_end ? _rJSON_EOF : p[p == end ? -1 : 0])); | |
601 | } | |
602 | ||
603 | static enum rjson_type _rjson_push_stack(rjson_t *json, enum _rjson_token t) | |
604 | { | |
605 | if (json->stack_top + 1 == json->stack + json->stack_cap) | |
606 | { | |
607 | /* reached allocated stack size, either reallocate or abort */ | |
608 | unsigned int new_stack_cap; | |
609 | struct _rjson_stack *new_stack; | |
610 | size_t stack_alloc; | |
611 | if (json->stack_cap == json->stack_max) | |
612 | return _rjson_error(json, "maximum depth of nesting reached"); | |
613 | ||
614 | new_stack_cap = json->stack_cap + 4; | |
615 | if (new_stack_cap > json->stack_max) | |
616 | new_stack_cap = json->stack_max; | |
617 | stack_alloc = new_stack_cap * sizeof(struct _rjson_stack); | |
618 | if (json->stack != json->inline_stack) | |
619 | new_stack = (struct _rjson_stack *)realloc(json->stack, stack_alloc); | |
620 | else if ((new_stack = (struct _rjson_stack*)malloc(stack_alloc)) != NULL) | |
621 | memcpy(new_stack, json->inline_stack, sizeof(json->inline_stack)); | |
622 | if (!new_stack) | |
623 | return _rjson_error(json, "out of memory"); | |
624 | ||
625 | json->stack = new_stack; | |
626 | json->stack_top = new_stack + json->stack_cap - 1; | |
627 | json->stack_cap = new_stack_cap; | |
628 | } | |
629 | json->stack_top++; | |
630 | json->stack_top->count = 0; | |
631 | return (json->stack_top->type = | |
632 | (t == _rJSON_TOK_ARRAY ? RJSON_ARRAY : RJSON_OBJECT)); | |
633 | } | |
634 | ||
635 | static enum rjson_type _rjson_read_name(rjson_t *json, const char *pattern, enum rjson_type type) | |
636 | { | |
637 | _rjson_char_t c; | |
638 | const char *p; | |
639 | for (p = pattern; *p; p++) | |
640 | { | |
641 | if ((_rjson_char_t)*p != (c = _rjson_char_get(json))) | |
642 | return _rjson_error_char(json, "unexpected %s in value", c); | |
643 | } | |
644 | return type; | |
645 | } | |
646 | ||
647 | static bool _rjson_optional_skip(rjson_t *json, const unsigned char **p, const unsigned char **end) | |
648 | { | |
649 | unsigned char c, skip = (*p)[-1]; | |
650 | int state = 0; | |
651 | ||
652 | if (skip == '/' && !(json->option_flags & RJSON_OPTION_ALLOW_COMMENTS)) | |
653 | return false; | |
654 | ||
655 | if ( skip == 0xEF && (!(json->option_flags & RJSON_OPTION_ALLOW_UTF8BOM) | |
656 | || json->source_line != 1 || json->source_column_p != json->input_p)) | |
657 | return false; | |
658 | ||
659 | for (;;) | |
660 | { | |
661 | if (*p == *end) | |
662 | { | |
663 | if (!_rjson_io_input(json)) | |
664 | { | |
665 | _rjson_error(json, "unfinished %s", | |
666 | (skip == '/' ? "comment" : "utf8 byte order mark")); | |
667 | break; | |
668 | } | |
669 | *p = json->input_p; | |
670 | *end = json->input_end; | |
671 | } | |
672 | c = *(*p)++; | |
673 | if (skip == '/') | |
674 | { | |
675 | if (state == 0 && c == '/') | |
676 | state = 1; | |
677 | else if (state == 0 && c == '*') | |
678 | state = 2; | |
679 | else if (state == 0) | |
680 | break; | |
681 | else if (state == 1 && c == '\n') | |
682 | return true; | |
683 | else if (state == 2 && c == '*') | |
684 | state = 3; | |
685 | else if (state == 3 && c == '/') | |
686 | return true; | |
687 | else if (state == 3 && c != '*') | |
688 | state = 2; | |
689 | } | |
690 | else if (skip == 0xEF) | |
691 | { | |
692 | /* Silence warning - state being set never used */ | |
693 | if (state == 0 && c == 0xBB) | |
694 | state = 1; | |
695 | else if (state == 1 && c == 0xBF) | |
696 | return true; | |
697 | else | |
698 | break; | |
699 | } | |
700 | } | |
701 | return false; | |
702 | } | |
703 | ||
704 | enum rjson_type rjson_next(rjson_t *json) | |
705 | { | |
706 | unsigned char tok; | |
707 | struct _rjson_stack *stack = json->stack_top; | |
708 | const unsigned char *p = json->input_p; | |
709 | const unsigned char *end = json->input_end; | |
710 | unsigned char passed_token = false; | |
711 | ||
712 | /* JSON token look-up-table */ | |
713 | static const unsigned char token_lut[256] = | |
714 | { | |
715 | #define i _rJSON_TOK_ERROR | |
716 | /* 0 | 0x00 | */ i,i,i,i,i,i,i,i,i, | |
717 | /* 9 | 0x09 |\t */ _rJSON_TOK_WHITESPACE, | |
718 | /* 10 | 0x0A |\n */ _rJSON_TOK_NEWLINE, i,i, | |
719 | /* 13 | 0x0D |\r */ _rJSON_TOK_WHITESPACE, i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i, | |
720 | /* 32 | 0x20 | */ _rJSON_TOK_WHITESPACE, i, | |
721 | /* 34 | 0x22 | " */ _rJSON_TOK_STRING, i,i,i,i,i,i,i,i,i, | |
722 | /* 44 | 0x2C | , */ _rJSON_TOK_COMMA, | |
723 | /* 45 | 0x2D | - */ _rJSON_TOK_NUMBER, i, | |
724 | /* 47 | 0x2F | / */ _rJSON_TOK_OPTIONAL_SKIP, | |
725 | /* 48 | 0x30 | 0 */ _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, | |
726 | /* 53 | 0x35 | 5 */ _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, _rJSON_TOK_NUMBER, | |
727 | /* 58 | 0x3A | : */ _rJSON_TOK_COLON, i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i, | |
728 | /* 91 | 0x5B | [ */ _rJSON_TOK_ARRAY, i, | |
729 | /* 93 | 0x5D | ] */ _rJSON_TOK_ARRAY_END, i,i,i,i,i,i,i,i, | |
730 | /* 102 | 0x66 | f */ _rJSON_TOK_FALSE, i,i,i,i,i,i,i, | |
731 | /* 110 | 0x6E | n */ _rJSON_TOK_NULL, i,i,i,i,i, | |
732 | /* 116 | 0x74 | t */ _rJSON_TOK_TRUE, i,i,i,i,i,i, | |
733 | /* 123 | 0x7B | { */ _rJSON_TOK_OBJECT, i, | |
734 | /* 125 | 0x7D | } */ _rJSON_TOK_OBJECT_END, | |
735 | /* 126 | 0x7E | ~ */ i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i, | |
736 | /* 164 | 0xA4 | */ i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i, | |
737 | /* 202 | 0xCA | */ i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i, | |
738 | /* 239 | 0xEF | */ _rJSON_TOK_OPTIONAL_SKIP, i,i,i,i,i,i,i,i,i,i,i,i,i,i,i,i | |
739 | #undef i | |
740 | }; | |
741 | ||
742 | if (_rJSON_LIKELY(stack->type != RJSON_ERROR)) | |
743 | { | |
744 | for (;;) | |
745 | { | |
746 | if (_rJSON_LIKELY(p != end)) | |
747 | { | |
748 | tok = token_lut[*p++]; | |
749 | if (_rJSON_LIKELY(tok > _rJSON_TOK_OPTIONAL_SKIP)) | |
750 | { | |
751 | /* Actual JSON token, process below */ | |
752 | } | |
753 | else if (_rJSON_LIKELY(tok == _rJSON_TOK_WHITESPACE)) | |
754 | continue; | |
755 | else if (tok == _rJSON_TOK_NEWLINE) | |
756 | { | |
757 | json->source_line++; | |
758 | json->source_column_p = p; | |
759 | continue; | |
760 | } | |
761 | else if (tok == _rJSON_TOK_OPTIONAL_SKIP) | |
762 | { | |
763 | if (_rjson_optional_skip(json, &p, &end)) | |
764 | continue; | |
765 | } | |
766 | } | |
767 | else if (_rJSON_LIKELY(_rjson_io_input(json))) | |
768 | { | |
769 | p = json->input_p; | |
770 | end = json->input_end; | |
771 | continue; | |
772 | } | |
773 | else | |
774 | { | |
775 | p = json->input_end; | |
776 | tok = _rJSON_TOK_EOF; | |
777 | } | |
778 | ||
779 | if (stack->type == RJSON_OBJECT) | |
780 | { | |
781 | if (stack->count & 1) | |
782 | { | |
783 | /* Expecting colon followed by value. */ | |
784 | if (passed_token) | |
785 | goto read_value; | |
786 | if (_rJSON_LIKELY(tok == _rJSON_TOK_COLON)) | |
787 | { | |
788 | passed_token = true; | |
789 | continue; | |
790 | } | |
791 | json->input_p = p; | |
792 | return _rjson_error_token(json, | |
793 | "expected ':' not %s after member name", (enum _rjson_token)tok); | |
794 | } | |
795 | if (passed_token) | |
796 | { | |
797 | if (_rJSON_LIKELY(tok == _rJSON_TOK_STRING)) | |
798 | goto read_value; | |
799 | json->input_p = p; | |
800 | return _rjson_error(json, "expected member name after ','"); | |
801 | } | |
802 | if (tok == _rJSON_TOK_OBJECT_END) | |
803 | { | |
804 | json->input_p = p; | |
805 | json->stack_top--; | |
806 | return RJSON_OBJECT_END; | |
807 | } | |
808 | if (stack->count == 0) | |
809 | { | |
810 | /* No member name/value pairs yet. */ | |
811 | if (_rJSON_LIKELY(tok == _rJSON_TOK_STRING)) | |
812 | goto read_value; | |
813 | json->input_p = p; | |
814 | return _rjson_error(json, "expected member name or '}'"); | |
815 | } | |
816 | /* Expecting comma followed by member name. */ | |
817 | if (_rJSON_LIKELY(tok == _rJSON_TOK_COMMA)) | |
818 | { | |
819 | passed_token = true; | |
820 | continue; | |
821 | } | |
822 | json->input_p = p; | |
823 | return _rjson_error_token(json, | |
824 | "expected ',' or '}' not %s after member value", (enum _rjson_token)tok); | |
825 | } | |
826 | else if (stack->type == RJSON_ARRAY) | |
827 | { | |
828 | if (passed_token) | |
829 | goto read_value; | |
830 | if (tok == _rJSON_TOK_ARRAY_END) | |
831 | { | |
832 | json->input_p = p; | |
833 | json->stack_top--; | |
834 | return RJSON_ARRAY_END; | |
835 | } | |
836 | if (stack->count == 0) | |
837 | goto read_value; | |
838 | if (_rJSON_LIKELY(tok == _rJSON_TOK_COMMA)) | |
839 | { | |
840 | passed_token = true; | |
841 | continue; | |
842 | } | |
843 | json->input_p = p; | |
844 | return _rjson_error_token(json, | |
845 | "expected ',' or ']' not %s in array", (enum _rjson_token)tok); | |
846 | } | |
847 | else | |
848 | { | |
849 | if (_rJSON_LIKELY(!stack->count && tok != _rJSON_TOK_EOF)) | |
850 | goto read_value; | |
851 | json->input_p = p; | |
852 | if (!stack->count) | |
853 | return _rjson_error(json, "reached end without any data"); | |
854 | if (tok == _rJSON_TOK_EOF) | |
855 | return RJSON_DONE; | |
856 | if (!(json->option_flags & RJSON_OPTION_ALLOW_TRAILING_DATA)) | |
857 | return _rjson_error_token(json, | |
858 | "expected end of stream instead of %s", (enum _rjson_token)tok); | |
859 | json->input_p--; | |
860 | return RJSON_DONE; | |
861 | } | |
862 | ||
863 | /* read value for current token */ | |
864 | read_value: | |
865 | json->input_p = p; | |
866 | stack->count++; | |
867 | /* This is optimal when there are many strings, otherwise a switch statement | |
868 | * or a function pointer table is better (depending on compiler/cpu) */ | |
869 | if (tok == _rJSON_TOK_STRING) | |
870 | return _rjson_read_string(json); | |
871 | else if (tok == _rJSON_TOK_NUMBER) | |
872 | return _rjson_read_number(json); | |
873 | else if (tok == _rJSON_TOK_OBJECT) | |
874 | return _rjson_push_stack(json, _rJSON_TOK_OBJECT); | |
875 | else if (tok == _rJSON_TOK_ARRAY) | |
876 | return _rjson_push_stack(json, _rJSON_TOK_ARRAY); | |
877 | else if (tok == _rJSON_TOK_TRUE) | |
878 | return _rjson_read_name(json, "rue", RJSON_TRUE); | |
879 | else if (tok == _rJSON_TOK_FALSE) | |
880 | return _rjson_read_name(json, "alse", RJSON_FALSE); | |
881 | else if (tok == _rJSON_TOK_NULL) | |
882 | return _rjson_read_name(json, "ull", RJSON_NULL); | |
883 | else return _rjson_error_token(json, | |
884 | "unexpected %s in value", (enum _rjson_token)tok); | |
885 | } | |
886 | } | |
887 | return RJSON_ERROR; | |
888 | } | |
889 | ||
890 | void _rjson_setup(rjson_t *json, rjson_io_t io, void *user_data, int input_len) | |
891 | { | |
892 | json->io = io; | |
893 | json->user_data = user_data; | |
894 | json->input_len = input_len; | |
895 | json->input_p = json->input_end = json->input_buf + input_len; | |
896 | ||
897 | json->stack = json->inline_stack; | |
898 | json->stack_top = json->stack; | |
899 | json->stack_top->type = RJSON_DONE; | |
900 | json->stack_top->count = 0; | |
901 | json->stack_cap = (unsigned int)(sizeof(json->inline_stack) / sizeof(json->inline_stack[0])); | |
902 | json->stack_max = (unsigned int)50; | |
903 | ||
904 | json->string = json->inline_string; | |
905 | json->string_pass_through = NULL; | |
906 | json->string_len = 0; | |
907 | json->string_cap = sizeof(json->inline_string); | |
908 | ||
909 | json->source_line = 1; | |
910 | json->source_column_p = json->input_p; | |
911 | json->option_flags = 0; | |
912 | json->decimal_sep = 0; | |
913 | } | |
914 | ||
915 | rjson_t *rjson_open_user(rjson_io_t io, void *user_data, int io_block_size) | |
916 | { | |
917 | rjson_t* json = (rjson_t*)malloc( | |
918 | sizeof(rjson_t) - sizeof(((rjson_t*)0)->input_buf) + io_block_size); | |
919 | if (json) _rjson_setup(json, io, user_data, io_block_size); | |
920 | return json; | |
921 | } | |
922 | ||
/* io callback reading from a memory range.
 *
 * user points to two pointers: [0] is the current read position and
 * [1] is the end of the data. Copies at most len bytes into buf, advances
 * the read position and returns the number of bytes copied. */
static int _rjson_buffer_io(void* buf, int len, void *user)
{
   const char **range = (const char **)user;
   int count          = len;

   /* Clamp the request to what is left in the range */
   if (range[1] - range[0] < len)
      count = (int)(range[1] - range[0]);

   memcpy(buf, range[0], count);
   range[0] += count;
   return count;
}
931 | ||
932 | rjson_t *rjson_open_buffer(const void *buffer, size_t size) | |
933 | { | |
934 | rjson_t *json = (rjson_t *)malloc(sizeof(rjson_t) + sizeof(const char *)*2); | |
935 | const char **ud = (const char **)(json + 1); | |
936 | if (!json) | |
937 | return NULL; | |
938 | ud[0] = (const char *)buffer; | |
939 | ud[1] = ud[0] + size; | |
940 | _rjson_setup(json, _rjson_buffer_io, (void*)ud, sizeof(json->input_buf)); | |
941 | return json; | |
942 | } | |
943 | ||
944 | rjson_t *rjson_open_string(const char *string, size_t len) | |
945 | { | |
946 | return rjson_open_buffer(string, len); | |
947 | } | |
948 | ||
949 | static int _rjson_stream_io(void* buf, int len, void *user) | |
950 | { | |
951 | return (int)intfstream_read((intfstream_t*)user, buf, (uint64_t)len); | |
952 | } | |
953 | ||
954 | rjson_t *rjson_open_stream(struct intfstream_internal *stream) | |
955 | { | |
956 | /* Allocate an input buffer based on the file size */ | |
957 | int64_t size = intfstream_get_size(stream); | |
958 | int io_size = | |
959 | (size > 1024*1024 ? 4096 : | |
960 | (size > 256*1024 ? 2048 : 1024)); | |
961 | return rjson_open_user(_rjson_stream_io, stream, io_size); | |
962 | } | |
963 | ||
964 | static int _rjson_rfile_io(void* buf, int len, void *user) | |
965 | { | |
966 | return (int)filestream_read((RFILE*)user, buf, (int64_t)len); | |
967 | } | |
968 | ||
969 | rjson_t *rjson_open_rfile(RFILE *rfile) | |
970 | { | |
971 | /* Allocate an input buffer based on the file size */ | |
972 | int64_t size = filestream_get_size(rfile); | |
973 | int io_size = | |
974 | (size > 1024*1024 ? 4096 : | |
975 | (size > 256*1024 ? 2048 : 1024)); | |
976 | return rjson_open_user(_rjson_rfile_io, rfile, io_size); | |
977 | } | |
978 | ||
979 | void rjson_set_options(rjson_t *json, char rjson_option_flags) | |
980 | { | |
981 | json->option_flags = rjson_option_flags; | |
982 | } | |
983 | ||
984 | void rjson_set_max_depth(rjson_t *json, unsigned int max_depth) | |
985 | { | |
986 | json->stack_max = max_depth; | |
987 | } | |
988 | ||
989 | const char *rjson_get_string(rjson_t *json, size_t *length) | |
990 | { | |
991 | char* str = (json->string_pass_through | |
992 | ? json->string_pass_through : json->string); | |
993 | if (length) | |
994 | *length = json->string_len; | |
995 | str[json->string_len] = '\0'; | |
996 | return str; | |
997 | } | |
998 | ||
999 | double rjson_get_double(rjson_t *json) | |
1000 | { | |
1001 | char* str = (json->string_pass_through ? json->string_pass_through : json->string); | |
1002 | str[json->string_len] = '\0'; | |
1003 | if (json->decimal_sep != '.') | |
1004 | { | |
1005 | /* handle locale that uses a non-standard decimal separator */ | |
1006 | char *p; | |
1007 | if (json->decimal_sep == 0) | |
1008 | { | |
1009 | char test[4]; | |
1010 | snprintf(test, sizeof(test), "%.1f", 0.0f); | |
1011 | json->decimal_sep = test[1]; | |
1012 | } | |
1013 | if (json->decimal_sep != '.' && (p = strchr(str, '.')) != NULL) | |
1014 | { | |
1015 | double res; | |
1016 | *p = json->decimal_sep; | |
1017 | res = atof(str); | |
1018 | *p = '.'; | |
1019 | return res; | |
1020 | } | |
1021 | } | |
1022 | return atof(str); | |
1023 | } | |
1024 | ||
1025 | int rjson_get_int(rjson_t *json) | |
1026 | { | |
1027 | char* str = (json->string_pass_through ? json->string_pass_through : json->string); | |
1028 | str[json->string_len] = '\0'; | |
1029 | return atoi(str); | |
1030 | } | |
1031 | ||
1032 | const char *rjson_get_error(rjson_t *json) | |
1033 | { | |
1034 | return (json->stack_top->type == RJSON_ERROR ? json->error_text : ""); | |
1035 | } | |
1036 | ||
1037 | void rjson_set_error(rjson_t *json, const char* error) | |
1038 | { | |
1039 | _rjson_error(json, "%s", error); | |
1040 | } | |
1041 | ||
1042 | size_t rjson_get_source_line(rjson_t *json) | |
1043 | { | |
1044 | return json->source_line; | |
1045 | } | |
1046 | ||
1047 | size_t rjson_get_source_column(rjson_t *json) | |
1048 | { | |
1049 | return (json->input_p == json->source_column_p ? 1 : | |
1050 | json->input_p - json->source_column_p); | |
1051 | } | |
1052 | ||
1053 | int rjson_get_source_context_len(rjson_t *json) | |
1054 | { | |
1055 | const unsigned char *from = json->input_buf, *to = json->input_end, *p = json->input_p; | |
1056 | return (int)(((p + 256 < to ? p + 256 : to) - (p > from + 256 ? p - 256 : from))); | |
1057 | } | |
1058 | ||
1059 | const char* rjson_get_source_context_buf(rjson_t *json) | |
1060 | { | |
1061 | /* inside the input buffer, some " may have been replaced with \0. */ | |
1062 | const unsigned char *p = json->input_p, *from = json->input_buf; | |
1063 | unsigned char *i = json->input_buf; | |
1064 | for (; i != json->input_end; i++) | |
1065 | { | |
1066 | if (*i == '\0') | |
1067 | *i = '"'; | |
1068 | } | |
1069 | return (const char*)(p > from + 256 ? p - 256 : from); | |
1070 | } | |
1071 | ||
1072 | bool rjson_check_context(rjson_t *json, unsigned int depth, ...) | |
1073 | { | |
1074 | va_list ap; | |
1075 | const struct _rjson_stack *stack = json->stack, *stack_top = json->stack_top; | |
1076 | if ((unsigned int)(stack_top - stack) != depth) | |
1077 | return false; | |
1078 | va_start(ap, depth); | |
1079 | while (++stack <= stack_top) | |
1080 | { | |
1081 | if (va_arg(ap, int) == (int)stack->type) continue; | |
1082 | va_end(ap); | |
1083 | return false; | |
1084 | } | |
1085 | va_end(ap); | |
1086 | return true; | |
1087 | } | |
1088 | ||
1089 | unsigned int rjson_get_context_depth(rjson_t *json) | |
1090 | { | |
1091 | return (unsigned int)(json->stack_top - json->stack); | |
1092 | } | |
1093 | ||
1094 | size_t rjson_get_context_count(rjson_t *json) | |
1095 | { | |
1096 | return json->stack_top->count; | |
1097 | } | |
1098 | ||
1099 | enum rjson_type rjson_get_context_type(rjson_t *json) | |
1100 | { | |
1101 | return json->stack_top->type; | |
1102 | } | |
1103 | ||
1104 | void rjson_free(rjson_t *json) | |
1105 | { | |
1106 | if (json->stack != json->inline_stack) | |
1107 | free(json->stack); | |
1108 | if (json->string != json->inline_string) | |
1109 | free(json->string); | |
1110 | free(json); | |
1111 | } | |
1112 | ||
/* Default handlers used by rjson_parse in place of NULL callbacks.
 * They ignore their arguments and always return true. */
static bool _rjson_nop_default(void *context)
{
   (void)context;
   return true;
}

static bool _rjson_nop_string(void *context, const char *value, size_t length)
{
   (void)context;
   (void)value;
   (void)length;
   return true;
}

static bool _rjson_nop_bool(void *context, bool value)
{
   (void)context;
   (void)value;
   return true;
}
1116 | ||
1117 | enum rjson_type rjson_parse(rjson_t *json, void* context, | |
1118 | bool (*object_member_handler)(void *context, const char *str, size_t len), | |
1119 | bool (*string_handler )(void *context, const char *str, size_t len), | |
1120 | bool (*number_handler )(void *context, const char *str, size_t len), | |
1121 | bool (*start_object_handler )(void *context), | |
1122 | bool (*end_object_handler )(void *context), | |
1123 | bool (*start_array_handler )(void *context), | |
1124 | bool (*end_array_handler )(void *context), | |
1125 | bool (*boolean_handler )(void *context, bool value), | |
1126 | bool (*null_handler )(void *context)) | |
1127 | { | |
1128 | bool in_object = false; | |
1129 | size_t len; | |
1130 | const char* string; | |
1131 | if (!object_member_handler) object_member_handler = _rjson_nop_string; | |
1132 | if (!string_handler ) string_handler = _rjson_nop_string; | |
1133 | if (!number_handler ) number_handler = _rjson_nop_string; | |
1134 | if (!start_object_handler ) start_object_handler = _rjson_nop_default; | |
1135 | if (!end_object_handler ) end_object_handler = _rjson_nop_default; | |
1136 | if (!start_array_handler ) start_array_handler = _rjson_nop_default; | |
1137 | if (!end_array_handler ) end_array_handler = _rjson_nop_default; | |
1138 | if (!boolean_handler ) boolean_handler = _rjson_nop_bool; | |
1139 | if (!null_handler ) null_handler = _rjson_nop_default; | |
1140 | for (;;) | |
1141 | { | |
1142 | switch (rjson_next(json)) | |
1143 | { | |
1144 | case RJSON_STRING: | |
1145 | string = rjson_get_string(json, &len); | |
1146 | if (_rJSON_LIKELY( | |
1147 | (in_object && (json->stack_top->count & 1) ? | |
1148 | object_member_handler : string_handler) | |
1149 | (context, string, len))) | |
1150 | continue; | |
1151 | return RJSON_STRING; | |
1152 | case RJSON_NUMBER: | |
1153 | string = rjson_get_string(json, &len); | |
1154 | if (_rJSON_LIKELY(number_handler(context, string, len))) | |
1155 | continue; | |
1156 | return RJSON_NUMBER; | |
1157 | case RJSON_OBJECT: | |
1158 | in_object = true; | |
1159 | if (_rJSON_LIKELY(start_object_handler(context))) | |
1160 | continue; | |
1161 | return RJSON_OBJECT; | |
1162 | case RJSON_ARRAY: | |
1163 | in_object = false; | |
1164 | if (_rJSON_LIKELY(start_array_handler(context))) | |
1165 | continue; | |
1166 | return RJSON_ARRAY; | |
1167 | case RJSON_OBJECT_END: | |
1168 | if (_rJSON_LIKELY(end_object_handler(context))) | |
1169 | { | |
1170 | in_object = (json->stack_top->type == RJSON_OBJECT); | |
1171 | continue; | |
1172 | } | |
1173 | return RJSON_OBJECT_END; | |
1174 | case RJSON_ARRAY_END: | |
1175 | if (_rJSON_LIKELY(end_array_handler(context))) | |
1176 | { | |
1177 | in_object = (json->stack_top->type == RJSON_OBJECT); | |
1178 | continue; | |
1179 | } | |
1180 | return RJSON_ARRAY_END; | |
1181 | case RJSON_TRUE: | |
1182 | if (_rJSON_LIKELY(boolean_handler(context, true))) | |
1183 | continue; | |
1184 | return RJSON_TRUE; | |
1185 | case RJSON_FALSE: | |
1186 | if (_rJSON_LIKELY(boolean_handler(context, false))) | |
1187 | continue; | |
1188 | return RJSON_FALSE; | |
1189 | case RJSON_NULL: | |
1190 | if (_rJSON_LIKELY(null_handler(context))) | |
1191 | continue; | |
1192 | return RJSON_NULL; | |
1193 | case RJSON_ERROR: | |
1194 | return RJSON_ERROR; | |
1195 | case RJSON_DONE: | |
1196 | return RJSON_DONE; | |
1197 | } | |
1198 | } | |
1199 | } | |
1200 | ||
1201 | bool rjson_parse_quick(const char *string, size_t len, void* context, char option_flags, | |
1202 | bool (*object_member_handler)(void *context, const char *str, size_t len), | |
1203 | bool (*string_handler )(void *context, const char *str, size_t len), | |
1204 | bool (*number_handler )(void *context, const char *str, size_t len), | |
1205 | bool (*start_object_handler )(void *context), | |
1206 | bool (*end_object_handler )(void *context), | |
1207 | bool (*start_array_handler )(void *context), | |
1208 | bool (*end_array_handler )(void *context), | |
1209 | bool (*boolean_handler )(void *context, bool value), | |
1210 | bool (*null_handler )(void *context), | |
1211 | void (*error_handler )(void *context, int line, int col, const char* error)) | |
1212 | { | |
1213 | const char *user_data[2]; | |
1214 | rjson_t json; | |
1215 | user_data[0] = string; | |
1216 | user_data[1] = string + len; | |
1217 | _rjson_setup(&json, _rjson_buffer_io, (void*)user_data, sizeof(json.input_buf)); | |
1218 | rjson_set_options(&json, option_flags); | |
1219 | if (rjson_parse(&json, context, | |
1220 | object_member_handler, string_handler, number_handler, | |
1221 | start_object_handler, end_object_handler, | |
1222 | start_array_handler, end_array_handler, | |
1223 | boolean_handler, null_handler) == RJSON_DONE) | |
1224 | return true; | |
1225 | if (error_handler) | |
1226 | error_handler(context, | |
1227 | (int)rjson_get_source_line(&json), | |
1228 | (int)rjson_get_source_column(&json), | |
1229 | rjson_get_error(&json)); | |
1230 | return false; | |
1231 | } | |
1232 | ||
1233 | struct rjsonwriter | |
1234 | { | |
1235 | char* buf; | |
1236 | int buf_num, buf_cap; | |
1237 | ||
1238 | rjsonwriter_io_t io; | |
1239 | void *user_data; | |
1240 | ||
1241 | const char* error_text; | |
1242 | char option_flags, decimal_sep; | |
1243 | bool buf_is_output, final_flush; | |
1244 | ||
1245 | char inline_buf[1024]; | |
1246 | }; | |
1247 | ||
1248 | rjsonwriter_t *rjsonwriter_open_user(rjsonwriter_io_t io, void *user_data) | |
1249 | { | |
1250 | rjsonwriter_t* writer = (rjsonwriter_t*)malloc(sizeof(rjsonwriter_t)); | |
1251 | if (!writer) | |
1252 | return NULL; | |
1253 | ||
1254 | writer->buf = writer->inline_buf; | |
1255 | writer->buf_num = 0; | |
1256 | writer->buf_cap = sizeof(writer->inline_buf); | |
1257 | ||
1258 | writer->error_text = NULL; | |
1259 | writer->option_flags = writer->decimal_sep = 0; | |
1260 | writer->buf_is_output = writer->final_flush = false; | |
1261 | ||
1262 | writer->io = io; | |
1263 | writer->user_data = user_data; | |
1264 | ||
1265 | return writer; | |
1266 | } | |
1267 | ||
1268 | static int _rjsonwriter_stream_io(const void* buf, int len, void *user) | |
1269 | { | |
1270 | return (int)intfstream_write((intfstream_t*)user, buf, (uint64_t)len); | |
1271 | } | |
1272 | ||
1273 | rjsonwriter_t *rjsonwriter_open_stream(struct intfstream_internal *stream) | |
1274 | { | |
1275 | return rjsonwriter_open_user(_rjsonwriter_stream_io, stream); | |
1276 | } | |
1277 | ||
1278 | static int _rjsonwriter_rfile_io(const void* buf, int len, void *user) | |
1279 | { | |
1280 | return (int)filestream_write((RFILE*)user, buf, (int64_t)len); | |
1281 | } | |
1282 | ||
1283 | rjsonwriter_t *rjsonwriter_open_rfile(RFILE *rfile) | |
1284 | { | |
1285 | return rjsonwriter_open_user(_rjsonwriter_rfile_io, rfile); | |
1286 | } | |
1287 | ||
1288 | static int _rjsonwriter_memory_io(const void* buf, int len, void *user) | |
1289 | { | |
1290 | rjsonwriter_t *writer = (rjsonwriter_t *)user; | |
1291 | bool is_append = (buf != writer->buf); | |
1292 | int new_cap = writer->buf_num + (is_append ? len : 0) + 512; | |
1293 | if (!writer->final_flush && (is_append || new_cap > writer->buf_cap)) | |
1294 | { | |
1295 | bool can_realloc = (writer->buf != writer->inline_buf); | |
1296 | char* new_buf = (char*)(can_realloc ? realloc(writer->buf, new_cap) : malloc(new_cap)); | |
1297 | if (!new_buf) | |
1298 | return 0; | |
1299 | if (!can_realloc) | |
1300 | memcpy(new_buf, writer->buf, writer->buf_num); | |
1301 | if (is_append) | |
1302 | { | |
1303 | memcpy(new_buf + writer->buf_num, buf, len); | |
1304 | writer->buf_num += len; | |
1305 | } | |
1306 | writer->buf = new_buf; | |
1307 | writer->buf_cap = new_cap; | |
1308 | } | |
1309 | return len; | |
1310 | } | |
1311 | ||
1312 | rjsonwriter_t *rjsonwriter_open_memory(void) | |
1313 | { | |
1314 | rjsonwriter_t *writer = rjsonwriter_open_user(_rjsonwriter_memory_io, NULL); | |
1315 | if (!writer) | |
1316 | return NULL; | |
1317 | writer->user_data = writer; | |
1318 | writer->buf_is_output = true; | |
1319 | return writer; | |
1320 | } | |
1321 | ||
1322 | char* rjsonwriter_get_memory_buffer(rjsonwriter_t *writer, int* len) | |
1323 | { | |
1324 | if (writer->io != _rjsonwriter_memory_io || writer->error_text) | |
1325 | return NULL; | |
1326 | if (writer->buf_num == writer->buf_cap) | |
1327 | rjsonwriter_flush(writer); | |
1328 | writer->buf[writer->buf_num] = '\0'; | |
1329 | if (len) | |
1330 | *len = writer->buf_num; | |
1331 | return writer->buf; | |
1332 | } | |
1333 | ||
1334 | int rjsonwriter_count_memory_buffer(rjsonwriter_t *writer) | |
1335 | { | |
1336 | return writer->buf_num; | |
1337 | } | |
1338 | ||
1339 | void rjsonwriter_erase_memory_buffer(rjsonwriter_t *writer, int keep_len) | |
1340 | { | |
1341 | if (keep_len <= writer->buf_num) | |
1342 | writer->buf_num = (keep_len < 0 ? 0 : keep_len); | |
1343 | } | |
1344 | ||
1345 | bool rjsonwriter_free(rjsonwriter_t *writer) | |
1346 | { | |
1347 | bool res; | |
1348 | writer->final_flush = true; | |
1349 | res = rjsonwriter_flush(writer); | |
1350 | if (writer->buf != writer->inline_buf) | |
1351 | free(writer->buf); | |
1352 | free(writer); | |
1353 | return res; | |
1354 | } | |
1355 | ||
1356 | void rjsonwriter_set_options(rjsonwriter_t *writer, int rjsonwriter_option_flags) | |
1357 | { | |
1358 | writer->option_flags = rjsonwriter_option_flags; | |
1359 | } | |
1360 | ||
1361 | bool rjsonwriter_flush(rjsonwriter_t *writer) | |
1362 | { | |
1363 | if (writer->buf_num && !writer->error_text && writer->io(writer->buf, | |
1364 | writer->buf_num, writer->user_data) != writer->buf_num) | |
1365 | writer->error_text = "output error"; | |
1366 | if (!writer->buf_is_output || writer->error_text) | |
1367 | writer->buf_num = 0; | |
1368 | return !writer->error_text; | |
1369 | } | |
1370 | ||
1371 | const char *rjsonwriter_get_error(rjsonwriter_t *writer) | |
1372 | { | |
1373 | return (writer->error_text ? writer->error_text : ""); | |
1374 | } | |
1375 | ||
1376 | void rjsonwriter_raw(rjsonwriter_t *writer, const char *buf, int len) | |
1377 | { | |
1378 | if (writer->buf_num + len > writer->buf_cap) | |
1379 | rjsonwriter_flush(writer); | |
1380 | if (len == 1) | |
1381 | { | |
1382 | if (buf[0] > ' ' || | |
1383 | !(writer->option_flags & RJSONWRITER_OPTION_SKIP_WHITESPACE)) | |
1384 | writer->buf[writer->buf_num++] = buf[0]; | |
1385 | } | |
1386 | else | |
1387 | { | |
1388 | int add = writer->buf_cap - writer->buf_num; | |
1389 | if (add > len) | |
1390 | add = len; | |
1391 | memcpy(writer->buf + writer->buf_num, buf, add); | |
1392 | writer->buf_num += add; | |
1393 | if (len == add) | |
1394 | return; | |
1395 | rjsonwriter_flush(writer); | |
1396 | len -= add; | |
1397 | buf += add; | |
1398 | if (writer->buf_num + len <= writer->buf_cap) | |
1399 | { | |
1400 | memcpy(writer->buf + writer->buf_num, buf, len); | |
1401 | writer->buf_num += len; | |
1402 | } | |
1403 | else if (writer->io(buf, len, writer->user_data) != len) | |
1404 | writer->error_text = "output error"; | |
1405 | } | |
1406 | } | |
1407 | ||
1408 | void rjsonwriter_rawf(rjsonwriter_t *writer, const char *fmt, ...) | |
1409 | { | |
1410 | int available, need; | |
1411 | va_list ap, ap2; | |
1412 | if (writer->buf_num >= writer->buf_cap - 16) | |
1413 | rjsonwriter_flush(writer); | |
1414 | available = (writer->buf_cap - writer->buf_num); | |
1415 | va_start(ap, fmt); | |
1416 | need = vsnprintf(writer->buf + writer->buf_num, available, fmt, ap); | |
1417 | va_end(ap); | |
1418 | if (need <= 0) | |
1419 | return; | |
1420 | if (need < available) | |
1421 | { | |
1422 | writer->buf_num += need; | |
1423 | return; | |
1424 | } | |
1425 | rjsonwriter_flush(writer); | |
1426 | if (writer->buf_num + need >= writer->buf_cap) | |
1427 | { | |
1428 | int newcap = writer->buf_num + need + 1; | |
1429 | char* newbuf = (char*)malloc(newcap); | |
1430 | if (!newbuf) | |
1431 | { | |
1432 | if (!writer->error_text) | |
1433 | writer->error_text = "out of memory"; | |
1434 | return; | |
1435 | } | |
1436 | if (writer->buf_num) | |
1437 | memcpy(newbuf, writer->buf, writer->buf_num); | |
1438 | if (writer->buf != writer->inline_buf) | |
1439 | free(writer->buf); | |
1440 | writer->buf = newbuf; | |
1441 | writer->buf_cap = newcap; | |
1442 | } | |
1443 | va_start(ap2, fmt); | |
1444 | vsnprintf(writer->buf + writer->buf_num, writer->buf_cap - writer->buf_num, fmt, ap2); | |
1445 | va_end(ap2); | |
1446 | writer->buf_num += need; | |
1447 | } | |
1448 | ||
1449 | void _rjsonwriter_add_escaped(rjsonwriter_t *writer, unsigned char c) | |
1450 | { | |
1451 | char esc_buf[8], esc_len = 2; | |
1452 | const char* esc; | |
1453 | switch (c) | |
1454 | { | |
1455 | case '\b': | |
1456 | esc = "\\b"; | |
1457 | break; | |
1458 | case '\t': | |
1459 | esc = "\\t"; | |
1460 | break; | |
1461 | case '\n': | |
1462 | esc = "\\n"; | |
1463 | break; | |
1464 | case '\f': | |
1465 | esc = "\\f"; | |
1466 | break; | |
1467 | case '\r': | |
1468 | esc = "\\r"; | |
1469 | break; | |
1470 | case '\"': | |
1471 | esc = "\\\""; | |
1472 | break; | |
1473 | case '\\': | |
1474 | esc = "\\\\"; | |
1475 | break; | |
1476 | case '/': | |
1477 | esc = "\\/"; | |
1478 | break; | |
1479 | default: | |
1480 | snprintf(esc_buf, sizeof(esc_buf), "\\u%04x", c); | |
1481 | esc = esc_buf; | |
1482 | esc_len = 6; | |
1483 | } | |
1484 | rjsonwriter_raw(writer, esc, esc_len); | |
1485 | } | |
1486 | ||
1487 | void rjsonwriter_add_string(rjsonwriter_t *writer, const char *value) | |
1488 | { | |
1489 | const char *p = (const char*)value, *raw = p; | |
1490 | unsigned char c; | |
1491 | rjsonwriter_raw(writer, "\"", 1); | |
1492 | if (!p) | |
1493 | goto string_end; | |
1494 | while ((c = (unsigned char)*p++) != '\0') | |
1495 | { | |
1496 | /* forward slash is special, it should be escaped if the previous character | |
1497 | * was a < (intended to avoid having </script> html tags in JSON files) */ | |
1498 | if ( c >= 0x20 && c != '\"' && c != '\\' && | |
1499 | (c != '/' || p < value + 2 || p[-2] != '<')) | |
1500 | continue; | |
1501 | if (raw != p - 1) | |
1502 | rjsonwriter_raw(writer, raw, (int)(p - 1 - raw)); | |
1503 | _rjsonwriter_add_escaped(writer, c); | |
1504 | raw = p; | |
1505 | } | |
1506 | if (raw != p - 1) | |
1507 | rjsonwriter_raw(writer, raw, (int)(p - 1 - raw)); | |
1508 | string_end: | |
1509 | rjsonwriter_raw(writer, "\"", 1); | |
1510 | } | |
1511 | ||
1512 | void rjsonwriter_add_string_len(rjsonwriter_t *writer, const char *value, int len) | |
1513 | { | |
1514 | const char *p = (const char*)value, *raw = p, *end = p + len; | |
1515 | rjsonwriter_raw(writer, "\"", 1); | |
1516 | while (p != end) | |
1517 | { | |
1518 | unsigned char c = (unsigned char)*p++; | |
1519 | if ( c >= 0x20 && c != '\"' && c != '\\' | |
1520 | && (c != '/' || p < value + 2 || p[-2] != '<')) | |
1521 | continue; | |
1522 | if (raw != p - 1) | |
1523 | rjsonwriter_raw(writer, raw, (int)(p - 1 - raw)); | |
1524 | _rjsonwriter_add_escaped(writer, c); | |
1525 | raw = p; | |
1526 | } | |
1527 | if (raw != end) | |
1528 | rjsonwriter_raw(writer, raw, (int)(end - raw)); | |
1529 | rjsonwriter_raw(writer, "\"", 1); | |
1530 | } | |
1531 | ||
1532 | void rjsonwriter_add_double(rjsonwriter_t *writer, double value) | |
1533 | { | |
1534 | int old_buf_num = writer->buf_num; | |
1535 | rjsonwriter_rawf(writer, "%G", value); | |
1536 | if (writer->decimal_sep != '.') | |
1537 | { | |
1538 | /* handle locale that uses a non-standard decimal separator */ | |
1539 | char *p, *str; | |
1540 | if (writer->decimal_sep == 0) | |
1541 | { | |
1542 | char test[4]; | |
1543 | snprintf(test, sizeof(test), "%.1f", 0.0f); | |
1544 | if ((writer->decimal_sep = test[1]) == '.') | |
1545 | return; | |
1546 | } | |
1547 | str = writer->buf + (old_buf_num > writer->buf_num ? 0 : old_buf_num); | |
1548 | if ((p = strchr(str, writer->decimal_sep)) != NULL) | |
1549 | *p = '.'; | |
1550 | } | |
1551 | } | |
1552 | ||
1553 | void rjsonwriter_add_spaces(rjsonwriter_t *writer, int count) | |
1554 | { | |
1555 | if (!(writer->option_flags & RJSONWRITER_OPTION_SKIP_WHITESPACE)) | |
1556 | for (; count > 0; count -= 8) | |
1557 | rjsonwriter_raw(writer, " ", (count > 8 ? 8 : count)); | |
1558 | } | |
1559 | ||
1560 | void rjsonwriter_add_tabs(rjsonwriter_t *writer, int count) | |
1561 | { | |
1562 | if (!(writer->option_flags & RJSONWRITER_OPTION_SKIP_WHITESPACE)) | |
1563 | for (; count > 0; count -= 8) | |
1564 | rjsonwriter_raw(writer, "\t\t\t\t\t\t\t\t", (count > 8 ? 8 : count)); | |
1565 | } | |
1566 | ||
1567 | #undef _rJSON_EOF | |
1568 | #undef _rJSON_LIKELY |