]> git.ozlabs.org Git - ccan/blob - ccan/json/json.c
htable: fix tools/speed.
[ccan] / ccan / json / json.c
1 /*
2   Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
3   All rights reserved.
4
5   Permission is hereby granted, free of charge, to any person obtaining a copy
6   of this software and associated documentation files (the "Software"), to deal
7   in the Software without restriction, including without limitation the rights
8   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9   copies of the Software, and to permit persons to whom the Software is
10   furnished to do so, subject to the following conditions:
11
12   The above copyright notice and this permission notice shall be included in
13   all copies or substantial portions of the Software.
14
15   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21   THE SOFTWARE.
22 */
23
24 #include "json.h"
25
26 #include <assert.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31
32 #define out_of_memory() do {                    \
33                 fprintf(stderr, "Out of memory.\n");    \
34                 exit(EXIT_FAILURE);                     \
35         } while (0)
36
37 /* Sadly, strdup is not portable. */
38 static char *json_strdup(const char *str)
39 {
40         char *ret = (char*) malloc(strlen(str) + 1);
41         if (ret == NULL)
42                 out_of_memory();
43         strcpy(ret, str);
44         return ret;
45 }
46
47 /* String buffer */
48
49 typedef struct
50 {
51         char *cur;
52         char *end;
53         char *start;
54 } SB;
55
56 static void sb_init(SB *sb)
57 {
58         sb->start = (char*) malloc(17);
59         if (sb->start == NULL)
60                 out_of_memory();
61         sb->cur = sb->start;
62         sb->end = sb->start + 16;
63 }
64
65 /* sb and need may be evaluated multiple times. */
66 #define sb_need(sb, need) do {                  \
67                 if ((sb)->end - (sb)->cur < (need))     \
68                         sb_grow(sb, need);                  \
69         } while (0)
70
71 static void sb_grow(SB *sb, int need)
72 {
73         size_t length = sb->cur - sb->start;
74         size_t alloc = sb->end - sb->start;
75         
76         do {
77                 alloc *= 2;
78         } while (alloc < length + need);
79         
80         sb->start = (char*) realloc(sb->start, alloc + 1);
81         if (sb->start == NULL)
82                 out_of_memory();
83         sb->cur = sb->start + length;
84         sb->end = sb->start + alloc;
85 }
86
87 static void sb_put(SB *sb, const char *bytes, int count)
88 {
89         sb_need(sb, count);
90         memcpy(sb->cur, bytes, count);
91         sb->cur += count;
92 }
93
94 #define sb_putc(sb, c) do {         \
95                 if ((sb)->cur >= (sb)->end) \
96                         sb_grow(sb, 1);         \
97                 *(sb)->cur++ = (c);         \
98         } while (0)
99
100 static void sb_puts(SB *sb, const char *str)
101 {
102         sb_put(sb, str, strlen(str));
103 }
104
105 static char *sb_finish(SB *sb)
106 {
107         *sb->cur = 0;
108         assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
109         return sb->start;
110 }
111
112 static void sb_free(SB *sb)
113 {
114         free(sb->start);
115 }
116
117 /*
118  * Unicode helper functions
119  *
120  * These are taken from the ccan/charset module and customized a bit.
121  * Putting them here means the compiler can (choose to) inline them,
122  * and it keeps ccan/json from having a dependency.
123  */
124
125 /*
126  * Type for Unicode codepoints.
127  * We need our own because wchar_t might be 16 bits.
128  */
129 typedef uint32_t uchar_t;
130
131 /*
132  * Validate a single UTF-8 character starting at @s.
133  * The string must be null-terminated.
134  *
135  * If it's valid, return its length (1 thru 4).
136  * If it's invalid or clipped, return 0.
137  *
138  * This function implements the syntax given in RFC3629, which is
139  * the same as that given in The Unicode Standard, Version 6.0.
140  *
141  * It has the following properties:
142  *
143  *  * All codepoints U+0000..U+10FFFF may be encoded,
144  *    except for U+D800..U+DFFF, which are reserved
145  *    for UTF-16 surrogate pair encoding.
146  *  * UTF-8 byte sequences longer than 4 bytes are not permitted,
147  *    as they exceed the range of Unicode.
148  *  * The sixty-six Unicode "non-characters" are permitted
149  *    (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
150  */
151 static int utf8_validate_cz(const char *s)
152 {
153         unsigned char c = *s++;
154         
155         if (c <= 0x7F) {        /* 00..7F */
156                 return 1;
157         } else if (c <= 0xC1) { /* 80..C1 */
158                 /* Disallow overlong 2-byte sequence. */
159                 return 0;
160         } else if (c <= 0xDF) { /* C2..DF */
161                 /* Make sure subsequent byte is in the range 0x80..0xBF. */
162                 if (((unsigned char)*s++ & 0xC0) != 0x80)
163                         return 0;
164                 
165                 return 2;
166         } else if (c <= 0xEF) { /* E0..EF */
167                 /* Disallow overlong 3-byte sequence. */
168                 if (c == 0xE0 && (unsigned char)*s < 0xA0)
169                         return 0;
170                 
171                 /* Disallow U+D800..U+DFFF. */
172                 if (c == 0xED && (unsigned char)*s > 0x9F)
173                         return 0;
174                 
175                 /* Make sure subsequent bytes are in the range 0x80..0xBF. */
176                 if (((unsigned char)*s++ & 0xC0) != 0x80)
177                         return 0;
178                 if (((unsigned char)*s++ & 0xC0) != 0x80)
179                         return 0;
180                 
181                 return 3;
182         } else if (c <= 0xF4) { /* F0..F4 */
183                 /* Disallow overlong 4-byte sequence. */
184                 if (c == 0xF0 && (unsigned char)*s < 0x90)
185                         return 0;
186                 
187                 /* Disallow codepoints beyond U+10FFFF. */
188                 if (c == 0xF4 && (unsigned char)*s > 0x8F)
189                         return 0;
190                 
191                 /* Make sure subsequent bytes are in the range 0x80..0xBF. */
192                 if (((unsigned char)*s++ & 0xC0) != 0x80)
193                         return 0;
194                 if (((unsigned char)*s++ & 0xC0) != 0x80)
195                         return 0;
196                 if (((unsigned char)*s++ & 0xC0) != 0x80)
197                         return 0;
198                 
199                 return 4;
200         } else {                /* F5..FF */
201                 return 0;
202         }
203 }
204
205 /* Validate a null-terminated UTF-8 string. */
206 static bool utf8_validate(const char *s)
207 {
208         int len;
209         
210         for (; *s != 0; s += len) {
211                 len = utf8_validate_cz(s);
212                 if (len == 0)
213                         return false;
214         }
215         
216         return true;
217 }
218
219 /*
220  * Read a single UTF-8 character starting at @s,
221  * returning the length, in bytes, of the character read.
222  *
223  * This function assumes input is valid UTF-8,
224  * and that there are enough characters in front of @s.
225  */
226 static int utf8_read_char(const char *s, uchar_t *out)
227 {
228         const unsigned char *c = (const unsigned char*) s;
229         
230         assert(utf8_validate_cz(s));
231
232         if (c[0] <= 0x7F) {
233                 /* 00..7F */
234                 *out = c[0];
235                 return 1;
236         } else if (c[0] <= 0xDF) {
237                 /* C2..DF (unless input is invalid) */
238                 *out = ((uchar_t)c[0] & 0x1F) << 6 |
239                        ((uchar_t)c[1] & 0x3F);
240                 return 2;
241         } else if (c[0] <= 0xEF) {
242                 /* E0..EF */
243                 *out = ((uchar_t)c[0] &  0xF) << 12 |
244                        ((uchar_t)c[1] & 0x3F) << 6  |
245                        ((uchar_t)c[2] & 0x3F);
246                 return 3;
247         } else {
248                 /* F0..F4 (unless input is invalid) */
249                 *out = ((uchar_t)c[0] &  0x7) << 18 |
250                        ((uchar_t)c[1] & 0x3F) << 12 |
251                        ((uchar_t)c[2] & 0x3F) << 6  |
252                        ((uchar_t)c[3] & 0x3F);
253                 return 4;
254         }
255 }
256
257 /*
258  * Write a single UTF-8 character to @s,
259  * returning the length, in bytes, of the character written.
260  *
261  * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
262  *
263  * This function will write up to 4 bytes to @out.
264  */
265 static int utf8_write_char(uchar_t unicode, char *out)
266 {
267         unsigned char *o = (unsigned char*) out;
268         
269         assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));
270
271         if (unicode <= 0x7F) {
272                 /* U+0000..U+007F */
273                 *o++ = unicode;
274                 return 1;
275         } else if (unicode <= 0x7FF) {
276                 /* U+0080..U+07FF */
277                 *o++ = 0xC0 | unicode >> 6;
278                 *o++ = 0x80 | (unicode & 0x3F);
279                 return 2;
280         } else if (unicode <= 0xFFFF) {
281                 /* U+0800..U+FFFF */
282                 *o++ = 0xE0 | unicode >> 12;
283                 *o++ = 0x80 | (unicode >> 6 & 0x3F);
284                 *o++ = 0x80 | (unicode & 0x3F);
285                 return 3;
286         } else {
287                 /* U+10000..U+10FFFF */
288                 *o++ = 0xF0 | unicode >> 18;
289                 *o++ = 0x80 | (unicode >> 12 & 0x3F);
290                 *o++ = 0x80 | (unicode >> 6 & 0x3F);
291                 *o++ = 0x80 | (unicode & 0x3F);
292                 return 4;
293         }
294 }
295
296 /*
297  * Compute the Unicode codepoint of a UTF-16 surrogate pair.
298  *
299  * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
300  * If they aren't, this function returns false.
301  */
302 static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uchar_t *unicode)
303 {
304         if (uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF) {
305                 *unicode = 0x10000 + ((((uchar_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
306                 return true;
307         } else {
308                 return false;
309         }
310 }
311
312 /*
313  * Construct a UTF-16 surrogate pair given a Unicode codepoint.
314  *
315  * @unicode must be U+10000..U+10FFFF.
316  */
317 static void to_surrogate_pair(uchar_t unicode, uint16_t *uc, uint16_t *lc)
318 {
319         uchar_t n;
320         
321         assert(unicode >= 0x10000 && unicode <= 0x10FFFF);
322         
323         n = unicode - 0x10000;
324         *uc = ((n >> 10) & 0x3FF) | 0xD800;
325         *lc = (n & 0x3FF) | 0xDC00;
326 }
327
328 #define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ')
329 #define is_digit(c) ((c) >= '0' && (c) <= '9')
330
331 static bool parse_value     (const char **sp, JsonNode        **out);
332 static bool parse_string    (const char **sp, char            **out);
333 static bool parse_number    (const char **sp, double           *out);
334 static bool parse_array     (const char **sp, JsonNode        **out);
335 static bool parse_object    (const char **sp, JsonNode        **out);
336 static bool parse_hex16     (const char **sp, uint16_t         *out);
337
338 static bool expect_literal  (const char **sp, const char *str);
339 static void skip_space      (const char **sp);
340
341 static void emit_value              (SB *out, const JsonNode *node);
342 static void emit_value_indented     (SB *out, const JsonNode *node, const char *space, int indent_level);
343 static void emit_string             (SB *out, const char *str);
344 static void emit_number             (SB *out, double num);
345 static void emit_array              (SB *out, const JsonNode *array);
346 static void emit_array_indented     (SB *out, const JsonNode *array, const char *space, int indent_level);
347 static void emit_object             (SB *out, const JsonNode *object);
348 static void emit_object_indented    (SB *out, const JsonNode *object, const char *space, int indent_level);
349
350 static int write_hex16(char *out, uint16_t val);
351
352 static JsonNode *mknode(JsonTag tag);
353 static void append_node(JsonNode *parent, JsonNode *child);
354 static void prepend_node(JsonNode *parent, JsonNode *child);
355 static void append_member(JsonNode *object, char *key, JsonNode *value);
356
357 /* Assertion-friendly validity checks */
358 static bool tag_is_valid(unsigned int tag);
359 static bool number_is_valid(const char *num);
360
361 JsonNode *json_decode(const char *json)
362 {
363         const char *s = json;
364         JsonNode *ret;
365         
366         skip_space(&s);
367         if (!parse_value(&s, &ret))
368                 return NULL;
369         
370         skip_space(&s);
371         if (*s != 0) {
372                 json_delete(ret);
373                 return NULL;
374         }
375         
376         return ret;
377 }
378
379 char *json_encode(const JsonNode *node)
380 {
381         return json_stringify(node, NULL);
382 }
383
384 char *json_encode_string(const char *str)
385 {
386         SB sb;
387         sb_init(&sb);
388         
389         emit_string(&sb, str);
390         
391         return sb_finish(&sb);
392 }
393
394 char *json_stringify(const JsonNode *node, const char *space)
395 {
396         SB sb;
397         sb_init(&sb);
398         
399         if (space != NULL)
400                 emit_value_indented(&sb, node, space, 0);
401         else
402                 emit_value(&sb, node);
403         
404         return sb_finish(&sb);
405 }
406
407 void json_delete(JsonNode *node)
408 {
409         if (node != NULL) {
410                 json_remove_from_parent(node);
411                 
412                 switch (node->tag) {
413                         case JSON_STRING:
414                                 free(node->string_);
415                                 break;
416                         case JSON_ARRAY:
417                         case JSON_OBJECT:
418                         {
419                                 JsonNode *child, *next;
420                                 for (child = node->children.head; child != NULL; child = next) {
421                                         next = child->next;
422                                         json_delete(child);
423                                 }
424                                 break;
425                         }
426                         default:;
427                 }
428                 
429                 free(node);
430         }
431 }
432
433 bool json_validate(const char *json)
434 {
435         const char *s = json;
436         
437         skip_space(&s);
438         if (!parse_value(&s, NULL))
439                 return false;
440         
441         skip_space(&s);
442         if (*s != 0)
443                 return false;
444         
445         return true;
446 }
447
448 JsonNode *json_find_element(JsonNode *array, int index)
449 {
450         JsonNode *element;
451         int i = 0;
452         
453         if (array == NULL || array->tag != JSON_ARRAY)
454                 return NULL;
455         
456         json_foreach(element, array) {
457                 if (i == index)
458                         return element;
459                 i++;
460         }
461         
462         return NULL;
463 }
464
465 JsonNode *json_find_member(JsonNode *object, const char *name)
466 {
467         JsonNode *member;
468         
469         if (object == NULL || object->tag != JSON_OBJECT)
470                 return NULL;
471         
472         json_foreach(member, object)
473                 if (strcmp(member->key, name) == 0)
474                         return member;
475         
476         return NULL;
477 }
478
479 JsonNode *json_first_child(const JsonNode *node)
480 {
481         if (node != NULL && (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT))
482                 return node->children.head;
483         return NULL;
484 }
485
486 static JsonNode *mknode(JsonTag tag)
487 {
488         JsonNode *ret = (JsonNode*) calloc(1, sizeof(JsonNode));
489         if (ret == NULL)
490                 out_of_memory();
491         ret->tag = tag;
492         return ret;
493 }
494
495 JsonNode *json_mknull(void)
496 {
497         return mknode(JSON_NULL);
498 }
499
500 JsonNode *json_mkbool(bool b)
501 {
502         JsonNode *ret = mknode(JSON_BOOL);
503         ret->bool_ = b;
504         return ret;
505 }
506
507 static JsonNode *mkstring(char *s)
508 {
509         JsonNode *ret = mknode(JSON_STRING);
510         ret->string_ = s;
511         return ret;
512 }
513
514 JsonNode *json_mkstring(const char *s)
515 {
516         return mkstring(json_strdup(s));
517 }
518
519 JsonNode *json_mknumber(double n)
520 {
521         JsonNode *node = mknode(JSON_NUMBER);
522         node->number_ = n;
523         return node;
524 }
525
526 JsonNode *json_mkarray(void)
527 {
528         return mknode(JSON_ARRAY);
529 }
530
531 JsonNode *json_mkobject(void)
532 {
533         return mknode(JSON_OBJECT);
534 }
535
536 static void append_node(JsonNode *parent, JsonNode *child)
537 {
538         child->parent = parent;
539         child->prev = parent->children.tail;
540         child->next = NULL;
541         
542         if (parent->children.tail != NULL)
543                 parent->children.tail->next = child;
544         else
545                 parent->children.head = child;
546         parent->children.tail = child;
547 }
548
549 static void prepend_node(JsonNode *parent, JsonNode *child)
550 {
551         child->parent = parent;
552         child->prev = NULL;
553         child->next = parent->children.head;
554         
555         if (parent->children.head != NULL)
556                 parent->children.head->prev = child;
557         else
558                 parent->children.tail = child;
559         parent->children.head = child;
560 }
561
562 static void append_member(JsonNode *object, char *key, JsonNode *value)
563 {
564         value->key = key;
565         append_node(object, value);
566 }
567
568 void json_append_element(JsonNode *array, JsonNode *element)
569 {
570         assert(array->tag == JSON_ARRAY);
571         assert(element->parent == NULL);
572         
573         append_node(array, element);
574 }
575
576 void json_prepend_element(JsonNode *array, JsonNode *element)
577 {
578         assert(array->tag == JSON_ARRAY);
579         assert(element->parent == NULL);
580         
581         prepend_node(array, element);
582 }
583
584 void json_append_member(JsonNode *object, const char *key, JsonNode *value)
585 {
586         assert(object->tag == JSON_OBJECT);
587         assert(value->parent == NULL);
588         
589         append_member(object, json_strdup(key), value);
590 }
591
592 void json_prepend_member(JsonNode *object, const char *key, JsonNode *value)
593 {
594         assert(object->tag == JSON_OBJECT);
595         assert(value->parent == NULL);
596         
597         value->key = json_strdup(key);
598         prepend_node(object, value);
599 }
600
601 void json_remove_from_parent(JsonNode *node)
602 {
603         JsonNode *parent = node->parent;
604         
605         if (parent != NULL) {
606                 if (node->prev != NULL)
607                         node->prev->next = node->next;
608                 else
609                         parent->children.head = node->next;
610                 if (node->next != NULL)
611                         node->next->prev = node->prev;
612                 else
613                         parent->children.tail = node->prev;
614                 
615                 free(node->key);
616                 
617                 node->parent = NULL;
618                 node->prev = node->next = NULL;
619                 node->key = NULL;
620         }
621 }
622
623 static bool parse_value(const char **sp, JsonNode **out)
624 {
625         const char *s = *sp;
626         
627         switch (*s) {
628                 case 'n':
629                         if (expect_literal(&s, "null")) {
630                                 if (out)
631                                         *out = json_mknull();
632                                 *sp = s;
633                                 return true;
634                         }
635                         return false;
636                 
637                 case 'f':
638                         if (expect_literal(&s, "false")) {
639                                 if (out)
640                                         *out = json_mkbool(false);
641                                 *sp = s;
642                                 return true;
643                         }
644                         return false;
645                 
646                 case 't':
647                         if (expect_literal(&s, "true")) {
648                                 if (out)
649                                         *out = json_mkbool(true);
650                                 *sp = s;
651                                 return true;
652                         }
653                         return false;
654                 
655                 case '"': {
656                         char *str;
657                         if (parse_string(&s, out ? &str : NULL)) {
658                                 if (out)
659                                         *out = mkstring(str);
660                                 *sp = s;
661                                 return true;
662                         }
663                         return false;
664                 }
665                 
666                 case '[':
667                         if (parse_array(&s, out)) {
668                                 *sp = s;
669                                 return true;
670                         }
671                         return false;
672                 
673                 case '{':
674                         if (parse_object(&s, out)) {
675                                 *sp = s;
676                                 return true;
677                         }
678                         return false;
679                 
680                 default: {
681                         double num;
682                         if (parse_number(&s, out ? &num : NULL)) {
683                                 if (out)
684                                         *out = json_mknumber(num);
685                                 *sp = s;
686                                 return true;
687                         }
688                         return false;
689                 }
690         }
691 }
692
693 static bool parse_array(const char **sp, JsonNode **out)
694 {
695         const char *s = *sp;
696         JsonNode *ret = out ? json_mkarray() : NULL;
697         JsonNode *element;
698         
699         if (*s++ != '[')
700                 goto failure;
701         skip_space(&s);
702         
703         if (*s == ']') {
704                 s++;
705                 goto success;
706         }
707         
708         for (;;) {
709                 if (!parse_value(&s, out ? &element : NULL))
710                         goto failure;
711                 skip_space(&s);
712                 
713                 if (out)
714                         json_append_element(ret, element);
715                 
716                 if (*s == ']') {
717                         s++;
718                         goto success;
719                 }
720                 
721                 if (*s++ != ',')
722                         goto failure;
723                 skip_space(&s);
724         }
725         
726 success:
727         *sp = s;
728         if (out)
729                 *out = ret;
730         return true;
731
732 failure:
733         json_delete(ret);
734         return false;
735 }
736
737 static bool parse_object(const char **sp, JsonNode **out)
738 {
739         const char *s = *sp;
740         JsonNode *ret = out ? json_mkobject() : NULL;
741         char *key;
742         JsonNode *value;
743         
744         if (*s++ != '{')
745                 goto failure;
746         skip_space(&s);
747         
748         if (*s == '}') {
749                 s++;
750                 goto success;
751         }
752         
753         for (;;) {
754                 if (!parse_string(&s, out ? &key : NULL))
755                         goto failure;
756                 skip_space(&s);
757                 
758                 if (*s++ != ':')
759                         goto failure_free_key;
760                 skip_space(&s);
761                 
762                 if (!parse_value(&s, out ? &value : NULL))
763                         goto failure_free_key;
764                 skip_space(&s);
765                 
766                 if (out)
767                         append_member(ret, key, value);
768                 
769                 if (*s == '}') {
770                         s++;
771                         goto success;
772                 }
773                 
774                 if (*s++ != ',')
775                         goto failure;
776                 skip_space(&s);
777         }
778         
779 success:
780         *sp = s;
781         if (out)
782                 *out = ret;
783         return true;
784
785 failure_free_key:
786         if (out)
787                 free(key);
788 failure:
789         json_delete(ret);
790         return false;
791 }
792
793 bool parse_string(const char **sp, char **out)
794 {
795         const char *s = *sp;
796         SB sb;
797         char throwaway_buffer[4];
798                 /* enough space for a UTF-8 character */
799         char *b;
800         
801         if (*s++ != '"')
802                 return false;
803         
804         if (out) {
805                 sb_init(&sb);
806                 sb_need(&sb, 4);
807                 b = sb.cur;
808         } else {
809                 b = throwaway_buffer;
810         }
811         
812         while (*s != '"') {
813                 unsigned char c = *s++;
814                 
815                 /* Parse next character, and write it to b. */
816                 if (c == '\\') {
817                         c = *s++;
818                         switch (c) {
819                                 case '"':
820                                 case '\\':
821                                 case '/':
822                                         *b++ = c;
823                                         break;
824                                 case 'b':
825                                         *b++ = '\b';
826                                         break;
827                                 case 'f':
828                                         *b++ = '\f';
829                                         break;
830                                 case 'n':
831                                         *b++ = '\n';
832                                         break;
833                                 case 'r':
834                                         *b++ = '\r';
835                                         break;
836                                 case 't':
837                                         *b++ = '\t';
838                                         break;
839                                 case 'u':
840                                 {
841                                         uint16_t uc, lc;
842                                         uchar_t unicode;
843                                         
844                                         if (!parse_hex16(&s, &uc))
845                                                 goto failed;
846                                         
847                                         if (uc >= 0xD800 && uc <= 0xDFFF) {
848                                                 /* Handle UTF-16 surrogate pair. */
849                                                 if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc))
850                                                         goto failed; /* Incomplete surrogate pair. */
851                                                 if (!from_surrogate_pair(uc, lc, &unicode))
852                                                         goto failed; /* Invalid surrogate pair. */
853                                         } else if (uc == 0) {
854                                                 /* Disallow "\u0000". */
855                                                 goto failed;
856                                         } else {
857                                                 unicode = uc;
858                                         }
859                                         
860                                         b += utf8_write_char(unicode, b);
861                                         break;
862                                 }
863                                 default:
864                                         /* Invalid escape */
865                                         goto failed;
866                         }
867                 } else if (c <= 0x1F) {
868                         /* Control characters are not allowed in string literals. */
869                         goto failed;
870                 } else {
871                         /* Validate and echo a UTF-8 character. */
872                         int len;
873                         
874                         s--;
875                         len = utf8_validate_cz(s);
876                         if (len == 0)
877                                 goto failed; /* Invalid UTF-8 character. */
878                         
879                         while (len--)
880                                 *b++ = *s++;
881                 }
882                 
883                 /*
884                  * Update sb to know about the new bytes,
885                  * and set up b to write another character.
886                  */
887                 if (out) {
888                         sb.cur = b;
889                         sb_need(&sb, 4);
890                         b = sb.cur;
891                 } else {
892                         b = throwaway_buffer;
893                 }
894         }
895         s++;
896         
897         if (out)
898                 *out = sb_finish(&sb);
899         *sp = s;
900         return true;
901
902 failed:
903         if (out)
904                 sb_free(&sb);
905         return false;
906 }
907
908 /*
909  * The JSON spec says that a number shall follow this precise pattern
910  * (spaces and quotes added for readability):
911  *       '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
912  *
913  * However, some JSON parsers are more liberal.  For instance, PHP accepts
914  * '.5' and '1.'.  JSON.parse accepts '+3'.
915  *
916  * This function takes the strict approach.
917  */
918 bool parse_number(const char **sp, double *out)
919 {
920         const char *s = *sp;
921
922         /* '-'? */
923         if (*s == '-')
924                 s++;
925
926         /* (0 | [1-9][0-9]*) */
927         if (*s == '0') {
928                 s++;
929         } else {
930                 if (!is_digit(*s))
931                         return false;
932                 do {
933                         s++;
934                 } while (is_digit(*s));
935         }
936
937         /* ('.' [0-9]+)? */
938         if (*s == '.') {
939                 s++;
940                 if (!is_digit(*s))
941                         return false;
942                 do {
943                         s++;
944                 } while (is_digit(*s));
945         }
946
947         /* ([Ee] [+-]? [0-9]+)? */
948         if (*s == 'E' || *s == 'e') {
949                 s++;
950                 if (*s == '+' || *s == '-')
951                         s++;
952                 if (!is_digit(*s))
953                         return false;
954                 do {
955                         s++;
956                 } while (is_digit(*s));
957         }
958
959         if (out)
960                 *out = strtod(*sp, NULL);
961
962         *sp = s;
963         return true;
964 }
965
966 static void skip_space(const char **sp)
967 {
968         const char *s = *sp;
969         while (is_space(*s))
970                 s++;
971         *sp = s;
972 }
973
974 static void emit_value(SB *out, const JsonNode *node)
975 {
976         assert(tag_is_valid(node->tag));
977         switch (node->tag) {
978                 case JSON_NULL:
979                         sb_puts(out, "null");
980                         break;
981                 case JSON_BOOL:
982                         sb_puts(out, node->bool_ ? "true" : "false");
983                         break;
984                 case JSON_STRING:
985                         emit_string(out, node->string_);
986                         break;
987                 case JSON_NUMBER:
988                         emit_number(out, node->number_);
989                         break;
990                 case JSON_ARRAY:
991                         emit_array(out, node);
992                         break;
993                 case JSON_OBJECT:
994                         emit_object(out, node);
995                         break;
996                 default:
997                         assert(false);
998         }
999 }
1000
1001 void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level)
1002 {
1003         assert(tag_is_valid(node->tag));
1004         switch (node->tag) {
1005                 case JSON_NULL:
1006                         sb_puts(out, "null");
1007                         break;
1008                 case JSON_BOOL:
1009                         sb_puts(out, node->bool_ ? "true" : "false");
1010                         break;
1011                 case JSON_STRING:
1012                         emit_string(out, node->string_);
1013                         break;
1014                 case JSON_NUMBER:
1015                         emit_number(out, node->number_);
1016                         break;
1017                 case JSON_ARRAY:
1018                         emit_array_indented(out, node, space, indent_level);
1019                         break;
1020                 case JSON_OBJECT:
1021                         emit_object_indented(out, node, space, indent_level);
1022                         break;
1023                 default:
1024                         assert(false);
1025         }
1026 }
1027
1028 static void emit_array(SB *out, const JsonNode *array)
1029 {
1030         const JsonNode *element;
1031         
1032         sb_putc(out, '[');
1033         json_foreach(element, array) {
1034                 emit_value(out, element);
1035                 if (element->next != NULL)
1036                         sb_putc(out, ',');
1037         }
1038         sb_putc(out, ']');
1039 }
1040
1041 static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level)
1042 {
1043         const JsonNode *element = array->children.head;
1044         int i;
1045         
1046         if (element == NULL) {
1047                 sb_puts(out, "[]");
1048                 return;
1049         }
1050         
1051         sb_puts(out, "[\n");
1052         while (element != NULL) {
1053                 for (i = 0; i < indent_level + 1; i++)
1054                         sb_puts(out, space);
1055                 emit_value_indented(out, element, space, indent_level + 1);
1056                 
1057                 element = element->next;
1058                 sb_puts(out, element != NULL ? ",\n" : "\n");
1059         }
1060         for (i = 0; i < indent_level; i++)
1061                 sb_puts(out, space);
1062         sb_putc(out, ']');
1063 }
1064
1065 static void emit_object(SB *out, const JsonNode *object)
1066 {
1067         const JsonNode *member;
1068         
1069         sb_putc(out, '{');
1070         json_foreach(member, object) {
1071                 emit_string(out, member->key);
1072                 sb_putc(out, ':');
1073                 emit_value(out, member);
1074                 if (member->next != NULL)
1075                         sb_putc(out, ',');
1076         }
1077         sb_putc(out, '}');
1078 }
1079
1080 static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level)
1081 {
1082         const JsonNode *member = object->children.head;
1083         int i;
1084         
1085         if (member == NULL) {
1086                 sb_puts(out, "{}");
1087                 return;
1088         }
1089         
1090         sb_puts(out, "{\n");
1091         while (member != NULL) {
1092                 for (i = 0; i < indent_level + 1; i++)
1093                         sb_puts(out, space);
1094                 emit_string(out, member->key);
1095                 sb_puts(out, ": ");
1096                 emit_value_indented(out, member, space, indent_level + 1);
1097                 
1098                 member = member->next;
1099                 sb_puts(out, member != NULL ? ",\n" : "\n");
1100         }
1101         for (i = 0; i < indent_level; i++)
1102                 sb_puts(out, space);
1103         sb_putc(out, '}');
1104 }
1105
1106 void emit_string(SB *out, const char *str)
1107 {
1108         bool escape_unicode = false;
1109         const char *s = str;
1110         char *b;
1111         
1112         assert(utf8_validate(str));
1113         
1114         /*
1115          * 14 bytes is enough space to write up to two
1116          * \uXXXX escapes and two quotation marks.
1117          */
1118         sb_need(out, 14);
1119         b = out->cur;
1120         
1121         *b++ = '"';
1122         while (*s != 0) {
1123                 unsigned char c = *s++;
1124                 
1125                 /* Encode the next character, and write it to b. */
1126                 switch (c) {
1127                         case '"':
1128                                 *b++ = '\\';
1129                                 *b++ = '"';
1130                                 break;
1131                         case '\\':
1132                                 *b++ = '\\';
1133                                 *b++ = '\\';
1134                                 break;
1135                         case '\b':
1136                                 *b++ = '\\';
1137                                 *b++ = 'b';
1138                                 break;
1139                         case '\f':
1140                                 *b++ = '\\';
1141                                 *b++ = 'f';
1142                                 break;
1143                         case '\n':
1144                                 *b++ = '\\';
1145                                 *b++ = 'n';
1146                                 break;
1147                         case '\r':
1148                                 *b++ = '\\';
1149                                 *b++ = 'r';
1150                                 break;
1151                         case '\t':
1152                                 *b++ = '\\';
1153                                 *b++ = 't';
1154                                 break;
1155                         default: {
1156                                 int len;
1157                                 
1158                                 s--;
1159                                 len = utf8_validate_cz(s);
1160                                 
1161                                 if (len == 0) {
1162                                         /*
1163                                          * Handle invalid UTF-8 character gracefully in production
1164                                          * by writing a replacement character (U+FFFD)
1165                                          * and skipping a single byte.
1166                                          *
1167                                          * This should never happen when assertions are enabled
1168                                          * due to the assertion at the beginning of this function.
1169                                          */
1170                                         assert(false);
1171                                         if (escape_unicode) {
1172                                                 strcpy(b, "\\uFFFD");
1173                                                 b += 6;
1174                                         } else {
1175                                                 *b++ = 0xEF;
1176                                                 *b++ = 0xBF;
1177                                                 *b++ = 0xBD;
1178                                         }
1179                                         s++;
1180                                 } else if (c < 0x1F || (c >= 0x80 && escape_unicode)) {
1181                                         /* Encode using \u.... */
1182                                         uint32_t unicode;
1183                                         
1184                                         s += utf8_read_char(s, &unicode);
1185                                         
1186                                         if (unicode <= 0xFFFF) {
1187                                                 *b++ = '\\';
1188                                                 *b++ = 'u';
1189                                                 b += write_hex16(b, unicode);
1190                                         } else {
1191                                                 /* Produce a surrogate pair. */
1192                                                 uint16_t uc, lc;
1193                                                 assert(unicode <= 0x10FFFF);
1194                                                 to_surrogate_pair(unicode, &uc, &lc);
1195                                                 *b++ = '\\';
1196                                                 *b++ = 'u';
1197                                                 b += write_hex16(b, uc);
1198                                                 *b++ = '\\';
1199                                                 *b++ = 'u';
1200                                                 b += write_hex16(b, lc);
1201                                         }
1202                                 } else {
1203                                         /* Write the character directly. */
1204                                         while (len--)
1205                                                 *b++ = *s++;
1206                                 }
1207                                 
1208                                 break;
1209                         }
1210                 }
1211         
1212                 /*
1213                  * Update *out to know about the new bytes,
1214                  * and set up b to write another encoded character.
1215                  */
1216                 out->cur = b;
1217                 sb_need(out, 14);
1218                 b = out->cur;
1219         }
1220         *b++ = '"';
1221         
1222         out->cur = b;
1223 }
1224
1225 static void emit_number(SB *out, double num)
1226 {
1227         /*
1228          * This isn't exactly how JavaScript renders numbers,
1229          * but it should produce valid JSON for reasonable numbers
1230          * preserve precision well enough, and avoid some oddities
1231          * like 0.3 -> 0.299999999999999988898 .
1232          */
1233         char buf[64];
1234         sprintf(buf, "%.16g", num);
1235         
1236         if (number_is_valid(buf))
1237                 sb_puts(out, buf);
1238         else
1239                 sb_puts(out, "null");
1240 }
1241
1242 static bool tag_is_valid(unsigned int tag)
1243 {
1244         return (/* tag >= JSON_NULL && */ tag <= JSON_OBJECT);
1245 }
1246
1247 static bool number_is_valid(const char *num)
1248 {
1249         return (parse_number(&num, NULL) && *num == '\0');
1250 }
1251
1252 static bool expect_literal(const char **sp, const char *str)
1253 {
1254         const char *s = *sp;
1255         
1256         while (*str != '\0')
1257                 if (*s++ != *str++)
1258                         return false;
1259         
1260         *sp = s;
1261         return true;
1262 }
1263
1264 /*
1265  * Parses exactly 4 hex characters (capital or lowercase).
1266  * Fails if any input chars are not [0-9A-Fa-f].
1267  */
1268 static bool parse_hex16(const char **sp, uint16_t *out)
1269 {
1270         const char *s = *sp;
1271         uint16_t ret = 0;
1272         uint16_t i;
1273         uint16_t tmp;
1274         char c;
1275
1276         for (i = 0; i < 4; i++) {
1277                 c = *s++;
1278                 if (c >= '0' && c <= '9')
1279                         tmp = c - '0';
1280                 else if (c >= 'A' && c <= 'F')
1281                         tmp = c - 'A' + 10;
1282                 else if (c >= 'a' && c <= 'f')
1283                         tmp = c - 'a' + 10;
1284                 else
1285                         return false;
1286
1287                 ret <<= 4;
1288                 ret += tmp;
1289         }
1290         
1291         if (out)
1292                 *out = ret;
1293         *sp = s;
1294         return true;
1295 }
1296
1297 /*
1298  * Encodes a 16-bit number into hexadecimal,
1299  * writing exactly 4 hex chars.
1300  */
1301 static int write_hex16(char *out, uint16_t val)
1302 {
1303         const char *hex = "0123456789ABCDEF";
1304         
1305         *out++ = hex[(val >> 12) & 0xF];
1306         *out++ = hex[(val >> 8)  & 0xF];
1307         *out++ = hex[(val >> 4)  & 0xF];
1308         *out++ = hex[ val        & 0xF];
1309         
1310         return 4;
1311 }
1312
1313 bool json_check(const JsonNode *node, char errmsg[256])
1314 {
1315         #define problem(...) do { \
1316                         if (errmsg != NULL) \
1317                                 snprintf(errmsg, 256, __VA_ARGS__); \
1318                         return false; \
1319                 } while (0)
1320         
1321         if (node->key != NULL && !utf8_validate(node->key))
1322                 problem("key contains invalid UTF-8");
1323         
1324         if (!tag_is_valid(node->tag))
1325                 problem("tag is invalid (%u)", node->tag);
1326         
1327         if (node->tag == JSON_BOOL) {
1328                 if (node->bool_ != false && node->bool_ != true)
1329                         problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true);
1330         } else if (node->tag == JSON_STRING) {
1331                 if (node->string_ == NULL)
1332                         problem("string_ is NULL");
1333                 if (!utf8_validate(node->string_))
1334                         problem("string_ contains invalid UTF-8");
1335         } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
1336                 JsonNode *head = node->children.head;
1337                 JsonNode *tail = node->children.tail;
1338                 
1339                 if (head == NULL || tail == NULL) {
1340                         if (head != NULL)
1341                                 problem("tail is NULL, but head is not");
1342                         if (tail != NULL)
1343                                 problem("head is NULL, but tail is not");
1344                 } else {
1345                         JsonNode *child;
1346                         JsonNode *last = NULL;
1347                         
1348                         if (head->prev != NULL)
1349                                 problem("First child's prev pointer is not NULL");
1350                         
1351                         for (child = head; child != NULL; last = child, child = child->next) {
1352                                 if (child == node)
1353                                         problem("node is its own child");
1354                                 if (child->next == child)
1355                                         problem("child->next == child (cycle)");
1356                                 if (child->next == head)
1357                                         problem("child->next == head (cycle)");
1358                                 
1359                                 if (child->parent != node)
1360                                         problem("child does not point back to parent");
1361                                 if (child->next != NULL && child->next->prev != child)
1362                                         problem("child->next does not point back to child");
1363                                 
1364                                 if (node->tag == JSON_ARRAY && child->key != NULL)
1365                                         problem("Array element's key is not NULL");
1366                                 if (node->tag == JSON_OBJECT && child->key == NULL)
1367                                         problem("Object member's key is NULL");
1368                                 
1369                                 if (!json_check(child, errmsg))
1370                                         return false;
1371                         }
1372                         
1373                         if (last != tail)
1374                                 problem("tail does not match pointer found by starting at head and following next links");
1375                 }
1376         }
1377         
1378         return true;
1379         
1380         #undef problem
1381 }