git.ozlabs.org Git - ccan/commitdiff
json: new module for parsing and generating JSON
author Joey Adams <joeyadams3.14159@gmail.com>
Thu, 30 Jun 2011 06:39:16 +0000 (02:39 -0400)
committer Rusty Russell <rusty@rustcorp.com.au>
Fri, 1 Jul 2011 01:23:24 +0000 (10:53 +0930)
12 files changed:
ccan/json/LICENSE [new symlink]
ccan/json/_info [new file with mode: 0644]
ccan/json/json.c [new file with mode: 0644]
ccan/json/json.h [new file with mode: 0644]
ccan/json/notes [new file with mode: 0644]
ccan/json/test/common.h [new file with mode: 0644]
ccan/json/test/run-construction.c [new file with mode: 0644]
ccan/json/test/run-decode-encode.c [new file with mode: 0644]
ccan/json/test/run-stringify.c [new file with mode: 0644]
ccan/json/test/run-validate.c [new file with mode: 0644]
ccan/json/test/test-strings [new file with mode: 0644]
ccan/json/test/test-strings-reencoded [new file with mode: 0644]

diff --git a/ccan/json/LICENSE b/ccan/json/LICENSE
new file mode 120000 (symlink)
index 0000000..2354d12
--- /dev/null
@@ -0,0 +1 @@
+../../licenses/BSD-MIT
\ No newline at end of file
diff --git a/ccan/json/_info b/ccan/json/_info
new file mode 100644 (file)
index 0000000..0544cf5
--- /dev/null
@@ -0,0 +1,118 @@
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * json - Parse and generate JSON (JavaScript Object Notation)
+ *
+ * This is a library for encoding and decoding JSON that strives to be
+ * easy to learn, use, and incorporate into an application.
+ *
+ * JSON (JavaScript Object Notation) facilitates passing data among different
+ * programming languages, particularly JavaScript.  It looks like this:
+ *
+ *     [
+ *         {
+ *             "id":           1,
+ *             "firstname":    "John",
+ *             "lastname":     "Smith",
+ *             "email":        "john@example.com",
+ *             "likes_pizza":  false
+ *         },
+ *         {
+ *             "id":           2,
+ *             "firstname":    "Linda",
+ *             "lastname":     "Jones",
+ *             "email":        null,
+ *             "likes_pizza":  true
+ *         }
+ *     ]
+ *
+ * Example:
+ *     #include <ccan/json/json.h>
+ *     #include <math.h>
+ *     #include <stdio.h>
+ *     #include <stdlib.h>
+ *     
+ *     static int find_number(JsonNode *object, const char *name, double *out)
+ *     {
+ *             JsonNode *node = json_find_member(object, name);
+ *             if (node && node->tag == JSON_NUMBER) {
+ *                     *out = node->number_;
+ *                     return 1;
+ *             }
+ *             return 0;
+ *     }
+ *     
+ *     static void solve_pythagorean(JsonNode *triple)
+ *     {
+ *             double a = 0, b = 0, c = 0;
+ *             int a_given, b_given, c_given;
+ *             
+ *             if (triple->tag != JSON_OBJECT) {
+ *                     fprintf(stderr, "Error: Expected a JSON object.\n");
+ *                     exit(EXIT_FAILURE);
+ *             }
+ *             
+ *             a_given = find_number(triple, "a", &a);
+ *             b_given = find_number(triple, "b", &b);
+ *             c_given = find_number(triple, "c", &c);
+ *             
+ *             if (a_given + b_given + c_given != 2) {
+ *                     fprintf(stderr, "Error: I need two sides to compute the length of the third.\n");
+ *                     exit(EXIT_FAILURE);
+ *             }
+ *             
+ *             if (a_given && b_given) {
+ *                     c = sqrt(a*a + b*b);
+ *                     json_append_member(triple, "c", json_mknumber(c));
+ *             } else if (a_given && c_given) {
+ *                     b = sqrt(c*c - a*a);
+ *                     json_append_member(triple, "b", json_mknumber(b));
+ *             } else if (b_given && c_given) {
+ *                     a = sqrt(c*c - b*b);
+ *                     json_append_member(triple, "a", json_mknumber(a));
+ *             }
+ *     }
+ *     
+ *     int main(void)
+ *     {
+ *             JsonNode *triples = json_mkarray();
+ *             
+ *             json_append_element(triples, json_decode("{\"a\": 3, \"b\": 4}"));
+ *             json_append_element(triples, json_decode("{\"a\": 5, \"c\": 13}"));
+ *             json_append_element(triples, json_decode("{\"b\": 24, \"c\": 25}"));
+ *             
+ *             JsonNode *triple;
+ *             json_foreach(triple, triples)
+ *                     solve_pythagorean(triple);
+ *             
+ *             char *tmp = json_stringify(triples, "\t");
+ *             puts(tmp);
+ *             free(tmp);
+ *             
+ *             json_delete(triples);
+ *             return 0;
+ *     }
+ *
+ * Author: Joey Adams
+ * Version: 0.1
+ * License: MIT
+ */
+int main(int argc, char *argv[])
+{
+       const char *query;
+
+       /* Exactly one argument (the query word) is expected. */
+       if (argc != 2)
+               return 1;
+       query = argv[1];
+
+       /* "depends": this module lists no dependencies. */
+       if (!strcmp(query, "depends"))
+               return 0;
+
+       /* "libs": libm is needed for sqrt() used in example code above. */
+       if (!strcmp(query, "libs")) {
+               printf("m\n");
+               return 0;
+       }
+
+       /* Any other query is rejected. */
+       return 1;
+}
diff --git a/ccan/json/json.c b/ccan/json/json.c
new file mode 100644 (file)
index 0000000..2f0452a
--- /dev/null
@@ -0,0 +1,1381 @@
+/*
+  Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
+  All rights reserved.
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
+
+#include "json.h"
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Report an allocation failure on stderr and terminate the process with
+ * EXIT_FAILURE.  Wrapped in do { } while (0) so it acts as one statement.
+ */
+#define out_of_memory() do {                    \
+               fprintf(stderr, "Out of memory.\n");    \
+               exit(EXIT_FAILURE);                     \
+       } while (0)
+
+/*
+ * Portable replacement for strdup() (which is not part of standard C).
+ * Returns a malloc'd copy of @str; terminates via out_of_memory() if the
+ * allocation fails, so the result is never NULL.
+ */
+static char *json_strdup(const char *str)
+{
+       size_t size = strlen(str) + 1;  /* include the terminator */
+       char *copy = (char*) malloc(size);
+
+       if (copy == NULL)
+               out_of_memory();
+       memcpy(copy, str, size);
+       return copy;
+}
+
+/* String buffer */
+
+/*
+ * Growable character buffer.  The allocation always holds one byte more
+ * than (end - start) so that sb_finish() can store a terminating 0 at cur
+ * even when the buffer is full.
+ */
+typedef struct
+{
+       char *cur;      /* next position to be written */
+       char *end;      /* start + usable capacity (terminator byte excluded) */
+       char *start;    /* beginning of the heap allocation */
+} SB;
+
+/*
+ * Initialise @sb as an empty buffer with 16 usable bytes plus one spare
+ * byte for the terminator.  Exits via out_of_memory() on failure.
+ */
+static void sb_init(SB *sb)
+{
+       char *buf = (char*) malloc(17);
+
+       if (buf == NULL)
+               out_of_memory();
+       sb->start = buf;
+       sb->cur = buf;
+       sb->end = buf + 16;
+}
+
+/*
+ * Ensure at least @need bytes can be written at (sb)->cur, growing the
+ * buffer with sb_grow() if the remaining room is smaller.
+ *
+ * sb and need may be evaluated multiple times.
+ */
+#define sb_need(sb, need) do {                  \
+               if ((sb)->end - (sb)->cur < (need))     \
+                       sb_grow(sb, need);                  \
+       } while (0)
+
+/*
+ * Enlarge @sb so that at least @need more bytes fit after the current
+ * contents.  The capacity is doubled at least once, and repeatedly until
+ * it suffices.  Exits via out_of_memory() if reallocation fails.
+ */
+static void sb_grow(SB *sb, int need)
+{
+       size_t used = sb->cur - sb->start;
+       size_t capacity = sb->end - sb->start;
+       char *buf;
+
+       capacity *= 2;
+       while (capacity < used + need)
+               capacity *= 2;
+
+       /* One byte past the capacity is kept for sb_finish()'s terminator. */
+       buf = (char*) realloc(sb->start, capacity + 1);
+       sb->start = buf;
+       if (buf == NULL)
+               out_of_memory();
+       sb->cur = buf + used;
+       sb->end = buf + capacity;
+}
+
+/* Append @count bytes starting at @bytes to @sb, growing it if required. */
+static void sb_put(SB *sb, const char *bytes, int count)
+{
+       if (sb->end - sb->cur < count)
+               sb_grow(sb, count);
+
+       memcpy(sb->cur, bytes, count);
+       sb->cur += count;
+}
+
+/*
+ * Append the single character @c to @sb, growing the buffer first when it
+ * is full.  sb is evaluated multiple times.
+ */
+#define sb_putc(sb, c) do {         \
+               if ((sb)->cur >= (sb)->end) \
+                       sb_grow(sb, 1);         \
+               *(sb)->cur++ = (c);         \
+       } while (0)
+
+/* Append the null-terminated string @str (without its terminator) to @sb. */
+static void sb_puts(SB *sb, const char *str)
+{
+       size_t len = strlen(str);
+
+       sb_put(sb, str, len);
+}
+
+/*
+ * Null-terminate the contents of @sb and return the underlying buffer.
+ * The returned pointer is the buffer's malloc'd storage.
+ */
+static char *sb_finish(SB *sb)
+{
+       char *result;
+
+       *sb->cur = 0;
+       result = sb->start;
+
+       /* Sanity check: no embedded 0 bytes were written before cur. */
+       assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
+       return result;
+}
+
+/* Release the storage held by @sb (for buffers not handed out by sb_finish()). */
+static void sb_free(SB *sb)
+{
+       char *buf = sb->start;
+
+       free(buf);
+}
+
+/*
+ * Unicode helper functions
+ *
+ * These are taken from the ccan/charset module and customized a bit.
+ * Putting them here means the compiler can (choose to) inline them,
+ * and it keeps ccan/json from having a dependency.
+ */
+
+/*
+ * Type for Unicode codepoints.
+ * We need our own because wchar_t might be 16 bits, which cannot hold
+ * codepoints above U+FFFF.
+ */
+typedef uint32_t uchar_t;
+
+/*
+ * Validate a single UTF-8 character starting at @s.
+ * The string must be null-terminated.
+ *
+ * If it's valid, return its length (1 thru 4).
+ * If it's invalid or clipped, return 0.
+ *
+ * This function implements the syntax given in RFC3629, which is
+ * the same as that given in The Unicode Standard, Version 6.0.
+ *
+ * It has the following properties:
+ *
+ *  * All codepoints U+0000..U+10FFFF may be encoded,
+ *    except for U+D800..U+DFFF, which are reserved
+ *    for UTF-16 surrogate pair encoding.
+ *  * UTF-8 byte sequences longer than 4 bytes are not permitted,
+ *    as they exceed the range of Unicode.
+ *  * The sixty-six Unicode "non-characters" are permitted
+ *    (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
+ */
+static int utf8_validate_cz(const char *s)
+{
+       const unsigned char *p = (const unsigned char*) s;
+       unsigned char lead = p[0];
+       int len;
+       int i;
+
+       if (lead <= 0x7F)       /* 00..7F: single byte */
+               return 1;
+       if (lead <= 0xC1)       /* 80..C1: stray continuation or overlong 2-byte form */
+               return 0;
+       if (lead > 0xF4)        /* F5..FF: never valid */
+               return 0;
+
+       if (lead <= 0xDF) {     /* C2..DF */
+               len = 2;
+       } else if (lead <= 0xEF) {      /* E0..EF */
+               /* Reject overlong 3-byte forms and U+D800..U+DFFF. */
+               if (lead == 0xE0 && p[1] < 0xA0)
+                       return 0;
+               if (lead == 0xED && p[1] > 0x9F)
+                       return 0;
+               len = 3;
+       } else {                /* F0..F4 */
+               /* Reject overlong 4-byte forms and codepoints beyond U+10FFFF. */
+               if (lead == 0xF0 && p[1] < 0x90)
+                       return 0;
+               if (lead == 0xF4 && p[1] > 0x8F)
+                       return 0;
+               len = 4;
+       }
+
+       /*
+        * Every trailing byte must be in the range 0x80..0xBF.  Checking in
+        * order stops at a terminating 0 before reading past it.
+        */
+       for (i = 1; i < len; i++) {
+               if ((p[i] & 0xC0) != 0x80)
+                       return 0;
+       }
+
+       return len;
+}
+
+/*
+ * Validate a null-terminated UTF-8 string.
+ * Returns true if every character passes utf8_validate_cz().
+ */
+static bool utf8_validate(const char *s)
+{
+       while (*s != 0) {
+               int len = utf8_validate_cz(s);
+
+               if (len == 0)
+                       return false;
+               s += len;
+       }
+
+       return true;
+}
+
+/*
+ * Read a single UTF-8 character starting at @s,
+ * returning the length, in bytes, of the character read.
+ *
+ * This function assumes input is valid UTF-8,
+ * and that there are enough characters in front of @s.
+ */
+static int utf8_read_char(const char *s, uchar_t *out)
+{
+       const unsigned char *p = (const unsigned char*) s;
+       uchar_t b0 = p[0];
+
+       assert(utf8_validate_cz(s));
+
+       /* 00..7F: the byte is the codepoint. */
+       if (b0 <= 0x7F) {
+               *out = b0;
+               return 1;
+       }
+
+       /* C2..DF (unless input is invalid): 5 + 6 payload bits. */
+       if (b0 <= 0xDF) {
+               *out = (b0 & 0x1F) << 6 |
+                      ((uchar_t)p[1] & 0x3F);
+               return 2;
+       }
+
+       /* E0..EF: 4 + 6 + 6 payload bits. */
+       if (b0 <= 0xEF) {
+               *out = (b0 & 0xF) << 12 |
+                      ((uchar_t)p[1] & 0x3F) << 6 |
+                      ((uchar_t)p[2] & 0x3F);
+               return 3;
+       }
+
+       /* F0..F4 (unless input is invalid): 3 + 6 + 6 + 6 payload bits. */
+       *out = (b0 & 0x7) << 18 |
+              ((uchar_t)p[1] & 0x3F) << 12 |
+              ((uchar_t)p[2] & 0x3F) << 6 |
+              ((uchar_t)p[3] & 0x3F);
+       return 4;
+}
+
+/*
+ * Write a single UTF-8 character to @out,
+ * returning the length, in bytes, of the character written.
+ *
+ * @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
+ *
+ * This function will write up to 4 bytes to @out.
+ */
+static int utf8_write_char(uchar_t unicode, char *out)
+{
+       unsigned char *o = (unsigned char*) out;
+
+       assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF));
+
+       /* U+0000..U+007F */
+       if (unicode <= 0x7F) {
+               o[0] = unicode;
+               return 1;
+       }
+
+       /* U+0080..U+07FF */
+       if (unicode <= 0x7FF) {
+               o[0] = 0xC0 | unicode >> 6;
+               o[1] = 0x80 | (unicode & 0x3F);
+               return 2;
+       }
+
+       /* U+0800..U+FFFF */
+       if (unicode <= 0xFFFF) {
+               o[0] = 0xE0 | unicode >> 12;
+               o[1] = 0x80 | (unicode >> 6 & 0x3F);
+               o[2] = 0x80 | (unicode & 0x3F);
+               return 3;
+       }
+
+       /* U+10000..U+10FFFF */
+       o[0] = 0xF0 | unicode >> 18;
+       o[1] = 0x80 | (unicode >> 12 & 0x3F);
+       o[2] = 0x80 | (unicode >> 6 & 0x3F);
+       o[3] = 0x80 | (unicode & 0x3F);
+       return 4;
+}
+
+/*
+ * Compute the Unicode codepoint of a UTF-16 surrogate pair.
+ *
+ * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
+ * If they aren't, this function returns false.
+ */
+static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uchar_t *unicode)
+{
+       bool high_ok = uc >= 0xD800 && uc <= 0xDBFF;
+       bool low_ok = lc >= 0xDC00 && lc <= 0xDFFF;
+
+       if (!high_ok || !low_ok)
+               return false;
+
+       /* Each half contributes 10 bits; the pair encodes (codepoint - 0x10000). */
+       *unicode = 0x10000 + ((((uchar_t)uc & 0x3FF) << 10) | (lc & 0x3FF));
+       return true;
+}
+
+/*
+ * Construct a UTF-16 surrogate pair given a Unicode codepoint.
+ *
+ * @unicode must be U+10000..U+10FFFF.
+ */
+static void to_surrogate_pair(uchar_t unicode, uint16_t *uc, uint16_t *lc)
+{
+       uchar_t offset;
+
+       assert(unicode >= 0x10000 && unicode <= 0x10FFFF);
+
+       /* Split (codepoint - 0x10000) into its upper and lower 10 bits. */
+       offset = unicode - 0x10000;
+       *uc = ((offset >> 10) & 0x3FF) | 0xD800;
+       *lc = (offset & 0x3FF) | 0xDC00;
+}
+
+/*
+ * Character classification helpers.  Both evaluate their argument more
+ * than once.  is_space() matches tab, newline, carriage return and space;
+ * is_digit() matches '0'..'9'.
+ */
+#define is_space(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == ' ')
+#define is_digit(c) ((c) >= '0' && (c) <= '9')
+
+static bool parse_value     (const char **sp, JsonNode        **out);
+static bool parse_string    (const char **sp, char            **out);
+static bool parse_number    (const char **sp, double           *out);
+static bool parse_array     (const char **sp, JsonNode        **out);
+static bool parse_object    (const char **sp, JsonNode        **out);
+static bool parse_hex16     (const char **sp, uint16_t         *out);
+
+static bool expect_literal  (const char **sp, const char *str);
+static void skip_space      (const char **sp);
+
+static void emit_value              (SB *out, const JsonNode *node);
+static void emit_value_indented     (SB *out, const JsonNode *node, const char *space, int indent_level);
+static void emit_string             (SB *out, const char *str);
+static void emit_number             (SB *out, double num);
+static void emit_array              (SB *out, const JsonNode *array);
+static void emit_array_indented     (SB *out, const JsonNode *array, const char *space, int indent_level);
+static void emit_object             (SB *out, const JsonNode *object);
+static void emit_object_indented    (SB *out, const JsonNode *object, const char *space, int indent_level);
+
+static int write_hex16(char *out, uint16_t val);
+
+static JsonNode *mknode(JsonTag tag);
+static void append_node(JsonNode *parent, JsonNode *child);
+static void prepend_node(JsonNode *parent, JsonNode *child);
+static void append_member(JsonNode *object, char *key, JsonNode *value);
+
+/* Assertion-friendly validity checks */
+static bool tag_is_valid(unsigned int tag);
+static bool number_is_valid(const char *num);
+
+/*
+ * Parse the null-terminated text @json into a node tree.
+ * Returns the root node, or NULL if the text is not a single JSON value
+ * optionally surrounded by whitespace.
+ */
+JsonNode *json_decode(const char *json)
+{
+       const char *cursor = json;
+       JsonNode *root;
+
+       skip_space(&cursor);
+       if (!parse_value(&cursor, &root))
+               return NULL;
+       skip_space(&cursor);
+
+       /* Nothing but whitespace may follow the value. */
+       if (*cursor == 0)
+               return root;
+
+       json_delete(root);
+       return NULL;
+}
+
+/* Serialise @node without indentation; equivalent to json_stringify(node, NULL). */
+char *json_encode(const JsonNode *node)
+{
+       char *encoded = json_stringify(node, NULL);
+
+       return encoded;
+}
+
+/*
+ * Encode @str via emit_string() into a freshly allocated buffer and
+ * return that buffer.
+ */
+char *json_encode_string(const char *str)
+{
+       SB buf;
+
+       sb_init(&buf);
+       emit_string(&buf, str);
+       return sb_finish(&buf);
+}
+
+/*
+ * Serialise @node into a freshly allocated string.  When @space is
+ * non-NULL the indented emitter is used with @space as the indent string,
+ * starting at level 0; otherwise the plain emitter is used.
+ */
+char *json_stringify(const JsonNode *node, const char *space)
+{
+       SB buf;
+
+       sb_init(&buf);
+
+       if (space == NULL)
+               emit_value(&buf, node);
+       else
+               emit_value_indented(&buf, node, space, 0);
+
+       return sb_finish(&buf);
+}
+
+/*
+ * Detach @node from its parent (if any) and free it together with its
+ * string payload or all of its children.  NULL is accepted and ignored.
+ */
+void json_delete(JsonNode *node)
+{
+       if (node == NULL)
+               return;
+
+       json_remove_from_parent(node);
+
+       if (node->tag == JSON_STRING) {
+               free(node->string_);
+       } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
+               JsonNode *child = node->children.head;
+
+               while (child != NULL) {
+                       /* Deleting the child unlinks it, so fetch its successor first. */
+                       JsonNode *next = child->next;
+
+                       json_delete(child);
+                       child = next;
+               }
+       }
+
+       free(node);
+}
+
+/*
+ * Check whether @json is a single JSON value optionally surrounded by
+ * whitespace, without building a node tree.
+ */
+bool json_validate(const char *json)
+{
+       const char *cursor = json;
+
+       skip_space(&cursor);
+       if (!parse_value(&cursor, NULL))
+               return false;
+       skip_space(&cursor);
+
+       /* Valid only if the whole input was consumed. */
+       return *cursor == 0;
+}
+
+/*
+ * Return the element at zero-based position @index of @array, or NULL if
+ * @array is NULL, is not a JSON_ARRAY, or has no element at that position.
+ */
+JsonNode *json_find_element(JsonNode *array, int index)
+{
+       JsonNode *element;
+       int pos = 0;
+
+       if (array == NULL || array->tag != JSON_ARRAY)
+               return NULL;
+
+       json_foreach(element, array) {
+               if (pos == index)
+                       return element;
+               pos++;
+       }
+
+       return NULL;
+}
+
+/*
+ * Return the first member of @object whose key equals @name, or NULL if
+ * @object is NULL, is not a JSON_OBJECT, or has no such member.
+ */
+JsonNode *json_find_member(JsonNode *object, const char *name)
+{
+       JsonNode *member;
+
+       if (object == NULL || object->tag != JSON_OBJECT)
+               return NULL;
+
+       json_foreach(member, object) {
+               if (!strcmp(member->key, name))
+                       return member;
+       }
+
+       return NULL;
+}
+
+/*
+ * Return the first child of an array or object node.  Returns NULL for
+ * NULL, for any other kind of node, and for an empty container.
+ */
+JsonNode *json_first_child(const JsonNode *node)
+{
+       if (node == NULL)
+               return NULL;
+       if (node->tag != JSON_ARRAY && node->tag != JSON_OBJECT)
+               return NULL;
+       return node->children.head;
+}
+
+/*
+ * Allocate a zero-filled node and set its tag to @tag.
+ * Exits via out_of_memory() if the allocation fails.
+ */
+static JsonNode *mknode(JsonTag tag)
+{
+       JsonNode *node = (JsonNode*) calloc(1, sizeof *node);
+
+       if (node == NULL)
+               out_of_memory();
+       node->tag = tag;
+       return node;
+}
+
+/* Create a new, unattached JSON_NULL node. */
+JsonNode *json_mknull(void)
+{
+       JsonNode *node = mknode(JSON_NULL);
+
+       return node;
+}
+
+/* Create a new, unattached JSON_BOOL node holding @b. */
+JsonNode *json_mkbool(bool b)
+{
+       JsonNode *node = mknode(JSON_BOOL);
+
+       node->bool_ = b;
+       return node;
+}
+
+/*
+ * Create a JSON_STRING node that stores the pointer @s itself (no copy is
+ * made); json_delete() later passes it to free().
+ */
+static JsonNode *mkstring(char *s)
+{
+       JsonNode *node = mknode(JSON_STRING);
+
+       node->string_ = s;
+       return node;
+}
+
+/* Create a new, unattached JSON_STRING node holding a private copy of @s. */
+JsonNode *json_mkstring(const char *s)
+{
+       char *copy = json_strdup(s);
+
+       return mkstring(copy);
+}
+
+/* Create a new, unattached JSON_NUMBER node holding @n. */
+JsonNode *json_mknumber(double n)
+{
+       JsonNode *num = mknode(JSON_NUMBER);
+
+       num->number_ = n;
+       return num;
+}
+
+/* Create a new, unattached, empty JSON_ARRAY node. */
+JsonNode *json_mkarray(void)
+{
+       JsonNode *array = mknode(JSON_ARRAY);
+
+       return array;
+}
+
+/* Create a new, unattached, empty JSON_OBJECT node. */
+JsonNode *json_mkobject(void)
+{
+       JsonNode *object = mknode(JSON_OBJECT);
+
+       return object;
+}
+
+/* Link @child in as the last child of @parent.  No validity checks are made. */
+static void append_node(JsonNode *parent, JsonNode *child)
+{
+       JsonNode *old_tail = parent->children.tail;
+
+       child->parent = parent;
+       child->prev = old_tail;
+       child->next = NULL;
+
+       /* An empty list gets a new head; otherwise chain after the old tail. */
+       if (old_tail == NULL)
+               parent->children.head = child;
+       else
+               old_tail->next = child;
+       parent->children.tail = child;
+}
+
+/* Link @child in as the first child of @parent.  No validity checks are made. */
+static void prepend_node(JsonNode *parent, JsonNode *child)
+{
+       JsonNode *old_head = parent->children.head;
+
+       child->parent = parent;
+       child->prev = NULL;
+       child->next = old_head;
+
+       /* An empty list gets a new tail; otherwise chain before the old head. */
+       if (old_head == NULL)
+               parent->children.tail = child;
+       else
+               old_head->prev = child;
+       parent->children.head = child;
+}
+
+/*
+ * Append @value to @object under @key.  The pointer @key is stored in the
+ * node as-is (not copied).
+ */
+static void append_member(JsonNode *object, char *key, JsonNode *value)
+{
+       append_node(object, value);
+       value->key = key;
+}
+
+/*
+ * Add @element as the last element of @array.
+ * @array must be a JSON_ARRAY and @element must not have a parent.
+ */
+void json_append_element(JsonNode *array, JsonNode *element)
+{
+       assert(array->tag == JSON_ARRAY);
+       assert(element->parent == NULL);
+
+       append_node(array, element);
+}
+
+/*
+ * Add @element as the first element of @array.
+ * @array must be a JSON_ARRAY and @element must not have a parent.
+ */
+void json_prepend_element(JsonNode *array, JsonNode *element)
+{
+       assert(array->tag == JSON_ARRAY);
+       assert(element->parent == NULL);
+
+       prepend_node(array, element);
+}
+
+/*
+ * Add @value as the last member of @object under a private copy of @key.
+ * @object must be a JSON_OBJECT and @value must not have a parent.
+ */
+void json_append_member(JsonNode *object, const char *key, JsonNode *value)
+{
+       char *key_copy;
+
+       assert(object->tag == JSON_OBJECT);
+       assert(value->parent == NULL);
+
+       key_copy = json_strdup(key);
+       append_member(object, key_copy, value);
+}
+
+/*
+ * Add @value as the first member of @object under a private copy of @key.
+ * @object must be a JSON_OBJECT and @value must not have a parent.
+ */
+void json_prepend_member(JsonNode *object, const char *key, JsonNode *value)
+{
+       char *key_copy;
+
+       assert(object->tag == JSON_OBJECT);
+       assert(value->parent == NULL);
+
+       key_copy = json_strdup(key);
+       value->key = key_copy;
+       prepend_node(object, value);
+}
+
+/*
+ * Unlink @node from its parent's child list, free its key and clear its
+ * parent/sibling/key pointers.  Does nothing if @node has no parent.
+ * The node itself is not freed.
+ */
+void json_remove_from_parent(JsonNode *node)
+{
+       JsonNode *parent = node->parent;
+       JsonNode *before;
+       JsonNode *after;
+
+       if (parent == NULL)
+               return;
+
+       before = node->prev;
+       after = node->next;
+
+       /* Bridge the neighbours, fixing up head/tail at the list ends. */
+       if (before == NULL)
+               parent->children.head = after;
+       else
+               before->next = after;
+
+       if (after == NULL)
+               parent->children.tail = before;
+       else
+               after->prev = before;
+
+       free(node->key);
+
+       node->parent = NULL;
+       node->prev = NULL;
+       node->next = NULL;
+       node->key = NULL;
+}
+
+/*
+ * Parse one JSON value at *sp, dispatching on its first character.
+ *
+ * On success, advance *sp past the value and return true; if @out is
+ * non-NULL, *out receives the new node.  On failure, return false and
+ * leave *sp unchanged.  A NULL @out means "validate only".
+ */
+static bool parse_value(const char **sp, JsonNode **out)
+{
+       const char *s = *sp;
+       bool ok;
+
+       switch (*s) {
+               case 'n':
+                       ok = expect_literal(&s, "null");
+                       if (ok && out)
+                               *out = json_mknull();
+                       break;
+
+               case 'f':
+                       ok = expect_literal(&s, "false");
+                       if (ok && out)
+                               *out = json_mkbool(false);
+                       break;
+
+               case 't':
+                       ok = expect_literal(&s, "true");
+                       if (ok && out)
+                               *out = json_mkbool(true);
+                       break;
+
+               case '"': {
+                       char *str;
+
+                       ok = parse_string(&s, out ? &str : NULL);
+                       if (ok && out)
+                               *out = mkstring(str);
+                       break;
+               }
+
+               case '[':
+                       ok = parse_array(&s, out);
+                       break;
+
+               case '{':
+                       ok = parse_object(&s, out);
+                       break;
+
+               default: {
+                       /* Anything else can only be a number. */
+                       double num;
+
+                       ok = parse_number(&s, out ? &num : NULL);
+                       if (ok && out)
+                               *out = json_mknumber(num);
+                       break;
+               }
+       }
+
+       /* Commit the new position only when the value parsed cleanly. */
+       if (ok)
+               *sp = s;
+       return ok;
+}
+
+/*
+ * Parse a JSON array at *sp.
+ *
+ * On success, advance *sp past the closing ']' and return true; if @out is
+ * non-NULL, *out receives the new array node.  On failure, return false,
+ * leave *sp unchanged and delete any partially built array.
+ */
+static bool parse_array(const char **sp, JsonNode **out)
+{
+       const char *s = *sp;
+       JsonNode *ret = out ? json_mkarray() : NULL;
+       JsonNode *element;
+
+       if (*s++ != '[')
+               goto failure;
+       skip_space(&s);
+
+       if (*s != ']') {
+               for (;;) {
+                       if (!parse_value(&s, out ? &element : NULL))
+                               goto failure;
+                       skip_space(&s);
+
+                       if (out)
+                               json_append_element(ret, element);
+
+                       if (*s == ']')
+                               break;
+
+                       if (*s++ != ',')
+                               goto failure;
+                       skip_space(&s);
+               }
+       }
+
+       /* Step over the closing ']'. */
+       s++;
+
+       *sp = s;
+       if (out)
+               *out = ret;
+       return true;
+
+failure:
+       json_delete(ret);
+       return false;
+}
+
+/*
+ * Parse a JSON object at *sp.
+ *
+ * On success, advance *sp past the closing '}' and return true; if @out is
+ * non-NULL, *out receives the new object node.  On failure, return false,
+ * leave *sp unchanged and delete any partially built object.
+ */
+static bool parse_object(const char **sp, JsonNode **out)
+{
+       const char *s = *sp;
+       JsonNode *ret = out ? json_mkobject() : NULL;
+       char *key;
+       JsonNode *value;
+
+       if (*s++ != '{')
+               goto failure;
+       skip_space(&s);
+
+       if (*s != '}') {
+               for (;;) {
+                       if (!parse_string(&s, out ? &key : NULL))
+                               goto failure;
+                       skip_space(&s);
+
+                       /* Until the member is appended, the key must be freed by us. */
+                       if (*s++ != ':')
+                               goto failure_free_key;
+                       skip_space(&s);
+
+                       if (!parse_value(&s, out ? &value : NULL))
+                               goto failure_free_key;
+                       skip_space(&s);
+
+                       /* From here on the key belongs to the appended member. */
+                       if (out)
+                               append_member(ret, key, value);
+
+                       if (*s == '}')
+                               break;
+
+                       if (*s++ != ',')
+                               goto failure;
+                       skip_space(&s);
+               }
+       }
+
+       /* Step over the closing '}'. */
+       s++;
+
+       *sp = s;
+       if (out)
+               *out = ret;
+       return true;
+
+failure_free_key:
+       if (out)
+               free(key);
+failure:
+       json_delete(ret);
+       return false;
+}
+
+/*
+ * Parse a JSON string literal at *sp, which must point at the opening '"'.
+ *
+ * On success, advance *sp past the closing quote and return true.  If @out
+ * is non-NULL, *out receives a newly allocated, null-terminated UTF-8 copy
+ * of the decoded string.  If @out is NULL the literal is only validated;
+ * decoded bytes go to a small scratch buffer that is reused per character.
+ *
+ * On failure, return false, leave *sp unchanged and free any partially
+ * built buffer.  Rejected input: unknown escapes, "\u0000", incomplete or
+ * invalid surrogate pairs, raw control characters (including the
+ * terminating 0 of an unterminated literal) and invalid UTF-8.
+ *
+ * Declared static to match the forward declaration.
+ */
+static bool parse_string(const char **sp, char **out)
+{
+       const char *s = *sp;
+       SB sb;
+       char throwaway_buffer[4];
+               /* enough space for a UTF-8 character */
+       char *b;
+       
+       if (*s++ != '"')
+               return false;
+       
+       if (out) {
+               sb_init(&sb);
+               sb_need(&sb, 4);
+               b = sb.cur;
+       } else {
+               b = throwaway_buffer;
+       }
+       
+       while (*s != '"') {
+               unsigned char c = *s++;
+               
+               /* Parse next character, and write it to b. */
+               if (c == '\\') {
+                       c = *s++;
+                       switch (c) {
+                               case '"':
+                               case '\\':
+                               case '/':
+                                       *b++ = c;
+                                       break;
+                               case 'b':
+                                       *b++ = '\b';
+                                       break;
+                               case 'f':
+                                       *b++ = '\f';
+                                       break;
+                               case 'n':
+                                       *b++ = '\n';
+                                       break;
+                               case 'r':
+                                       *b++ = '\r';
+                                       break;
+                               case 't':
+                                       *b++ = '\t';
+                                       break;
+                               case 'u':
+                               {
+                                       uint16_t uc, lc;
+                                       uchar_t unicode;
+                                       
+                                       if (!parse_hex16(&s, &uc))
+                                               goto failed;
+                                       
+                                       if (uc >= 0xD800 && uc <= 0xDFFF) {
+                                               /* Handle UTF-16 surrogate pair. */
+                                               if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc))
+                                                       goto failed; /* Incomplete surrogate pair. */
+                                               if (!from_surrogate_pair(uc, lc, &unicode))
+                                                       goto failed; /* Invalid surrogate pair. */
+                                       } else if (uc == 0) {
+                                               /* Disallow "\u0000". */
+                                               goto failed;
+                                       } else {
+                                               unicode = uc;
+                                       }
+                                       
+                                       b += utf8_write_char(unicode, b);
+                                       break;
+                               }
+                               default:
+                                       /* Invalid escape (also hit when the input ends after '\\'). */
+                                       goto failed;
+                       }
+               } else if (c <= 0x1F) {
+                       /*
+                        * Control characters are not allowed in string literals.
+                        * This also catches the terminating 0 of an unterminated literal.
+                        */
+                       goto failed;
+               } else {
+                       /* Validate and echo a UTF-8 character. */
+                       int len;
+                       
+                       s--;
+                       len = utf8_validate_cz(s);
+                       if (len == 0)
+                               goto failed; /* Invalid UTF-8 character. */
+                       
+                       while (len--)
+                               *b++ = *s++;
+               }
+               
+               /*
+                * Update sb to know about the new bytes,
+                * and set up b to write another character.
+                */
+               if (out) {
+                       sb.cur = b;
+                       sb_need(&sb, 4);
+                       b = sb.cur;
+               } else {
+                       b = throwaway_buffer;
+               }
+       }
+       s++;
+       
+       if (out)
+               *out = sb_finish(&sb);
+       *sp = s;
+       return true;
+
+failed:
+       if (out)
+               sb_free(&sb);
+       return false;
+}
+
+/*
+ * Strict JSON number scanner.  The only accepted shape is
+ * (spaces and quotes added for readability):
+ *      '-'? (0 | [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
+ *
+ * Other parsers are more forgiving (PHP takes '.5' and '1.',
+ * JSON.parse takes '+3'); this one is deliberately not.
+ *
+ * On success, *sp is advanced past the number and, if out is not NULL,
+ * *out receives the value as converted by strtod().  On failure,
+ * false is returned and neither *sp nor *out is touched.
+ */
+bool parse_number(const char **sp, double *out)
+{
+       const char *p = *sp;
+
+       /* Optional sign. */
+       if (*p == '-')
+               p++;
+
+       /* Integer part: a lone '0', or a run of digits. */
+       if (*p == '0') {
+               p++;
+       } else if (is_digit(*p)) {
+               while (is_digit(*p))
+                       p++;
+       } else {
+               return false;
+       }
+
+       /* Optional fraction: '.' followed by at least one digit. */
+       if (*p == '.') {
+               p++;
+               if (!is_digit(*p))
+                       return false;
+               while (is_digit(*p))
+                       p++;
+       }
+
+       /* Optional exponent: [Ee], optional sign, at least one digit. */
+       if (*p == 'e' || *p == 'E') {
+               p++;
+               if (*p == '-' || *p == '+')
+                       p++;
+               if (!is_digit(*p))
+                       return false;
+               while (is_digit(*p))
+                       p++;
+       }
+
+       /* The text is known to be well-formed; convert from its start. */
+       if (out != NULL)
+               *out = strtod(*sp, NULL);
+
+       *sp = p;
+       return true;
+}
+
+/* Advance *sp past every leading character for which is_space() holds. */
+static void skip_space(const char **sp)
+{
+       const char *p;
+
+       for (p = *sp; is_space(*p); p++)
+               ;
+       *sp = p;
+}
+
+/*
+ * Append the compact (no whitespace) JSON encoding of node to out,
+ * choosing the emitter that matches the node's tag.
+ */
+static void emit_value(SB *out, const JsonNode *node)
+{
+       assert(tag_is_valid(node->tag));
+       
+       if (node->tag == JSON_NULL) {
+               sb_puts(out, "null");
+       } else if (node->tag == JSON_BOOL) {
+               sb_puts(out, node->bool_ ? "true" : "false");
+       } else if (node->tag == JSON_STRING) {
+               emit_string(out, node->string_);
+       } else if (node->tag == JSON_NUMBER) {
+               emit_number(out, node->number_);
+       } else if (node->tag == JSON_ARRAY) {
+               emit_array(out, node);
+       } else if (node->tag == JSON_OBJECT) {
+               emit_object(out, node);
+       } else {
+               /* Unknown tag: nothing is emitted. */
+               assert(false);
+       }
+}
+
+/*
+ * Append the pretty-printed JSON encoding of node to out.
+ *
+ * Scalars are written exactly as in the compact form; arrays and objects
+ * are handed to their indented emitters together with the indent unit
+ * (space) and the current nesting depth (indent_level).
+ */
+void emit_value_indented(SB *out, const JsonNode *node, const char *space, int indent_level)
+{
+       assert(tag_is_valid(node->tag));
+       
+       if (node->tag == JSON_NULL) {
+               sb_puts(out, "null");
+       } else if (node->tag == JSON_BOOL) {
+               sb_puts(out, node->bool_ ? "true" : "false");
+       } else if (node->tag == JSON_STRING) {
+               emit_string(out, node->string_);
+       } else if (node->tag == JSON_NUMBER) {
+               emit_number(out, node->number_);
+       } else if (node->tag == JSON_ARRAY) {
+               emit_array_indented(out, node, space, indent_level);
+       } else if (node->tag == JSON_OBJECT) {
+               emit_object_indented(out, node, space, indent_level);
+       } else {
+               /* Unknown tag: nothing is emitted. */
+               assert(false);
+       }
+}
+
+/* Append array to out as compact JSON: [elem,elem,...] */
+static void emit_array(SB *out, const JsonNode *array)
+{
+       const JsonNode *item = json_first_child(array);
+       
+       sb_putc(out, '[');
+       while (item != NULL) {
+               emit_value(out, item);
+               
+               /* A comma separates elements; none follows the last one. */
+               item = item->next;
+               if (item != NULL)
+                       sb_putc(out, ',');
+       }
+       sb_putc(out, ']');
+}
+
+/*
+ * Append array to out as pretty-printed JSON, one element per line.
+ *
+ * Each element is prefixed by indent_level + 1 copies of space; the
+ * closing bracket is prefixed by indent_level copies.  An array with
+ * no children is written as "[]" on a single line.
+ */
+static void emit_array_indented(SB *out, const JsonNode *array, const char *space, int indent_level)
+{
+       const JsonNode *item;
+       int depth;
+       
+       if (array->children.head == NULL) {
+               sb_puts(out, "[]");
+               return;
+       }
+       
+       sb_puts(out, "[\n");
+       for (item = array->children.head; item != NULL; item = item->next) {
+               for (depth = 0; depth <= indent_level; depth++)
+                       sb_puts(out, space);
+               emit_value_indented(out, item, space, indent_level + 1);
+               
+               if (item->next != NULL)
+                       sb_puts(out, ",\n");
+               else
+                       sb_puts(out, "\n");
+       }
+       
+       for (depth = 0; depth < indent_level; depth++)
+               sb_puts(out, space);
+       sb_putc(out, ']');
+}
+
+/* Append object to out as compact JSON: {"key":value,...} */
+static void emit_object(SB *out, const JsonNode *object)
+{
+       const JsonNode *entry = json_first_child(object);
+       
+       sb_putc(out, '{');
+       while (entry != NULL) {
+               emit_string(out, entry->key);
+               sb_putc(out, ':');
+               emit_value(out, entry);
+               
+               /* A comma separates members; none follows the last one. */
+               entry = entry->next;
+               if (entry != NULL)
+                       sb_putc(out, ',');
+       }
+       sb_putc(out, '}');
+}
+
+/*
+ * Append object to out as pretty-printed JSON, one member per line
+ * in the form  "key": value .
+ *
+ * Each member is prefixed by indent_level + 1 copies of space; the
+ * closing brace is prefixed by indent_level copies.  An object with
+ * no children is written as "{}" on a single line.
+ */
+static void emit_object_indented(SB *out, const JsonNode *object, const char *space, int indent_level)
+{
+       const JsonNode *entry;
+       int depth;
+       
+       if (object->children.head == NULL) {
+               sb_puts(out, "{}");
+               return;
+       }
+       
+       sb_puts(out, "{\n");
+       for (entry = object->children.head; entry != NULL; entry = entry->next) {
+               for (depth = 0; depth <= indent_level; depth++)
+                       sb_puts(out, space);
+               emit_string(out, entry->key);
+               sb_puts(out, ": ");
+               emit_value_indented(out, entry, space, indent_level + 1);
+               
+               if (entry->next != NULL)
+                       sb_puts(out, ",\n");
+               else
+                       sb_puts(out, "\n");
+       }
+       
+       for (depth = 0; depth < indent_level; depth++)
+               sb_puts(out, space);
+       sb_putc(out, '}');
+}
+
+/*
+ * Append str to out as a double-quoted JSON string literal.
+ *
+ * str must be a NUL-terminated, valid UTF-8 string (asserted on entry).
+ *
+ *  - '"', '\\', and the control characters \b \f \n \r \t are written
+ *    with their two-character escapes.
+ *  - Every other control character up to and including U+001F is
+ *    written as a \uXXXX escape.  JSON does not allow these characters
+ *    to appear raw inside a string, and the string parser in this file
+ *    rejects any unescaped byte <= 0x1F.
+ *  - All remaining characters are copied through as UTF-8, unless
+ *    escape_unicode is set, in which case non-ASCII characters are
+ *    written as \uXXXX (using a surrogate pair above U+FFFF).
+ *
+ * No terminating NUL is written; out->cur is left just past the
+ * closing quotation mark.
+ */
+void emit_string(SB *out, const char *str)
+{
+       bool escape_unicode = false;
+       const char *s = str;
+       char *b;
+       
+       assert(utf8_validate(str));
+       
+       /*
+        * 14 bytes is enough space to write up to two
+        * \uXXXX escapes and two quotation marks.
+        * sb_need() is relied on to make that many bytes writable at out->cur.
+        */
+       sb_need(out, 14);
+       b = out->cur;
+       
+       *b++ = '"';
+       while (*s != 0) {
+               unsigned char c = *s++;
+               
+               /* Encode the next character, and write it to b. */
+               switch (c) {
+                       case '"':
+                               *b++ = '\\';
+                               *b++ = '"';
+                               break;
+                       case '\\':
+                               *b++ = '\\';
+                               *b++ = '\\';
+                               break;
+                       case '\b':
+                               *b++ = '\\';
+                               *b++ = 'b';
+                               break;
+                       case '\f':
+                               *b++ = '\\';
+                               *b++ = 'f';
+                               break;
+                       case '\n':
+                               *b++ = '\\';
+                               *b++ = 'n';
+                               break;
+                       case '\r':
+                               *b++ = '\\';
+                               *b++ = 'r';
+                               break;
+                       case '\t':
+                               *b++ = '\\';
+                               *b++ = 't';
+                               break;
+                       default: {
+                               int len;
+                               
+                               /* Step back so s points at the start of the character. */
+                               s--;
+                               len = utf8_validate_cz(s);
+                               
+                               if (len == 0) {
+                                       /*
+                                        * Handle invalid UTF-8 character gracefully in production
+                                        * by writing a replacement character (U+FFFD)
+                                        * and skipping a single byte.
+                                        *
+                                        * This should never happen when assertions are enabled
+                                        * due to the assertion at the beginning of this function.
+                                        */
+                                       assert(false);
+                                       if (escape_unicode) {
+                                               strcpy(b, "\\uFFFD");
+                                               b += 6;
+                                       } else {
+                                               *b++ = 0xEF;
+                                               *b++ = 0xBF;
+                                               *b++ = 0xBD;
+                                       }
+                                       s++;
+                               } else if (c <= 0x1F || (c >= 0x80 && escape_unicode)) {
+                                       /*
+                                        * Encode using \u....
+                                        *
+                                        * The bound is inclusive: U+001F is a control
+                                        * character too and must not be emitted raw.
+                                        */
+                                       uint32_t unicode;
+                                       
+                                       s += utf8_read_char(s, &unicode);
+                                       
+                                       if (unicode <= 0xFFFF) {
+                                               *b++ = '\\';
+                                               *b++ = 'u';
+                                               b += write_hex16(b, unicode);
+                                       } else {
+                                               /* Produce a surrogate pair. */
+                                               uint16_t uc, lc;
+                                               assert(unicode <= 0x10FFFF);
+                                               to_surrogate_pair(unicode, &uc, &lc);
+                                               *b++ = '\\';
+                                               *b++ = 'u';
+                                               b += write_hex16(b, uc);
+                                               *b++ = '\\';
+                                               *b++ = 'u';
+                                               b += write_hex16(b, lc);
+                                       }
+                               } else {
+                                       /* Write the character directly. */
+                                       while (len--)
+                                               *b++ = *s++;
+                               }
+                               
+                               break;
+                       }
+               }
+       
+               /*
+                * Update *out to know about the new bytes,
+                * and set up b to write another encoded character.
+                */
+               out->cur = b;
+               sb_need(out, 14);
+               b = out->cur;
+       }
+       *b++ = '"';
+       
+       out->cur = b;
+}
+
+/*
+ * Append num to out as a JSON number, or as null when its printed
+ * form is not a strict JSON number.
+ */
+static void emit_number(SB *out, double num)
+{
+       /*
+        * "%.16g" isn't exactly how JavaScript renders numbers,
+        * but it should produce valid JSON for reasonable numbers,
+        * preserve precision well enough, and avoid some oddities
+        * like 0.3 -> 0.299999999999999988898 .
+        */
+       char text[64];
+       const char *result = "null";
+       
+       sprintf(text, "%.16g", num);
+       if (number_is_valid(text))
+               result = text;
+       
+       sb_puts(out, result);
+}
+
+/*
+ * True when tag is one of the JsonTag values.  tag is unsigned and
+ * JSON_NULL is the first enumerator, so only the upper bound
+ * (JSON_OBJECT) needs to be tested.
+ */
+static bool tag_is_valid(unsigned int tag)
+{
+       if (tag > JSON_OBJECT)
+               return false;
+       return true;
+}
+
+/*
+ * True when the whole of num, up to its terminating NUL, is a single
+ * number accepted by parse_number().
+ */
+static bool number_is_valid(const char *num)
+{
+       const char *end = num;
+       
+       if (!parse_number(&end, NULL))
+               return false;
+       
+       /* Reject trailing characters after the number. */
+       return *end == '\0';
+}
+
+/*
+ * If the text at *sp begins with str, advance *sp past it and
+ * return true.  Otherwise return false and leave *sp unchanged.
+ */
+static bool expect_literal(const char **sp, const char *str)
+{
+       const char *p;
+       
+       for (p = *sp; *str != '\0'; p++, str++) {
+               if (*p != *str)
+                       return false;
+       }
+       
+       *sp = p;
+       return true;
+}
+
+/*
+ * Reads exactly 4 hexadecimal digits (either case) from *sp.
+ *
+ * On success, stores the 16-bit value in *out (if out is not NULL),
+ * advances *sp past the digits, and returns true.  If any of the four
+ * characters is not in [0-9A-Fa-f], returns false and leaves *sp and
+ * *out unchanged.
+ */
+static bool parse_hex16(const char **sp, uint16_t *out)
+{
+       const char *p = *sp;
+       uint16_t value = 0;
+       int remaining;
+
+       for (remaining = 4; remaining > 0; remaining--) {
+               char c = *p++;
+               uint16_t digit;
+
+               if (c >= '0' && c <= '9')
+                       digit = c - '0';
+               else if (c >= 'A' && c <= 'F')
+                       digit = c - 'A' + 10;
+               else if (c >= 'a' && c <= 'f')
+                       digit = c - 'a' + 10;
+               else
+                       return false;
+
+               /* Shift the accumulated nibbles up and append the new one. */
+               value = (uint16_t)((value << 4) | digit);
+       }
+       
+       if (out != NULL)
+               *out = value;
+       *sp = p;
+       return true;
+}
+
+/*
+ * Writes val to out as exactly 4 uppercase hexadecimal digits,
+ * most significant nibble first.  No NUL terminator is written.
+ * Always returns 4, the number of characters written.
+ */
+static int write_hex16(char *out, uint16_t val)
+{
+       static const char digits[] = "0123456789ABCDEF";
+       int shift;
+       
+       for (shift = 12; shift >= 0; shift -= 4)
+               *out++ = digits[(val >> shift) & 0xF];
+       
+       return 4;
+}
+
+/*
+ * Check node, and recursively every child below it, for structural and
+ * encoding problems.
+ *
+ * Returns true if no problem was found.  At the first problem found,
+ * returns false and, when errmsg is not NULL, writes a description of it
+ * into errmsg (formatted with snprintf, limited to 256 bytes).
+ *
+ * Checks performed on each node:
+ *  - key, if present, is valid UTF-8;
+ *  - tag is a known JsonTag;
+ *  - a string node has a non-NULL, valid UTF-8 string_;
+ *  - for arrays and objects, head/tail are both NULL or both set, the
+ *    child list is consistently linked (prev/next/parent), array elements
+ *    carry no key, object members carry a key, and tail is the node
+ *    reached by following next links from head.
+ */
+bool json_check(const JsonNode *node, char errmsg[256])
+{
+       /* Record a formatted message (if requested) and fail the whole check. */
+       #define problem(...) do { \
+                       if (errmsg != NULL) \
+                               snprintf(errmsg, 256, __VA_ARGS__); \
+                       return false; \
+               } while (0)
+       
+       if (node->key != NULL && !utf8_validate(node->key))
+               problem("key contains invalid UTF-8");
+       
+       if (!tag_is_valid(node->tag))
+               problem("tag is invalid (%u)", node->tag);
+       
+       if (node->tag == JSON_BOOL) {
+               if (node->bool_ != false && node->bool_ != true)
+                       problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true);
+       } else if (node->tag == JSON_STRING) {
+               if (node->string_ == NULL)
+                       problem("string_ is NULL");
+               if (!utf8_validate(node->string_))
+                       problem("string_ contains invalid UTF-8");
+       } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) {
+               JsonNode *head = node->children.head;
+               JsonNode *tail = node->children.tail;
+               
+               if (head == NULL || tail == NULL) {
+                       /* An empty container must have both ends NULL. */
+                       if (head != NULL)
+                               problem("tail is NULL, but head is not");
+                       if (tail != NULL)
+                               problem("head is NULL, but tail is not");
+               } else {
+                       JsonNode *child;
+                       JsonNode *last = NULL;
+                       
+                       if (head->prev != NULL)
+                               problem("First child's prev pointer is not NULL");
+                       
+                       /* Walk the child list; last trails one step behind child. */
+                       for (child = head; child != NULL; last = child, child = child->next) {
+                               /* Only these simple cycles are detected. */
+                               if (child == node)
+                                       problem("node is its own child");
+                               if (child->next == child)
+                                       problem("child->next == child (cycle)");
+                               if (child->next == head)
+                                       problem("child->next == head (cycle)");
+                               
+                               if (child->parent != node)
+                                       problem("child does not point back to parent");
+                               if (child->next != NULL && child->next->prev != child)
+                                       problem("child->next does not point back to child");
+                               
+                               if (node->tag == JSON_ARRAY && child->key != NULL)
+                                       problem("Array element's key is not NULL");
+                               if (node->tag == JSON_OBJECT && child->key == NULL)
+                                       problem("Object member's key is NULL");
+                               
+                               /* Recurse; errmsg has already been filled in on failure. */
+                               if (!json_check(child, errmsg))
+                                       return false;
+                       }
+                       
+                       if (last != tail)
+                               problem("tail does not match pointer found by starting at head and following next links");
+               }
+       }
+       
+       return true;
+       
+       #undef problem
+}
diff --git a/ccan/json/json.h b/ccan/json/json.h
new file mode 100644 (file)
index 0000000..ed5255e
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+  Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
+  All rights reserved.
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/
+
+#ifndef CCAN_JSON_H
+#define CCAN_JSON_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/*
+ * Kind of value held by a JsonNode.
+ *
+ * JSON_NULL is the first enumerator and JSON_OBJECT the last; json.c
+ * validates a tag by comparing it against JSON_OBJECT, so new tags
+ * must not be added after it without updating that check.
+ */
+typedef enum {
+       JSON_NULL,
+       JSON_BOOL,
+       JSON_STRING,
+       JSON_NUMBER,
+       JSON_ARRAY,
+       JSON_OBJECT,
+} JsonTag;
+
+typedef struct JsonNode JsonNode;
+
+/*
+ * One JSON value.  Children of an array or object form a doubly linked
+ * list (prev/next) and point back to their container through parent.
+ */
+struct JsonNode
+{
+       /* only if parent is an object or array (NULL otherwise) */
+       JsonNode *parent;
+       JsonNode *prev, *next;
+       
+       /* only if parent is an object (NULL otherwise) */
+       char *key; /* Must be valid UTF-8. */
+       
+       /* Says which member of the union below is meaningful. */
+       JsonTag tag;
+       union {
+               /* JSON_BOOL */
+               bool bool_;
+               
+               /* JSON_STRING */
+               char *string_; /* Must be valid UTF-8. */
+               
+               /* JSON_NUMBER */
+               double number_;
+               
+               /* JSON_ARRAY */
+               /* JSON_OBJECT */
+               struct {
+                       /* First and last child; both NULL when the container is empty. */
+                       JsonNode *head, *tail;
+               } children;
+       };
+};
+
+/*** Encoding, decoding, and validation ***/
+
+JsonNode   *json_decode         (const char *json);
+char       *json_encode         (const JsonNode *node);
+char       *json_encode_string  (const char *str);
+char       *json_stringify      (const JsonNode *node, const char *space);
+void        json_delete         (JsonNode *node);
+
+bool        json_validate       (const char *json);
+
+/*** Lookup and traversal ***/
+
+JsonNode   *json_find_element   (JsonNode *array, int index);
+JsonNode   *json_find_member    (JsonNode *object, const char *key);
+
+JsonNode   *json_first_child    (const JsonNode *node);
+
+/*
+ * Loop header that walks i over each direct child of object_or_array,
+ * starting at json_first_child() and following next links until NULL.
+ * i is expanded several times, so pass a plain variable.
+ */
+#define json_foreach(i, object_or_array)            \
+       for ((i) = json_first_child(object_or_array);   \
+                (i) != NULL;                               \
+                (i) = (i)->next)
+
+/*** Construction and manipulation ***/
+
+JsonNode *json_mknull(void);
+JsonNode *json_mkbool(bool b);
+JsonNode *json_mkstring(const char *s);
+JsonNode *json_mknumber(double n);
+JsonNode *json_mkarray(void);
+JsonNode *json_mkobject(void);
+
+void json_append_element(JsonNode *array, JsonNode *element);
+void json_prepend_element(JsonNode *array, JsonNode *element);
+void json_append_member(JsonNode *object, const char *key, JsonNode *value);
+void json_prepend_member(JsonNode *object, const char *key, JsonNode *value);
+
+void json_remove_from_parent(JsonNode *node);
+
+/*** Debugging ***/
+
+/*
+ * Look for structure and encoding problems in a JsonNode or its descendants.
+ *
+ * If a problem is detected, return false, writing a description of the problem
+ * to errmsg (unless errmsg is NULL).
+ */
+bool json_check(const JsonNode *node, char errmsg[256]);
+
+#endif
diff --git a/ccan/json/notes b/ccan/json/notes
new file mode 100644 (file)
index 0000000..f808cad
--- /dev/null
@@ -0,0 +1,171 @@
+A JSON fragment (rather than a "text", which may only be an object or array) has the following properties:
+
+ * The length is >= 1 (don't forget about single-digit numbers).
+ * The first character is ASCII.
+ * The last character is ASCII.
+
+[RFC4627] assumes a "JSON text", where the first two characters will always be ASCII.  Encoding detection for a JSON fragment requires doing the proof all over again, due to the fact that the second character might not be ASCII (e.g. "Česká republika").
+
+Let's start:
+
+If the first byte is null, then it's UTF-32BE or UTF-16BE.  Otherwise, it's UTF-32LE, UTF-16LE, or UTF-8.
+
+But what if the text starts with a BOM?  Then the first byte will be non-NULL for UTF-16BE.  If a BOM is detected, we can go by that, making our lives "easier".
+
+
+int json_detect_encoding(const unsigned char *s, const unsigned char *e)
+{
+       if (s >= e)
+               return JSONENC_INVALID;
+       
+}
+
+<strike>Assumption: the first character in the Unicode string is a non-null ASCII character, and the string is at least one character long.</strike>
+
+If s[0..2] == 0, then 0 < s[3] <= 0x7F, or the string is invalid.
+
+For any valid UTF16LE without BOM:
+       s[0] is ASCII, and s[1] is 0.
+For any valid UTF16BE without BOM:
+       s[0] is 0, and s[1] is ASCII.
+For any valid UTF32LE without BOM:
+       s[0] is ASCII, and s[1..3] is 0.
+
+I think with the assumption above, there's an ambiguity between UTF16LE and UTF32LE:
+
+       7x 00 00 00
+
+This can be any of the following:
+
+ * UTF-8, with an ASCII character and 3 null characters.
+ * UTF-16LE, with an ASCII character and a null character.
+ * UTF-32LE, with an ASCII character and nothing else.
+
+Therefore, I will extend it.
+
+Assumption: The string is not empty, contains no null characters, and the first character is in the ASCII range.
+
+For any valid UTF8 without BOM:
+       s[0] is non-null ASCII, and, if e - s > 1, s[1] is not 0.
+For any valid UTF16BE without BOM:
+       s[0] is 0, s[1] is non-null ASCII, and, if e - s >= 4, one or both of s[2,3] are non-zero.
+For any valid UTF16LE without BOM:
+       s[0] is non-null ASCII, s[1] is 0, and, if e - s >= 4, one or both of s[2,3] are non-zero.
+For any valid UTF32BE without BOM:
+       s[0..2] is 0, and s[3] is ASCII.
+For any valid UTF32LE without BOM:
+       s[0] is ASCII, and s[1..3] is 0.
+
+For any valid UTF8 with BOM:
+       s[0..2] is {0xEF, 0xBB, 0xBF}.
+For any valid UTF16BE with BOM:
+       s[0] is 0xFE, and s[1] is 0xFF.
+For any valid UTF16LE with BOM:
+       s[0] is 0xFF, s[1] is 0xFE, and s[2] is non-null ASCII.
+For any valid UTF32BE with BOM:
+       s[0] is 0, s[1] is 0, s[2] is 0xFE, and s[3] is 0xFF.
+For any valid UTF32LE with BOM:
+       s[0] is 0xFF, s[1] is 0xFE, s[2] is 0, and s[3] is 0.
+
+Condensed version (for any valid string in the given encoding and with the assumption above, the beginning of the bytes will always match the pattern):
+
+Without BOM:
+       UTF8            7x (xx | $)
+       UTF16BE         00 7x
+       UTF16LE         7x 00 (00 xx | xx 00 | xx xx | $)
+       UTF32BE         00 00 00 7x
+       UTF32LE         7x 00 00 00
+
+With BOM:
+       UTF8            EF BB BF
+       UTF16BE         FE FF
+       UTF16LE         FF FE 7x
+       UTF32BE         00 00 FE FF
+       UTF32LE         FF FE 00 00
+
+Key:
+
+       00                      Null byte
+       xx                      Non-null byte
+       7x                      Non-null ASCII byte
+       $                       End of byte string
+
+As fun as this is, I've decided not to worry about encoding conversion for now, and assume input is valid UTF-8.
+
+An issue more likely to affect users is expecting to be able to load binary into a JSON string, given the current API.  Thus, I plan to introduce a restriction to JSON not present in the RFC, but present in the de-facto standard (i.e. IE): JSON strings (keys and values) may not contain null characters.  "\u0000" will be treated as invalid.
+
+Maybe I should just make a JSON serialization library.  On the other hand, the hard, JSON-specific bits (encoding/decoding strings, validating numbers, etc.) should be available, and having a "json" library that does it is helpful.
+
+My plan, then, is for the JSON module to house two things:
+ * A set of parsing and emitting primitives that take care of the hard parts of JSON.
+ * A simple parser and printer, geared toward usefulness over precision.
+
+Change of plans: my goal is to have a JSON library that's SIMPLE.  I looked at some of the implementations in C and C++, and they're obsessed with iterators, streams, hashes, manipulation, etc. (Even I'm going down that road with my iobuffer stuff).  They're often hard to integrate into projects, since they consist of several translation units and expect to be built like the huge libraries that they shouldn't be.
+
+Unicode functions are going to be incorporated directly into the module whether you like it or not.  
+"parse" and "emit" functions will be private.  Steal them from the source code if you want them.
+
+I plan to take a lenient approach to Unicode: invalid characters are converted to replacement characters rather than producing flat-out failures.  I don't want everything to grind to a halt because Jöšé Hérnàñdعz signed up, but some client (examples: Internet Explorer, and the machines) decided not to produce valid UTF-8.
+
+The purpose of JSON is to facilitate communication among programming languages.  If a programming language cannot handle part of the spec idiomatically, it shouldn't.  This justifies using C strings instead of pointer/length pairs.  It also justifies only supporting ASCII instead of Unicode (e.g. via UTF-8, surrogate pairs, etc.), but there is a clear, practical reason to support Unicode: people speak different languages, and most people speak languages containing non-ASCII characters.
+
+It's a lot easier to just validate UTF-8 rather than tolerate it by replacement.  However, I might as well replace invalid surrogate pairs.
+
+Now it's time to come up with a description for my JSON API.  The most important thing about it is that it's SIMPLE.  I should also mention the "purpose of JSON" paragraph to introduce JSON as "a simple text-based format that facilitates transferring information between programming languages" or similar.
+
+Another thing to document is that this library supports JSON values: it does not enforce the draconian restriction that the toplevel be an object or an array.
+
+For the sake of the simplicity I love, I'll require valid UTF-8 and require valid surrogate pairs.  Also, I'll remove the dependency on charset.
+
+Favors ease of use over losslessness:
+ * C strings: Although JSON allows \u0000 escapes (if I'm not mistaken), they don't always work right in some browsers.
+ * double: This may seem clumsy, but double can store 32-bit integers losslessly, and the numbers are printed with enough decimal places that 32-bit integers won't be truncated.
+
+Does not include comprehensive facilities for manipulating JSON structures.  Instead, it tries to get out of your way so you can serialize and unserialize as you see fit.
+ * Uses a linked list instead of a mapping.
+
+The code is currently a little ugly as far as toplevel organization goes.
+
+Things to test:
+ * List link pointers are consistent.
+ * 32-bit signed and unsigned integers are preserved verbatim.
+ * Appending, prepending, and looking up members works.
+ * Appending and prepending items works.
+ * Valid and invalid UTF-8 in JSON input is handled properly.
+ * json_decode returns NULL for invalid strings.
+ * json_encode_string works
+ * json_stringify works with a non-NULL space argument
+ * Lookup functions return NULL when given NULL or invalid inputs.
+ * Removing the first, last, or only child of a node works properly.
+ * Bogus literals starting with 'n', 'f', 't' parse as invalid (e.g. 'nil', 'fals', 'falsify', 'falsetto', and 'truism').
+ * Key without colon or value.
+ * All escapes are parsed and unparsed.
+ * \u0000 is disallowed.
+ * 0.0 / 0.0 converts to null in JSON.
+
+Ways to test these:
+ * json_decode every test string.
+ * Add test strings for:
+   - Bogus literals
+   - Keys without colon or value
+   - \u0000
+ * Manually test escape parsing/unparsing, with some salt around the edges, too.
+ * Expose escaping unicode, and test that with the test strings.
+ * Build a list of numbers with various appends and prepends, verify them by testing against their encoded value, do pointer consistency checks each time, do element lookups, and remove items as well.
+ * Write tests for stringify.
+ * Test various ranges of 32-bit signed and unsigned integers, converting them to and from JSON and ensuring that the value was preserved.
+
+Hmm, I wonder if Unicode escaping should be a separate function.
+
+I implemented some of the above.  Things still not covered by tests:
+
+ * Out-of-memory situations
+ * Invalid UTF-8
+ * Non-ASCII characters in input
+ * Unicode characters from U+0080..U+07FF
+ * Escaping Unicode characters (not even exposed by the API)
+ * json_encode_string
+ * Parsing \f
+ * Emitting string values in json_stringify with non-NULL space.
+ * Passing invalid nodes to json_check
+
diff --git a/ccan/json/test/common.h b/ccan/json/test/common.h
new file mode 100644 (file)
index 0000000..328cb73
--- /dev/null
@@ -0,0 +1,18 @@
+#include <ccan/json/json.c>
+#include <ccan/tap/tap.h>
+
+#include <errno.h>
+#include <string.h>
+
+/*
+ * Strip a single trailing newline from s, in place.
+ *
+ * A NULL or empty string is returned untouched.  At most one '\n' is
+ * removed.  Returns s so the call can be used inline.
+ */
+static char *chomp(char *s)
+{
+       size_t len;
+       
+       if (s == NULL)
+               return s;
+       
+       len = strlen(s);
+       if (len > 0 && s[len - 1] == '\n')
+               s[len - 1] = 0;
+       
+       return s;
+}
diff --git a/ccan/json/test/run-construction.c b/ccan/json/test/run-construction.c
new file mode 100644 (file)
index 0000000..cc9a395
--- /dev/null
@@ -0,0 +1,191 @@
+/* Build a list of numbers with various appends and prepends, verify them by testing against their encoded value, do pointer consistency checks each time, do element lookups, and remove items as well. */
+
+#include "common.h"
+
+/* should_be(var, expected): the variable's own name is used as the label
+ * in the pass/fail message. */
+#define should_be(var, expected) should_be_(var, #var, expected)
+
+/*
+ * Report one TAP result for node.
+ *
+ * The node must first satisfy json_check(); if it does not, the test fails
+ * with the message json_check() wrote into errmsg.  Otherwise the node is
+ * encoded with json_encode() and the test passes only when the encoding
+ * equals `expected` exactly.  `name` is only used in the messages.
+ */
+static void should_be_(const JsonNode *node, const char *name, const char *expected)
+{
+       char errmsg[256];
+       
+       if (json_check(node, errmsg)) {
+               char *actual = json_encode(node);
+               
+               if (strcmp(actual, expected) != 0)
+                       fail("%s should be %s, but is actually %s", name, expected, actual);
+               else
+                       pass("%s is %s", name, expected);
+               
+               /* The encoded string is released with free() once reported. */
+               free(actual);
+       } else {
+               fail("Invariants check failed: %s", errmsg);
+       }
+}
+
+/*
+ * String nodes: control characters and the characters that need escaping
+ * must come out of the encoder in their escaped form.  Each row pairs the
+ * raw C string with the JSON text it is expected to encode to.
+ */
+static void test_string(void)
+{
+       static const char *const cases[][2] = {
+               { "Hello\tworld!\n\001", "\"Hello\\tworld!\\n\\u0001\"" },
+               { "\"\\\b\f\n\r\t",      "\"\\\"\\\\\\b\\f\\n\\r\\t\"" },
+       };
+       size_t i;
+       
+       for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+               /* Named `str` because should_be() prints the variable name. */
+               JsonNode *str = json_mkstring(cases[i][0]);
+               
+               should_be(str, cases[i][1]);
+               json_delete(str);
+       }
+}
+
+/*
+ * Number nodes: two integers too large for 32 bits must encode without
+ * an exponent or fraction, and 0.0 / 0.0 must encode as "null".
+ */
+static void test_number(void)
+{
+       const double values[] = {
+               5678901234.0,
+               -5678901234.0,
+               0.0 / 0.0,
+       };
+       const char *const expected[] = {
+               "5678901234",
+               "-5678901234",
+               "null",
+       };
+       size_t i;
+       
+       for (i = 0; i < sizeof(values) / sizeof(values[0]); i++) {
+               /* Named `num` because should_be() prints the variable name. */
+               JsonNode *num = json_mknumber(values[i]);
+               
+               should_be(num, expected[i]);
+               json_delete(num);
+       }
+}
+
+static void test_array(void)
+{
+       JsonNode *array;
+       JsonNode *children[5 + 1];
+       /* Array construction test: append, prepend, detach and delete
+        * elements, checking the encoded array after every mutation.
+        * children[] is indexed 1..5 so that each index equals the number
+        * stored in the node; slot 0 is never used. */
+       array = json_mkarray();
+       should_be(array, "[]");
+       
+       children[1] = json_mknumber(1);
+       children[2] = json_mknumber(2);
+       children[3] = json_mknumber(3);
+       children[4] = json_mknumber(4);
+       children[5] = json_mknumber(5);
+       /* Only child: append it, then detach it again. */
+       json_append_element(array, children[3]);
+       should_be(array, "[3]");
+       /* Detach without freeing; the same node is re-inserted below. */
+       json_remove_from_parent(children[3]);
+       should_be(array, "[]");
+       
+       json_prepend_element(array, children[3]);
+       should_be(array, "[3]");
+       
+       json_prepend_element(array, children[2]);
+       should_be(array, "[2,3]");
+       
+       json_append_element(array, children[4]);
+       should_be(array, "[2,3,4]");
+       /* Deleting a node that is still attached is expected to unlink it:
+        * first a middle element, then (below) a first and a last one. */
+       json_delete(children[3]);
+       should_be(array, "[2,4]");
+       
+       json_prepend_element(array, children[1]);
+       should_be(array, "[1,2,4]");
+       
+       json_delete(children[1]);
+       should_be(array, "[2,4]");
+       
+       json_delete(children[4]);
+       should_be(array, "[2]");
+       /* Index lookups: negative and past-the-end indices yield NULL. */
+       ok1(json_find_element(array, 0) == children[2]);
+       ok1(json_find_element(array, -1) == NULL);
+       ok1(json_find_element(array, 1) == NULL);
+       
+       json_append_element(array, children[5]);
+       should_be(array, "[2,5]");
+       
+       ok1(json_find_element(array, 0) == children[2]);
+       ok1(json_find_element(array, 1) == children[5]);
+       ok1(json_find_element(array, -1) == NULL);
+       ok1(json_find_element(array, 2) == NULL);
+       /* Empty the array again; every lookup must now return NULL. */
+       json_delete(children[2]);
+       json_delete(children[5]);
+       should_be(array, "[]");
+       
+       ok1(json_find_element(array, -1) == NULL);
+       ok1(json_find_element(array, 0) == NULL);
+       ok1(json_find_element(array, 1) == NULL);
+       
+       json_delete(array);
+}
+
+static void test_object(void)
+{
+       JsonNode *object;
+       JsonNode *children[5 + 1];
+       /* Object construction test: add members by appending and
+        * prepending, checking the encoding and key lookups after each
+        * step.  Only children[1..3] are used; slots 0, 4 and 5 stay
+        * unassigned. */
+       object = json_mkobject();
+       should_be(object, "{}");
+       
+       children[1] = json_mknumber(1);
+       children[2] = json_mknumber(2);
+       children[3] = json_mknumber(3);
+       /* Lookups in an empty object find nothing. */
+       ok1(json_find_member(object, "one") == NULL);
+       ok1(json_find_member(object, "two") == NULL);
+       ok1(json_find_member(object, "three") == NULL);
+       
+       json_append_member(object, "one", children[1]);
+       should_be(object, "{\"one\":1}");
+       
+       ok1(json_find_member(object, "one") == children[1]);
+       ok1(json_find_member(object, "two") == NULL);
+       ok1(json_find_member(object, "three") == NULL);
+       /* Prepending puts "two" ahead of "one" in the encoded output. */
+       json_prepend_member(object, "two", children[2]);
+       should_be(object, "{\"two\":2,\"one\":1}");
+       
+       ok1(json_find_member(object, "one") == children[1]);
+       ok1(json_find_member(object, "two") == children[2]);
+       ok1(json_find_member(object, "three") == NULL);
+       
+       json_append_member(object, "three", children[3]);
+       should_be(object, "{\"two\":2,\"one\":1,\"three\":3}");
+       
+       ok1(json_find_member(object, "one") == children[1]);
+       ok1(json_find_member(object, "two") == children[2]);
+       ok1(json_find_member(object, "three") == children[3]);
+       /* The members are not deleted one by one; only the object itself
+        * is deleted (presumably freeing its members too -- confirm in
+        * json.c). */
+       json_delete(object);
+}
+
+/*
+ * Entry point of the construction tests (49 TAP results in total):
+ * lookups on NULL, the null/false/true constructors, then the string,
+ * number, array and object suites.
+ */
+int main(void)
+{
+       JsonNode *node;
+       int truth;
+       
+       /* chomp() comes from common.h and is not called in this file;
+        * referencing it presumably silences an unused-function warning. */
+       (void) chomp;
+       
+       plan_tests(49);
+       
+       /* Lookup helpers must tolerate a NULL node. */
+       ok1(json_find_element(NULL, 0) == NULL);
+       ok1(json_find_member(NULL, "") == NULL);
+       ok1(json_first_child(NULL) == NULL);
+       
+       node = json_mknull();
+       should_be(node, "null");
+       json_delete(node);
+       
+       /* false first, then true. */
+       for (truth = 0; truth <= 1; truth++) {
+               node = json_mkbool(truth != 0);
+               should_be(node, truth ? "true" : "false");
+               json_delete(node);
+       }
+       
+       test_string();
+       test_number();
+       test_array();
+       test_object();
+       
+       return exit_status();
+}
diff --git a/ccan/json/test/run-decode-encode.c b/ccan/json/test/run-decode-encode.c
new file mode 100644 (file)
index 0000000..6bdf7c3
--- /dev/null
@@ -0,0 +1,77 @@
+#include "common.h"
+
+/*
+ * Decode/encode round-trip test.
+ *
+ * Each line of test/test-strings is "valid <json>" or "invalid <json>".
+ * Valid inputs are decoded, checked with json_check(), re-encoded, and
+ * compared with the next line of test/test-strings-reencoded (one TAP
+ * result per valid line).  Invalid inputs only produce a TAP result when
+ * json_decode() wrongly accepts them.
+ *
+ * Returns 1 if a data file cannot be opened or read, otherwise the TAP
+ * exit status.
+ */
+int main(void)
+{
+       const char *strings_file = "test/test-strings";
+       const char *strings_reencoded_file = "test/test-strings-reencoded";
+       FILE *f, *f2;
+       char buffer[1024], buffer2[1024];
+       bool io_error;
+       
+       plan_tests(90);
+       
+       f = fopen(strings_file, "rb");
+       if (f == NULL) {
+               diag("Could not open %s: %s", strings_file, strerror(errno));
+               return 1;
+       }
+       f2 = fopen(strings_reencoded_file, "rb");
+       if (f2 == NULL) {
+               diag("Could not open %s: %s", strings_reencoded_file, strerror(errno));
+               fclose(f);
+               return 1;
+       }
+       
+       while (fgets(buffer, sizeof(buffer), f)) {
+               const char *s = chomp(buffer);
+               bool valid;
+               JsonNode *node;
+               
+               /* expect_literal() advances s past the tag when it matches. */
+               if (expect_literal(&s, "valid ")) {
+                       valid = true;
+               } else if (expect_literal(&s, "invalid ")) {
+                       valid = false;
+               } else {
+                       fail("Invalid line in test-strings: %s", buffer);
+                       continue;
+               }
+               
+               node = json_decode(s);
+               
+               if (valid) {
+                       char *reencoded;
+                       char errmsg[256];
+                       
+                       /* NOTE(review): the two early exits below do not
+                        * consume a line from the re-encoded file, so any
+                        * later comparisons would be off by one line. */
+                       if (node == NULL) {
+                               fail("%s is valid, but json_decode returned NULL", s);
+                               continue;
+                       }
+                       
+                       if (!json_check(node, errmsg)) {
+                               fail("Corrupt tree produced by json_decode: %s", errmsg);
+                               /* Deliberately not deleted: walking a tree that
+                                * failed its invariants check may not be safe. */
+                               continue;
+                       }
+                       
+                       reencoded = json_encode(node);
+                       
+                       if (!fgets(buffer2, sizeof(buffer2), f2)) {
+                               fail("test-strings-reencoded is missing this line: %s", reencoded);
+                               free(reencoded);
+                               json_delete(node);
+                               continue;
+                       }
+                       chomp(buffer2);
+                       
+                       ok(strcmp(reencoded, buffer2) == 0, "re-encode %s -> %s", s, reencoded);
+                       
+                       free(reencoded);
+                       json_delete(node);
+               } else if (node != NULL) {
+                       fail("%s is invalid, but json_decode returned non-NULL", s);
+                       json_delete(node);
+               }
+       }
+       
+       /* Sample the error flags first, then close both streams
+        * unconditionally so neither is left open on an error. */
+       io_error = ferror(f) != 0 || ferror(f2) != 0;
+       if (fclose(f) != 0)
+               io_error = true;
+       if (fclose(f2) != 0)
+               io_error = true;
+       
+       if (io_error) {
+               diag("I/O error reading test data.");
+               return 1;
+       }
+       
+       return exit_status();
+}
diff --git a/ccan/json/test/run-stringify.c b/ccan/json/test/run-stringify.c
new file mode 100644 (file)
index 0000000..3a4cb73
--- /dev/null
@@ -0,0 +1,108 @@
+#include "common.h"
+
+static char buf1[256], buf2[256];
+
+/*
+ * Render str as a double-quoted, single-line string in buf, for use in
+ * pass and fail messages.
+ *
+ * Tab, newline, double quote and backslash are written as two-character
+ * backslash escapes; all other bytes are copied as-is.  Output that would
+ * not fit in the 256-byte buffer is cut short and ended with `...' before
+ * the closing quote.  Returns buf.
+ */
+static char *quote_string(const char *str, char buf[256])
+{
+       char *const limit = buf + 256 - 5;
+       const char *in;
+       char *w = buf;
+       
+       *w++ = '"';
+       for (in = str; *in != 0; in++) {
+               char esc = 0;
+               
+               if (w > limit) {
+                       /* String is too long.  End it with `...' */
+                       w = limit;
+                       *w++ = '.';
+                       *w++ = '.';
+                       *w++ = '.';
+                       break;
+               }
+               
+               /* Choose the character that follows the backslash, if any. */
+               if (*in == '\t')
+                       esc = 't';
+               else if (*in == '\n')
+                       esc = 'n';
+               else if (*in == '"' || *in == '\\')
+                       esc = *in;
+               
+               if (esc != 0) {
+                       *w++ = '\\';
+                       *w++ = esc;
+               } else {
+                       *w++ = *in;
+               }
+       }
+       *w++ = '"';
+       
+       *w = 0;
+       return buf;
+}
+
+/*
+ * Produce exactly one TAP result for a single stringify case.
+ *
+ * `input` must be compact JSON that json_encode() reproduces exactly.
+ * The case passes when json_stringify() with a NULL space matches
+ * json_encode(), and json_stringify() with "\t" as the space matches
+ * `expected`.  The first step that fails reports the failure and skips
+ * the rest.
+ */
+static void test_stringify(const char *input, const char *expected)
+{
+       JsonNode *tree = NULL;
+       char *compact = NULL;
+       char *unspaced = NULL;
+       char *pretty = NULL;
+       
+       /* Single-pass block: `break` jumps to the shared cleanup below. */
+       do {
+               tree = json_decode(input);
+               if (tree == NULL) {
+                       fail("Failed to decode %s", input);
+                       break;
+               }
+               
+               compact = json_encode(tree);
+               if (strcmp(compact, input) != 0) {
+                       fail("%s re-encodes to %s.  Either encode/decode is broken, or the input string needs to be normalized", input, compact);
+                       break;
+               }
+               
+               unspaced = json_stringify(tree, NULL);
+               if (strcmp(unspaced, compact) != 0) {
+                       fail("json_stringify with NULL space produced a different string than json_encode");
+                       break;
+               }
+               
+               pretty = json_stringify(tree, "\t");
+               if (strcmp(pretty, expected) != 0) {
+                       fail("Expected %s, but json_stringify produced %s",
+                                quote_string(expected, buf1), quote_string(pretty, buf2));
+                       break;
+               }
+               
+               pass("stringify %s", input);
+       } while (0);
+       
+       /* Any of these may still be NULL if an earlier step failed. */
+       json_delete(tree);
+       free(compact);
+       free(unspaced);
+       free(pretty);
+}
+
+/*
+ * Entry point of the stringify tests: nine cases, each pairing compact
+ * JSON with the tab-indented text json_stringify() should produce for it.
+ */
+int main(void)
+{
+       static const struct {
+               const char *input;
+               const char *expected;
+       } cases[] = {
+               { "[]", "[]" },
+               { "[1]", "[\n\t1\n]" },
+               { "[1,2,3]", "[\n\t1,\n\t2,\n\t3\n]" },
+               { "[[]]", "[\n\t[]\n]" },
+               { "[[1,2],[3,4]]", "[\n\t[\n\t\t1,\n\t\t2\n\t],\n\t[\n\t\t3,\n\t\t4\n\t]\n]" },
+               
+               { "{}", "{}" },
+               { "{\"one\":1}", "{\n\t\"one\": 1\n}" },
+               { "{\"one\":1,\"t*\":[2,3,10]}", "{\n\t\"one\": 1,\n\t\"t*\": [\n\t\t2,\n\t\t3,\n\t\t10\n\t]\n}" },
+               { "{\"a\":{\"1\":1,\"2\":2},\"b\":{\"3\":[null,false,true,\"\\f\"]}}",
+                 "{\n\t\"a\": {\n\t\t\"1\": 1,\n\t\t\"2\": 2\n\t},\n\t\"b\": {\n\t\t\"3\": [\n\t\t\tnull,\n\t\t\tfalse,\n\t\t\ttrue,\n\t\t\t\"\\f\"\n\t\t]\n\t}\n}" },
+       };
+       size_t i;
+       
+       /* chomp() from common.h is not called in this file. */
+       (void) chomp;
+       
+       plan_tests(9);
+       
+       for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
+               test_stringify(cases[i].input, cases[i].expected);
+       
+       return exit_status();
+}
diff --git a/ccan/json/test/run-validate.c b/ccan/json/test/run-validate.c
new file mode 100644 (file)
index 0000000..f7bb3b0
--- /dev/null
@@ -0,0 +1,49 @@
+#include "common.h"
+
+/*
+ * Validation test.
+ *
+ * Each line of test/test-strings is "valid <json>" or "invalid <json>";
+ * json_validate() must agree with the tag.  Every line yields exactly one
+ * TAP result.
+ *
+ * Returns 1 if the data file cannot be opened or read, otherwise the TAP
+ * exit status.
+ */
+int main(void)
+{
+       const char *strings_file = "test/test-strings";
+       FILE *f;
+       char buffer[1024];
+       bool io_error;
+       
+       plan_tests(224);
+       
+       f = fopen(strings_file, "rb");
+       if (f == NULL) {
+               diag("Could not open %s: %s", strings_file, strerror(errno));
+               return 1;
+       }
+       
+       while (fgets(buffer, sizeof(buffer), f)) {
+               const char *s = chomp(buffer);
+               bool valid;
+               
+               /* expect_literal() advances s past the tag when it matches. */
+               if (expect_literal(&s, "valid ")) {
+                       valid = true;
+               } else if (expect_literal(&s, "invalid ")) {
+                       valid = false;
+               } else {
+                       fail("Invalid line in test-strings: %s", buffer);
+                       continue;
+               }
+               
+               if (json_validate(s) == valid) {
+                       pass("%s %s", valid ? "valid" : "invalid", s);
+               } else {
+                       fail("%s is %s, but json_validate returned %s",
+                                s,
+                                valid ? "valid" : "invalid",
+                                valid ? "false" : "true");
+               }
+       }
+       
+       /* Sample the error flag first, then always close the stream. */
+       io_error = ferror(f) != 0;
+       if (fclose(f) != 0)
+               io_error = true;
+       
+       if (io_error) {
+               diag("I/O error reading test strings.");
+               return 1;
+       }
+       
+       return exit_status();
+}
diff --git a/ccan/json/test/test-strings b/ccan/json/test/test-strings
new file mode 100644 (file)
index 0000000..439be7d
--- /dev/null
@@ -0,0 +1,224 @@
+invalid 
+invalid   
+invalid "
+invalid [,]
+invalid [)
+invalid []]
+invalid [}
+invalid {,}
+invalid {]
+invalid ["1":2]
+invalid [1,2,]
+invalid [1:2}
+invalid {"1":2,}
+invalid {1:2}
+invalid {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], [7 ]}]}
+invalid {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], [7]}]}
+invalid {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 ]}]
+invalid {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 ]}]]
+invalid {"1":2, "3":4
+invalid "1\u2"
+invalid [,2]
+invalid "3
+invalid "3" "4"
+invalid [3[4]
+invalid [3[4]]
+invalid [3, [4, [5], 6] 7, 8 9]
+invalid [3, [4, [5], 6] 7, 8, 9]
+invalid [3, [4, [5], 6], 7, 8 9]
+invalid {"hello":true, "bye":false, null}
+invalid {"hello":true, "bye":false, null:null}
+invalid "hi
+invalid "hi"""
+invalid {"hi": "bye"]
+invalid "\uD800\uD800"
+invalid "\uD800\uDBFF"
+invalid "\UD834\UDD1E"
+invalid "\uDB00"
+invalid "\uDB00\uDBFF"
+valid "\uFFFE"
+valid "\uFFFF"
+invalid .
+valid ""
+valid []
+valid {}
+invalid +.
+valid 0.5
+invalid 0.e1
+valid {"1":{}}
+valid {"1":2}
+valid {"1":2, "2.5" : [3, 4, {}, {"5": ["6"]}]}
+valid {"1":2, "2.5" : [3, 4, {}, {"5": ["6"], "7" :[8 ]}]}
+valid 1234
+valid -1234
+valid {"1":2, "3":4}
+invalid +1234
+invalid ++1234
+valid 123.456e142
+valid 123.456e-142
+valid 123.456e+142
+invalid 123.e-142
+valid "1\u2000"
+valid "1\u20001"
+valid 2
+invalid .246e-142
+invalid .2e-142
+valid 3
+invalid .3
+valid "3"
+valid [3]
+invalid +3.
+valid 3.2e+1
+valid [3, [4]]
+valid [3, [4, [5]]]
+valid [3, [4, [5], 6]]
+valid [3, [4, [5], 6], 7]
+valid [3, [4, [5], 6], 7, 8]
+valid [3, [4, [5], 6], 7, 8, 9]
+invalid +3.5
+invalid .3e
+invalid .3e1
+invalid .3e-1
+invalid .3e+1
+invalid 3.e1
+invalid 3.e+1
+valid 3e+1
+invalid .5
+invalid +.5
+invalid .5e+1
+valid [ 7]
+valid [7 ]
+valid [7]
+invalid .e-14234
+valid "hello"
+valid ["hello"]
+valid ["hello", "bye"]
+valid ["hello", "bye\n"]
+valid ["hello", "bye\n\r\t"]
+valid ["hello", "bye\n\r\t\b"]
+valid ["hello", "bye\n\r\t\b",true]
+valid ["hello", "bye\n\r\t\b",true , false]
+valid ["hello", "bye\n\r\t\b",true , false,    null]
+invalid ["hello", "bye\n\r\t\v"]
+valid {"hello":true}
+valid {"hello":true, "bye":false}
+valid {"hello":true, "bye":false, "foo":["one","two","three"]}
+valid "hi"
+valid ["hi"]
+valid ["hi", "bye"]
+valid {"hi": "bye"}
+valid ["hi", "bye", 3]
+valid ["hi", "bye[", 3]
+valid "\u0007"
+valid "\u0008"
+valid "\u0009"
+valid "\u0010"
+valid "\u0020"
+valid "\u10000"
+valid "\u1234"
+valid "\u99999"
+valid "\ud800\udc00"
+valid "\uD800\uDC00"
+valid "\uD834\uDD1E"
+valid "\uDBFF\uDFFF"
+valid "\uFFFD"
+valid "\uFFFF"
+invalid hello
+valid [32, 1]
+invalid [32, 
+valid "\uD800\uDC00"
+valid "\n"
+valid "hello"
+valid "hello\u0009world"
+valid "hello"
+valid "hello\n"
+valid "hello"
+valid 3
+invalid 3.
+invalid .3
+valid 0.3
+invalid 0.3e
+invalid 0.3e+
+valid 0.3e+5
+valid 0.3e-5
+valid 0.3e5
+valid "hello"
+invalid +3
+valid -3
+invalid -3.
+valid -3.1
+invalid .5
+invalid 5.
+invalid 5.e1
+valid 0.5
+invalid .3e1
+invalid .3e+1
+invalid .3e-1
+invalid .3e-1 .5
+invalid .3e-1.5
+invalid .3e+1.5
+invalid .3e+.
+invalid .3e+.5
+invalid .3e+1.5
+invalid 9.3e+1.5
+invalid 9.e+1.5
+invalid 9.e+
+invalid 9.e+1
+valid "\""
+valid "\"3.5"
+valid "\"."
+invalid "\".".
+valid "\"....."
+invalid "\"\"\"\"""
+invalid ["\"\"\"\"", .5]
+invalid [.5]
+valid ["\"\"\"\"", 0.5]
+invalid ["\"\"\"\"", .5]
+invalid ["\"\"\"\"",.5]
+invalid ["\"",.5]
+invalid ["\".5",.5]
+invalid ["\".5",".5\"".5]
+invalid ["\".5",".5\"", .5]
+invalid ["\".5",".5\"",.5]
+valid ["\".5",".5\"",0.5]
+invalid {"key":/*comment*/"value"}
+invalid {"key":/*comment"value"}
+invalid {"key":"value"}/*
+invalid {"key":"value"}/**/
+invalid {"key":"value"}/***/
+invalid {"key":"value"}/**//
+invalid {"key":"value"}/**///
+invalid {"key":"value"}/**///----
+invalid {"key":"value"}#
+invalid {"key":"value"}#{
+invalid {"key":"value"}#{}
+invalid {"key":"value"}#,
+invalid {"key":"value"/**/, "k2":"v2"}
+valid "\u0027"
+invalid "hello\'"
+invalid 'hello\''
+invalid 'hello'
+invalid 'hell\'o'
+invalid '\'hello'
+invalid '\'hello\''
+invalid \'hello\'
+invalid 'hello\'
+invalid ['hello\']
+invalid ['hello\'']
+invalid ['hello"']
+invalid ['hello\"']
+invalid ['hello"o']
+invalid ['"']
+invalid '"'
+invalid '"hello"'
+invalid '"hello'
+invalid '"hi"'
+valid   [ 1 , 2 , 3 ]    
+invalid nil
+invalid fals
+invalid falsify
+invalid falsetto
+invalid truism
+invalid {"key"
+invalid {"key","key2":value}
+invalid "\u0000"
diff --git a/ccan/json/test/test-strings-reencoded b/ccan/json/test/test-strings-reencoded
new file mode 100644 (file)
index 0000000..97890c1
--- /dev/null
@@ -0,0 +1,90 @@
+"￾"
+"￿"
+""
+[]
+{}
+0.5
+{"1":{}}
+{"1":2}
+{"1":2,"2.5":[3,4,{},{"5":["6"]}]}
+{"1":2,"2.5":[3,4,{},{"5":["6"],"7":[8]}]}
+1234
+-1234
+{"1":2,"3":4}
+1.23456e+144
+1.23456e-140
+1.23456e+144
+"1 "
+"1 1"
+2
+3
+"3"
+[3]
+32
+[3,[4]]
+[3,[4,[5]]]
+[3,[4,[5],6]]
+[3,[4,[5],6],7]
+[3,[4,[5],6],7,8]
+[3,[4,[5],6],7,8,9]
+30
+[7]
+[7]
+[7]
+"hello"
+["hello"]
+["hello","bye"]
+["hello","bye\n"]
+["hello","bye\n\r\t"]
+["hello","bye\n\r\t\b"]
+["hello","bye\n\r\t\b",true]
+["hello","bye\n\r\t\b",true,false]
+["hello","bye\n\r\t\b",true,false,null]
+{"hello":true}
+{"hello":true,"bye":false}
+{"hello":true,"bye":false,"foo":["one","two","three"]}
+"hi"
+["hi"]
+["hi","bye"]
+{"hi":"bye"}
+["hi","bye",3]
+["hi","bye[",3]
+"\u0007"
+"\b"
+"\t"
+"\u0010"
+" "
+"က0"
+"ሴ"
+"香9"
+"𐀀"
+"𐀀"
+"𝄞"
+"􏿿"
+"�"
+"￿"
+[32,1]
+"𐀀"
+"\n"
+"hello"
+"hello\tworld"
+"hello"
+"hello\n"
+"hello"
+3
+0.3
+30000
+3e-06
+30000
+"hello"
+-3
+-3.1
+0.5
+"\""
+"\"3.5"
+"\"."
+"\"....."
+["\"\"\"\"",0.5]
+["\".5",".5\"",0.5]
+"'"
+[1,2,3]