X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fcharset%2F_info;h=e07d7040488cccfaf7e41a345ecc019843c0525e;hp=a7086ba12e99cd36e2c35cb9e06e43ebdb209b16;hb=578da7e7b6265153fa0519035fc52a086a711ac0;hpb=18636637ee013ef828cb04b2b7bb4a4922324475 diff --git a/ccan/charset/_info b/ccan/charset/_info index a7086ba1..e07d7040 100644 --- a/ccan/charset/_info +++ b/ccan/charset/_info @@ -5,43 +5,155 @@ /** * charset - character set conversion and validation routines * - * This module provides a collection (well, only one, at the moment) of - * well-tested routines for dealing with character set nonsense. - * - * Validation functions: - * - bool utf8_validate(const char *str, size_t length); + * This module provides a collection of well-tested routines + * for dealing with character set nonsense. * * Example: * #include <err.h> * #include <stdio.h> + * #include <stdlib.h> * #include <string.h> * #include <ccan/charset/charset.h> * #include <ccan/grab_file/grab_file.h> - * #include <ccan/talloc/talloc.h> // For talloc_free() - * - * int main(int argc, char *argv[]) + * #include <ccan/talloc/talloc.h> + * + * static void print_json_string(const char *s); + * static bool parse_hex16(const char **sp, unsigned int *out); + * + * // Take a JSON-encoded string on input and print its literal value. + * int main(void) * { - * size_t len; - * char *file; - * bool valid; - * - * if (argc != 2) - * err(1, "Expected exactly one argument"); - * - * file = grab_file(NULL, argv[1], &len); - * if (!file) - * err(1, "Could not read file %s", argv[1]); - * - * valid = utf8_validate(file, len); - * printf("File contents are %s UTF-8\n", valid ? 
"valid" : "invalid"); - * - * talloc_free(file); - * + * char *input; + * size_t length; + * + * input = grab_file(NULL, NULL, &length); + * if (!input) + * err(1, "Error reading input"); + * if (!utf8_validate(input, length)) { + * fprintf(stderr, "Input contains invalid UTF-8\n"); + * return 1; + * } + * if (strlen(input) != length) { + * fprintf(stderr, "Input contains null characters\n"); + * return 1; + * } + * + * print_json_string(input); + * + * talloc_free(input); * return 0; * } + * + * static void print_json_string(const char *s) + * { + * char output_buffer[4]; + * + * // Skip leading whitespace + * while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') + * s++; + * + * if (*s++ != '"') { + * fprintf(stderr, "Expected JSON string literal surrounded by double quotes.\n"); + * exit(EXIT_FAILURE); + * } + * + * while (*s != '"') { + * unsigned char c = *s++; + * char *b = output_buffer; + * + * if (c == '\\') { + * c = *s++; + * switch (c) { + * case '"': + * case '\\': + * case '/': + * *b++ = c; + * break; + * case 'b': *b++ = '\b'; break; + * case 'f': *b++ = '\f'; break; + * case 'n': *b++ = '\n'; break; + * case 'r': *b++ = '\r'; break; + * case 't': *b++ = '\t'; break; + * case 'u': { + * unsigned int uc, lc; + * + * if (!parse_hex16(&s, &uc)) + * goto syntax_error; + * + * if (uc >= 0xD800 && uc <= 0xDFFF) { + * // Handle UTF-16 surrogate pair (e.g. "\uD834\uDD1E"). + * uchar_t unicode; + * + * if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc)) + * goto syntax_error; + * + * unicode = from_surrogate_pair(uc, lc); + * if (unicode == REPLACEMENT_CHARACTER) { + * fprintf(stderr, "Invalid surrogate pair.\n"); + * exit(EXIT_FAILURE); + * } + * + * b += utf8_write_char(unicode, b); + * } else { + * // Handle ordinary Unicode escape (e.g. "\u266B"). + * b += utf8_write_char(uc, b); + * } + * + * break; + * } + * default: + * goto syntax_error; + * } + * } else if (c <= 0x1F) { + * // Control characters are not allowed in string literals. 
+ * goto syntax_error; + * } else { + * *b++ = c; + * } + * + * fwrite(output_buffer, 1, b - output_buffer, stdout); + * } + * + * putchar('\n'); + * return; + * + * syntax_error: + * fprintf(stderr, "Syntax error in JSON string literal.\n"); + * exit(EXIT_FAILURE); + * } + * + * static bool parse_hex16(const char **sp, unsigned int *out) + * { + * const char *s = *sp; + * unsigned int ret = 0; + * unsigned int i; + * unsigned int tmp; + * char c; + * + * for (i = 0; i < 4; i++) + * { + * c = *s++; + * if (c >= '0' && c <= '9') + * tmp = c - '0'; + * else if (c >= 'A' && c <= 'F') + * tmp = c - 'A' + 10; + * else if (c >= 'a' && c <= 'f') + * tmp = c - 'a' + 10; + * else + * return false; + * + * ret <<= 4; + * ret += tmp; + * } + * + * *out = ret; + * *sp = s; + * return true; + * } * - * Author: Joey Adams - * Licence: MIT + * Author: Joey Adams + * License: MIT + * Version: 0.3 */ int main(int argc, char *argv[]) {