X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fcharset%2F_info;h=246ca0738ad5c851554d743e5773a5da8446bc59;hp=4319ecb33022181109d5b7e613eccb4d24e801d9;hb=06c4af3163e2bd99999a93a478d1308ea39c5a79;hpb=c8c69dc68792e85b14646e8a8219dae923b34feb diff --git a/ccan/charset/_info b/ccan/charset/_info index 4319ecb3..246ca073 100644 --- a/ccan/charset/_info +++ b/ccan/charset/_info @@ -5,43 +5,154 @@ /** * charset - character set conversion and validation routines * - * This module provides a collection (well, only one, at the moment) of - * well-tested routines for dealing with character set nonsense. - * - * Validation functions: - * - bool utf8_validate(const char *str, size_t length); + * This module provides a collection of well-tested routines + * for dealing with character set nonsense. * * Example: * #include <err.h> * #include <stdio.h> + * #include <stdlib.h> * #include <string.h> * #include <ccan/charset/charset.h> * #include <ccan/grab_file/grab_file.h> - * #include <ccan/talloc/talloc.h> // For talloc_free() - * - * int main(int argc, char *argv[]) + * #include <ccan/talloc/talloc.h> + * + * static void print_json_string(const char *s); + * static bool parse_hex16(const char **sp, unsigned int *out); + * + * // Take a JSON-encoded string on input and print its literal value. + * int main(void) * { - * size_t len; - * char *file; - * bool valid; - * - * if (argc != 2) - * err(1, "Expected exactly one argument"); - * - * file = grab_file(NULL, argv[1], &len); - * if (!file) - * err(1, "Could not read file %s", argv[1]); - * - * valid = utf8_validate(file, len)); - * printf("File contents are %s UTF-8\n", valid ?
"valid" : "invalid"); - * - * talloc_free(file); - * + * char *input; + * size_t length; + * + * input = grab_file(NULL, NULL, &length); + * if (!input) + * err(1, "Error reading input"); + * if (!utf8_validate(input, length)) { + * fprintf(stderr, "Input contains invalid UTF-8\n"); + * return 1; + * } + * if (strlen(input) != length) { + * fprintf(stderr, "Input contains null characters\n"); + * return 1; + * } + * + * print_json_string(input); + * + * talloc_free(input); * return 0; * } + * + * static void print_json_string(const char *s) + * { + * char output_buffer[4]; + * + * // Skip leading whitespace + * while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') + * s++; + * + * if (*s++ != '"') { + * fprintf(stderr, "Expected JSON string literal surrounded by double quotes.\n"); + * exit(EXIT_FAILURE); + * } + * + * while (*s != '"') { + * unsigned char c = *s++; + * char *b = output_buffer; + * + * if (c == '\\') { + * c = *s++; + * switch (c) { + * case '"': + * case '\\': + * case '/': + * *b++ = c; + * break; + * case 'b': *b++ = '\b'; break; + * case 'f': *b++ = '\f'; break; + * case 'n': *b++ = '\n'; break; + * case 'r': *b++ = '\r'; break; + * case 't': *b++ = '\t'; break; + * case 'u': { + * unsigned int uc, lc; + * + * if (!parse_hex16(&s, &uc)) + * goto syntax_error; + * + * if (uc >= 0xD800 && uc <= 0xDFFF) { + * // Handle UTF-16 surrogate pair (e.g. "\uD834\uDD1E"). + * uchar_t unicode; + * + * if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc)) + * goto syntax_error; + * + * unicode = from_surrogate_pair(uc, lc); + * if (unicode == REPLACEMENT_CHARACTER) { + * fprintf(stderr, "Invalid surrogate pair.\n"); + * exit(EXIT_FAILURE); + * } + * + * b += utf8_write_char(unicode, b); + * } else { + * // Handle ordinary Unicode escape (e.g. "\u266B"). + * b += utf8_write_char(uc, b); + * } + * + * break; + * } + * default: + * goto syntax_error; + * } + * } else if (c <= 0x1F) { + * // Control characters are not allowed in string literals. 
+ * goto syntax_error; + * } else { + * *b++ = c; + * } + * + * fwrite(output_buffer, 1, b - output_buffer, stdout); + * } + * + * putchar('\n'); + * return; + * + * syntax_error: + * fprintf(stderr, "Syntax error in JSON string literal.\n"); + * exit(EXIT_FAILURE); + * } + * + * static bool parse_hex16(const char **sp, unsigned int *out) + * { + * const char *s = *sp; + * unsigned int ret = 0; + * unsigned int i; + * unsigned int tmp; + * char c; + * + * for (i = 0; i < 4; i++) + * { + * c = *s++; + * if (c >= '0' && c <= '9') + * tmp = c - '0'; + * else if (c >= 'A' && c <= 'F') + * tmp = c - 'A' + 10; + * else if (c >= 'a' && c <= 'f') + * tmp = c - 'a' + 10; + * else + * return false; + * + * ret <<= 4; + * ret += tmp; + * } + * + * *out = ret; + * *sp = s; + * return true; + * } * * Author: Joey Adams - * Licence: MIT + * License: MIT */ int main(int argc, char *argv[]) {