X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Futf8%2Futf8.h;fp=ccan%2Futf8%2Futf8.h;h=a095f02e870baf53d0020fd60ffecabc8a0b0cba;hp=0000000000000000000000000000000000000000;hb=ac8694de3ef34483ce02811c1ba45096ee547a5f;hpb=aa0c1ec1f5919e46a5afb0033f0282625d8bb803 diff --git a/ccan/utf8/utf8.h b/ccan/utf8/utf8.h new file mode 100644 index 00000000..a095f02e --- /dev/null +++ b/ccan/utf8/utf8.h @@ -0,0 +1,54 @@ +/* MIT (BSD) license - see LICENSE file for details */ +#ifndef CCAN_UTF8_H +#define CCAN_UTF8_H +#include +#include +#include + +/* Unicode is limited to 21 bits. */ +#define UTF8_MAX_LEN 4 + +struct utf8_state { + /* How many characters we are expecting as part of this Unicode point */ + uint16_t total_len; + /* How many characters we've already seen. */ + uint16_t used_len; + /* Compound character, aka Unicode point. */ + uint32_t c; +}; + +#define UTF8_STATE_INIT { 0, 0, 0 } + +static inline void utf8_state_init(struct utf8_state *utf8_state) +{ + memset(utf8_state, 0, sizeof(*utf8_state)); +} + +/** + * utf8_decode - continue UTF8 decoding with this character. + * @utf8_state - initialized UTF8 state. + * @c - the character. + * + * Returns false if it needs another character to give results. + * Otherwise returns true, @utf8_state can be reused without initializeation, + * and sets errno: + * 0: success + * EINVAL: bad encoding. + * EFBIG: not a minimal encoding. + * ERANGE: encoding of invalid character. + * + * You can extract the character from @utf8_state->c; @utf8_state->used_len + * indicates how many characters have been consumed. + */ +bool utf8_decode(struct utf8_state *utf8_state, char c); + +/** + * utf8_encode - encode a point into UTF8. + * @point - Unicode point to include. + * @dest - buffer to fill. + * + * Returns 0 if point was invalid, otherwise bytes of dest used. + * Sets errno to ERANGE if point was invalid. + */ +size_t utf8_encode(uint32_t point, char dest[UTF8_MAX_LEN]); +#endif /* CCAN_UTF8_H */