1 /* MIT (BSD) license - see LICENSE file for details */
8 /* Unicode is limited to 21 bits. */
12 /* How many characters we are expecting as part of this Unicode point */
14 /* How many characters we've already seen. */
16 /* Compound character, aka Unicode point. */
20 #define UTF8_STATE_INIT { 0, 0, 0 }
22 static inline void utf8_state_init(struct utf8_state *utf8_state)
24 memset(utf8_state, 0, sizeof(*utf8_state));
28 * utf8_decode - continue UTF8 decoding with this character.
29 * @utf8_state - initialized UTF8 state.
32 * Returns false if it needs another character to give results.
33 * Otherwise returns true, @utf8_state can be reused without initialization,
36 * EINVAL: bad encoding.
37 * EFBIG: not a minimal encoding.
38 * ERANGE: encoding of invalid character.
40 * You can extract the character from @utf8_state->c; @utf8_state->used_len
41 * indicates how many characters have been consumed.
43 bool utf8_decode(struct utf8_state *utf8_state, char c);
46 * utf8_encode - encode a point into UTF8.
47 * @point - Unicode point to encode.
48 * @dest - buffer to fill.
50 * Returns 0 if point was invalid, otherwise bytes of dest used.
51 * Sets errno to ERANGE if point was invalid.
53 size_t utf8_encode(uint32_t point, char dest[UTF8_MAX_LEN]);
54 #endif /* CCAN_UTF8_H */