X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fcharset%2Fcharset.c;h=756080138d23110e26aa97c6fc3b0a0964b488ce;hp=6c21df38ac703efcb5048648ef0abd00089505c5;hb=455572f3e5a66e8a02f38458524fad651eb46489;hpb=eefefbaee15ea270992c958f6a9b145cdc017e44 diff --git a/ccan/charset/charset.c b/ccan/charset/charset.c index 6c21df38..75608013 100644 --- a/ccan/charset/charset.c +++ b/ccan/charset/charset.c @@ -50,18 +50,23 @@ bool utf8_validate(const char *str, size_t length) return false; } else if (c < 0xF0) { /* 3-byte sequence, U+0800 to U+FFFF - Note that the surrogate range is U+D800 to U+DFFF + Note that the surrogate range is U+D800 to U+DFFF, + and that U+FFFE and U+FFFF are illegal characters. c must be >= 11100000 (which it is) If c is 11100000, then s[0] must be >= 10100000 If the global parameter utf8_allow_surrogates is false: If c is 11101101 and s[0] is >= 10100000, then this is a surrogate and we should fail. + If c is 11101111, s[0] is 10111111, and s[1] >= 10111110, + then this is an illegal character and we should fail. s[0] and s[1] must be 10xxxxxx */ len = 1; if (c == 0xE0 && *s < 0xA0) return false; if (!utf8_allow_surrogates && c == 0xED && *s >= 0xA0) return false; + if (c == 0xEF && s[0] == 0xBF && (s+1 >= e || s[1] >= 0xBE)) + return false; } else { /* 4-byte sequence, U+010000 to U+10FFFF c must be >= 11110000 (which it is) and <= 11110100