7 #include "ccan_tokenizer.h"
// Largest value representable in 64 unsigned bits (2^64 - 1).
10 #define ULLONG_MAX 18446744073709551615ULL
// skipnum: examines the characters starting at s against the digit rules
// packed into base. The low 8 bits of base (base & 0xFF) are the radix; the
// READUI_ALLOW* bits relax which characters are accepted.
// NOTE(review): the loop, the branch bodies and the return statement are not
// visible in this excerpt. readui uses the returned pointer as the new end of
// the number, so this presumably returns the first unacceptable character
// (or e) -- confirm against the full source.
13 static const char *skipnum(const char *s, const char *e, readui_base base) {
// Widen through unsigned char so the range comparisons below never see a negative value.
15 unsigned int c = (unsigned char)*s;
// Decimal digit whose value is not below the radix, with higher digits not permitted.
18 if ( c-'0' >= (base & 0xFF) &&
19 !(base & READUI_ALLOWHIGHERDIGITS) )
// Uppercase letter: the next test checks that READUI_ALLOWCAPLETTERS is set.
21 } else if (c>='A' && c<='Z') {
22 if (!(base & READUI_ALLOWCAPLETTERS))
// Letters stand for the values 10 and up (A is 10, B is 11, ...).
24 if ( c-'A'+10 >= (base & 0xFF) &&
25 !(base & READUI_ALLOWHIGHERDIGITS))
// Lowercase letter: same value mapping, gated by READUI_ALLOWLCASELETTERS.
27 } else if (c>='a' && c<='z') {
28 if (!(base & READUI_ALLOWLCASELETTERS))
30 if ( c-'a'+10 >= (base & 0xFF) &&
31 !(base & READUI_ALLOWHIGHERDIGITS))
// readui_valid: converts the digit run [s, e) to a uint64_t using the radix
// held in the low 8 bits of base.
// NOTE(review): only part of the body is visible in this excerpt. The name and
// the call from readui (which passes the end pointer returned by skipnum)
// suggest the digits are expected to be pre-validated -- confirm.
40 static uint64_t readui_valid(const char *s, const char *e, readui_base base) {
// Weight applied to the current digit; it is scaled by the radix at the bottom of the visible code.
42 uint64_t multiplier = 1;
45 //64-bit multiplication with overflow checking
// The operands are split into 32-bit halves (a1:a0 and b1:b0) so the partial
// products can be formed in 64 bits: a collects the two cross terms and b the
// low term. NOTE(review): the lines that detect overflow and store the result
// into dest are not visible here.
46 #define multiply(dest, src) do { \
47 uint32_t a0 = (uint64_t)(dest) & 0xFFFFFFFF; \
48 uint32_t a1 = (uint64_t)(dest) >> 32; \
49 uint32_t b0 = (uint64_t)(src) & 0xFFFFFFFF; \
50 uint32_t b1 = (uint64_t)(src) >> 32; \
55 a = (uint64_t)a1*b0 + (uint64_t)a0*b1; \
59 b = (uint64_t)a0*b0; \
66 if (s >= e || ((base&0xFF) < 1)) {
// The test above catches an empty digit range or a radix of zero.
// NOTE(review): the handling of that case is not visible in this excerpt.
// Leading zeros add nothing to the value, so step past them.
71 while (s<e && *s=='0') s++;
77 //this series of if statements takes advantage of the fact that 'a'>'A'>'0'
86 //TODO: Write/find a testcase where temp *= multiplier does overflow
// Scale the digit by its positional weight using the overflow-checked multiply.
87 multiply(digit_value, multiplier);
// An unsigned sum smaller than one of its operands means the addition wrapped.
// NOTE(review): assumes ret is unsigned like the uint64_t return type; its declaration is not visible.
89 if (ret+digit_value < ret)
// Advance the weight to the next digit position.
96 multiply(multiplier, base & 0xFF);
// readui: reads an unsigned integer from the text at *sp, bounded by e.
// Visible steps: skip leading whitespace (cwhite), let skipnum decide where
// the digit run stops, then convert that run with readui_valid.
// NOTE(review): the initialisation of s and any update of *sp happen on lines
// not visible in this excerpt; read_integer compares its pointer with the
// expected end after the call, so *sp is presumably advanced -- confirm.
109 uint64_t readui(const char **sp, const char *e, readui_base base) {
112 while (s<e && cwhite(*s)) s++;
// From here on e is whatever skipnum returned rather than the end of the input.
113 e = skipnum(s, e, base);
116 return readui_valid(s, e, base);
// Message path string.
// NOTE(review): presumably the prefix that the tok_msg_* macros attach to the
// message names used in this file -- confirm in their definition.
120 #define MESSAGE_PATH "tokenize/read_cnumber/"
124 * Each of the pointers points to the first character of a given component.
125 * Consider 0x50.1p+1f . It would be broken down into:
127 const char *prefix; // start of the radix prefix (0x in the example above)
128 const char *digits; // start of the digits, dot included (50.1 in the example)
129 const char *exponent; // start of the exponent (p+1 in the example)
130 const char *suffix; // start of the suffix (f in the example)
132 size_t dots_found; // 1 in the example; presumably the count of dots in the digits
136 * Scans past all the characters in a number token, fills the struct, and
137 * returns one of TOK_INTEGER or TOK_FLOATING to indicate the type.
139 * First character must be [0-9 '.']
141 static enum token_type scan_number(struct scan_number *sn,
142 const char *s, const char *e) {
143 enum token_type type;
// A radix prefix is only considered when the text starts with 0 and at least
// three characters of input remain.
149 if (s+3<=e && s[0]=='0') {
// Hexadecimal prefix: 0x or 0X.
150 if (s[1]=='X' || s[1]=='x') {
160 goto done_scanning_digits;
// Binary prefix: 0b or 0B.
161 } else if (s[1]=='B' || s[1]=='b') {
// True when the current character is not a binary digit.
164 if (*s!='0' && *s!='1')
170 //binary, decimal, or octal
174 else if (!cdigit(*s))
178 done_scanning_digits:
// Exponent marker: E or e when no prefix was recorded (prefix == digits),
// P or p when one was (prefix < digits).
182 (sn->prefix==sn->digits && (*s=='E' || *s=='e')) ||
183 (sn->prefix < sn->digits && (*s=='P' || *s=='p'))
// Optional sign after the exponent marker.
186 if (s<e && (*s=='+' || *s=='-'))
// Exponent digits are matched with cdigit.
188 while (s<e && cdigit(*s)) s++;
// Everything matching digits, letters, dot, underscore or dollar is swept up here.
192 while (s<e && (cdigit(*s) || cletter(*s) ||
193 *s=='.' || *s=='_' || *s=='$')) s++;
197 //Now we're done scanning, but now we want to know what type this is
// Non-empty exponent: the exponent starts before the suffix does.
// NOTE(review): the statement this guards is not visible in this excerpt.
201 if (sn->exponent < sn->suffix)
204 //if this is an octal, make the leading 0 a prefix
// Applies to an integer with no prefix recorded, at least one digit character
// before the current position, and a leading 0.
205 if (type==TOK_INTEGER && sn->prefix==sn->digits &&
206 sn->digits < s && sn->digits[0]=='0')
// read_number_suffix: parses the suffix text [s, e) of a numeric literal into
// tok_suffix flags and checks that they suit the literal kind given by type.
// Error cases visible here: a repeated suffix piece, TOK_F on an integer, and
// TOK_U or TOK_LL on a floating constant. Each message is anchored at the
// start of the suffix (orig_s).
// NOTE(review): the loop header, the flag assignments and the return
// statements are not visible in this excerpt.
212 static enum tok_suffix read_number_suffix(const char *s, const char *e,
213 enum token_type type, tok_message_queue *mq) {
// Remember where the suffix began; every diagnostic below points at it.
214 const char *orig_s = s;
// Starts with no suffix flags set.
215 enum tok_suffix sfx = 0;
217 //read the suffix in pieces
// Snapshot of the flags before this piece.
// NOTE(review): presumably compared with sfx afterwards to spot a repeated piece.
219 enum tok_suffix sfx_prev = sfx;
// NOTE(review): the action for lowercase letters is not visible; presumably c
// is folded to uppercase.
221 if (c>='a' && c<='z')
// Looks for a further L in either case.
// NOTE(review): presumably this turns the piece into LL; surrounding lines not visible.
225 if (s<e && (*s=='L' || *s=='l')) {
229 //TOK_L and TOK_LL are mutually exclusive
246 goto invalid; //suffix piece was repeated
249 //make sure the suffix is appropriate for this number type
250 if (type==TOK_INTEGER && (sfx & TOK_F)) {
251 tok_msg_error(suffix_float_only, orig_s,
252 "Suffix only valid for floating point numbers");
255 if (type==TOK_FLOATING && (sfx & (TOK_U | TOK_LL))) {
256 tok_msg_error(suffix_integer_only, orig_s,
257 "Suffix only valid for integers");
// Generic invalid-suffix message, chosen by literal kind.
// NOTE(review): presumably the target of goto invalid; the label line is not visible.
264 if (type==TOK_INTEGER)
265 tok_msg_error(integer_suffix_invalid, orig_s,
266 "Integer suffix invalid");
268 tok_msg_error(floating_suffix_invalid, orig_s,
269 "Floating point suffix invalid");
// read_integer: fills *out from the integer literal described by sn.
// Visible behaviour:
//  - the base defaults to READUI_DEC and is refined from the prefix: a prefix
//    of two or more characters is tested for x or X at index 1, a shorter
//    non-empty prefix takes the else-if branch;
//  - the digits are converted with readui and stored in out->v;
//  - the low 8 bits of the base are stored in out->base;
//  - a stop position other than e, or EINVAL or ERANGE in errno, becomes a
//    message anchored at the token start;
//  - the suffix range [sn->suffix, sn->end) is handed to read_number_suffix.
// NOTE(review): the assignments inside the prefix branches and the use of the
// read_number_suffix result are not visible in this excerpt.
273 static void read_integer(struct tok_integer *out, const struct scan_number *sn,
274 tok_message_queue *mq) {
276 Assertions about an integer's struct scan_number:
277 prefix is empty or [0 0B 0b 0X 0x]
278 sn->digits is not empty (i.e. sn->digits < sn->exponent)
279 *unless* the prefix is "0"
281 suffix is [0-9 A-Z a-z '.']*
284 readui_base base = READUI_DEC;
285 const char *tokstart = sn->prefix;
286 const char *s = sn->digits, *e = sn->exponent;
288 if (sn->prefix+1 < sn->digits) {
289 if (sn->prefix[1]=='X' || sn->prefix[1]=='x')
// A one-character prefix.
// NOTE(review): presumably the octal 0 set up by scan_number; branch body not visible.
293 } else if (sn->prefix < sn->digits) {
297 if (s>=e && base==READUI_OCT) {
298 //octal contains no digits
// Convert the digits; s is passed by address, matching the const char **sp parameter of readui.
304 out->v = readui(&s, sn->exponent, base);
// Keep only the radix held in the low 8 bits of base.
305 out->base = base & 0xFF;
// The pointer did not end up at the end of the digits, or errno holds EINVAL.
307 if (s != e || errno == EINVAL) {
308 tok_msg_error(integer_invalid_digits, tokstart,
309 "Integer constant contains invalid digits");
// The value did not fit.
311 if (errno == ERANGE) {
312 tok_msg_error(integer_out_of_range, tokstart,
313 "Integer constant out of range");
// NOTE(review): presumably the fall-through for any other errno value; the
// guarding condition is not visible.
315 tok_msg_bug(readui_unknown, tokstart,
316 "Unknown error returned by readui");
// Parse and validate the suffix as an integer suffix.
322 read_number_suffix(sn->suffix, sn->end, TOK_INTEGER, mq);
// read_floating: fills *out from the floating-point literal described by sn.
// Visible behaviour:
//  - out->suffix starts as TOK_NOSUFFIX;
//  - with a non-empty prefix, a B or b at prefix index 1 is reported as a
//    binary floating point constant, which is not allowed;
//  - an empty exponent (exponent >= suffix) is reported as a hex constant
//    missing its exponent;
//  - the character at sn->end is saved in borrow and later written back
//    through a cast that drops const, so sn->end must be readable and the
//    buffer behind it writable;
//  - the text is converted with strtold, which stores its stop position in s;
//  - ERANGE or EDOM in errno maps to the out-of-range message;
//  - the suffix range [sn->suffix, sn->end) goes to read_number_suffix.
// NOTE(review): several statements, including where the terminator is written
// and how the read_number_suffix result is used, are not visible in this excerpt.
327 static void read_floating(struct tok_floating *out, const struct scan_number *sn,
328 tok_message_queue *mq) {
330 Assertions about a float's struct scan_number:
331 prefix is empty or [0B 0b 0X 0x] (note: no octal prefix 0)
332 sn->digits not empty, ever
333 exponent may or may not exist
334 If exponent exists, it is valid and formatted as:
335 ( [E P e p] ['+' '-']*0..1 [0-9]* )
336 An exponent starts with E if this is decimal, P if it is hex/binary
337 suffix is [0-9 A-Z a-z '.']*
338 dots_found can be anything
340 const char *tokstart = sn->prefix;
341 const char *s = sn->prefix, *e = sn->suffix;
342 char borrow = *sn->end;
343 //long double strtold(const char *nptr, char **endptr);
346 out->suffix = TOK_NOSUFFIX;
348 if (sn->prefix < sn->digits) {
349 if (sn->prefix[1]=='B' || sn->prefix[1]=='b') {
350 tok_msg_error(binary_float, tokstart,
351 "Binary floating point constants not allowed");
354 if (sn->exponent >= sn->suffix) {
355 tok_msg_error(hex_float_no_exponent, tokstart,
356 "Hex floating point constant missing exponent");
362 /* Stick a null terminator at the end of the input so strtold
363 * won't read beyond the given input.
365 * This is thread-safe because the input is from
366 * token_list.txt, which was generated in the
367 * tokenize function which is still running.
371 out->v = strtold(s, (char**)&s);
372 //don't forget to set it back
373 *(char*)sn->end = borrow;
376 //for some reason, strtold may set errno to EDOM to indicate underflow
377 //open test/run.c and search "floating_out_of_range" for more details
378 if (errno == ERANGE || errno == EDOM) {
379 tok_msg_error(floating_out_of_range, tokstart,
380 "Floating point constant out of range");
// NOTE(review): presumably the fall-through for any other errno value; the
// guarding condition is not visible.
382 tok_msg_bug(strtold_unknown, tokstart,
383 "Unknown error returned by strtold");
// NOTE(review): the guarding condition is not visible; presumably strtold
// stopped somewhere other than e.
388 tok_msg_error(floating_invalid_digits, tokstart,
389 "Floating point constant contains invalid digits");
// Parse and validate the suffix as a floating suffix.
393 read_number_suffix(sn->suffix, sn->end, TOK_FLOATING, mq);
// read_cnumber: lexes one C numeric literal from [s, e) into tok.
// scan_number splits the text into components and its result becomes
// tok->type. An integer is handed to read_integer with tok->integer; the
// other visible call hands tok->floating to read_floating. mq is passed
// through to both readers.
// Returns sn.end, the end of the scanned token, with const cast away.
396 char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq) {
397 struct scan_number sn;
399 tok->type = scan_number(&sn, s, e);
400 if (tok->type == TOK_INTEGER)
401 read_integer(&tok->integer, &sn, mq);
// NOTE(review): presumably the else branch; the line between is not visible.
403 read_floating(&tok->floating, &sn, mq);
405 return (char*)sn.end;