X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fccan_tokenizer%2Fread_cnumber.c;fp=ccan%2Fccan_tokenizer%2Fread_cnumber.c;h=bb8eb30ec049f641ac04d76d028b4ef42a8570a4;hp=0000000000000000000000000000000000000000;hb=69cc1b45b4921c0be738902fe0d5225f135e2aae;hpb=46b1a03e21303e03b68de213b41c0840767fbc96;ds=sidebyside diff --git a/ccan/ccan_tokenizer/read_cnumber.c b/ccan/ccan_tokenizer/read_cnumber.c new file mode 100644 index 00000000..bb8eb30e --- /dev/null +++ b/ccan/ccan_tokenizer/read_cnumber.c @@ -0,0 +1,408 @@ + +//for strtold +#define _ISOC99_SOURCE +#include +#undef _ISOC99_SOURCE + +#include "ccan_tokenizer.h" + +#ifndef ULLONG_MAX +#define ULLONG_MAX 18446744073709551615ULL +#endif + +static const char *skipnum(const char *s, const char *e, readui_base base) { + for (;s= (base & 0xFF) && + !(base & READUI_ALLOWHIGHERDIGITS) ) + break; + } else if (c>='A' && c<='Z') { + if (!(base & READUI_ALLOWCAPLETTERS)) + break; + if ( c-'A'+10 >= (base & 0xFF) && + !(base & READUI_ALLOWHIGHERDIGITS)) + break; + } else if (c>='a' && c<='z') { + if (!(base & READUI_ALLOWLCASELETTERS)) + break; + if ( c-'a'+10 >= (base & 0xFF) && + !(base & READUI_ALLOWHIGHERDIGITS)) + break; + } else + break; + } + + return s; +} + +static uint64_t readui_valid(const char *s, const char *e, readui_base base) { + uint64_t ret = 0; + uint64_t multiplier = 1; + uint64_t digit_value; + + //64-bit multiplication with overflow checking + #define multiply(dest, src) do { \ + uint32_t a0 = (uint64_t)(dest) & 0xFFFFFFFF; \ + uint32_t a1 = (uint64_t)(dest) >> 32; \ + uint32_t b0 = (uint64_t)(src) & 0xFFFFFFFF; \ + uint32_t b1 = (uint64_t)(src) >> 32; \ + uint64_t a, b; \ + \ + if (a1 && b1) \ + goto overflowed; \ + a = (uint64_t)a1*b0 + (uint64_t)a0*b1; \ + if (a > 0xFFFFFFFF) \ + goto overflowed; \ + a <<= 32; \ + b = (uint64_t)a0*b0; \ + \ + if (a+b < a) \ + goto overflowed; \ + (dest) = a+b; \ + } while(0) + + if (s >= e || ((base&0xFF) < 1)) { + errno = EINVAL; + return 0; + } + + while (s s) { + for (;;) { + char c = *--e; + + //this series of if statements takes advantage of the fact that 'a'>'A'>'0' + if (c >= 'a') + c -= 'a'-10; + else if (c >= 'A') + c -= 'A'-10; + else + c -= '0'; + digit_value = c; + + //TODO: Write/find a testcase where temp *= multiplier does overflow + multiply(digit_value, multiplier); + + if (ret+digit_value < ret) + goto overflowed; + ret += digit_value; + + if (e <= s) + break; + + multiply(multiplier, base & 0xFF); + } + } + errno = 0; + return ret; + +overflowed: + errno = ERANGE; + return ULLONG_MAX; + + #undef multiply +} + +uint64_t readui(const char **sp, const char *e, readui_base base) { + const char *s = *sp; + + while (sdots_found = 0; + + sn->prefix = s; + sn->digits = s; + if (s+3<=e && s[0]=='0') { + if (s[1]=='X' || s[1]=='x') { + //hexadecimal + s += 2; + sn->digits = s; + for (;sdots_found++; + else if (!chex(*s)) + break; + } + goto done_scanning_digits; + } else if (s[1]=='B' || s[1]=='b') { + //binary + s += 2; + if (*s!='0' && *s!='1') + s -= 2; + sn->digits = s; + } + } + + //binary, decimal, or octal + for (;sdots_found++; + else if (!cdigit(*s)) + break; + } + +done_scanning_digits: + + sn->exponent = s; + if (sprefix==sn->digits && (*s=='E' || *s=='e')) || + (sn->prefix < sn->digits && (*s=='P' || *s=='p')) + )) { + s++; + if (ssuffix = s; + while (send = s; + + //Now we're done scanning, but now we want to know what type this is + type = TOK_INTEGER; + if (sn->dots_found) + type = TOK_FLOATING; + if (sn->exponent < sn->suffix) + type = TOK_FLOATING; + + //if this is an octal, make the leading 0 a prefix + if (type==TOK_INTEGER && sn->prefix==sn->digits && + sn->digits < s && sn->digits[0]=='0') + sn->digits++; + + return type; +} + +static enum tok_suffix read_number_suffix(const char *s, const char *e, + enum token_type type, tok_message_queue *mq) { + const char *orig_s = s; + enum tok_suffix sfx = 0; + + //read the suffix in pieces + while (s='a' && c<='z') + c -= 'a'-'A'; + + if (c=='L') { + if (sdigits is not empty (i.e. sn->digits < sn->exponent) + *unless* the prefix is "0" + has no exponent + suffix is [0-9 A-Z a-z '.']* + dots_found == 0 + */ + readui_base base = READUI_DEC; + const char *tokstart = sn->prefix; + const char *s = sn->digits, *e = sn->exponent; + + if (sn->prefix+1 < sn->digits) { + if (sn->prefix[1]=='X' || sn->prefix[1]=='x') + base = READUI_HEX; + else + base = READUI_OCT; + } else if (sn->prefix < sn->digits) { + base = READUI_OCT; + } + + if (s>=e && base==READUI_OCT) { + //octal contains no digits + out->v = 0; + out->base = 8; + goto suffix; + } + + out->v = readui(&s, sn->exponent, base); + out->base = base & 0xFF; + + if (s != e || errno == EINVAL) { + tok_msg_error(integer_invalid_digits, tokstart, + "Integer constant contains invalid digits"); + } else if (errno) { + if (errno == ERANGE) { + tok_msg_error(integer_out_of_range, tokstart, + "Integer constant out of range"); + } else { + tok_msg_bug(readui_unknown, tokstart, + "Unknown error returned by readui"); + } + } + +suffix: + out->suffix = + read_number_suffix(sn->suffix, sn->end, TOK_INTEGER, mq); + + return; +} + +static void read_floating(struct tok_floating *out, const struct scan_number *sn, + tok_message_queue *mq) { + /* + Assertions about a float's struct scan_number: + prefix is empty or [0B 0b 0X 0x] (note: no octal prefix 0) + sn->digits not empty, ever + exponent may or may not exist + If exponent exists, it is valid and formatted as: + ( [E P e p] ['+' '-']*0..1 [0-9]* ) + An exponent starts with E if this is decimal, P if it is hex/binary + suffix is [0-9 A-Z a-z '.']* + dots_found can be anything + */ + const char *tokstart = sn->prefix; + const char *s = sn->prefix, *e = sn->suffix; + char borrow = *sn->end; + //long double strtold(const char *nptr, char **endptr); + + out->v = 0.0; + out->suffix = TOK_NOSUFFIX; + + if (sn->prefix < sn->digits) { + if (sn->prefix[1]=='B' || sn->prefix[1]=='b') { + tok_msg_error(binary_float, tokstart, + "Binary floating point constants not allowed"); + return; + } + if (sn->exponent >= sn->suffix) { + tok_msg_error(hex_float_no_exponent, tokstart, + "Hex floating point constant missing exponent"); + return; + } + } + + + /* Stick a null terminator at the end of the input so strtold + * won't read beyond the given input. + * + * This is thread-safe because the input is from + * token_list.txt, which was generated in the + * tokenize function which is still running. + */ + *(char*)sn->end = 0; + errno = 0; + out->v = strtold(s, (char**)&s); + //don't forget to set it back + *(char*)sn->end = borrow; + + if (errno) { + //for some reason, strtold may errno to EDOM to indicate underrun + //open test/run.c and search "floating_out_of_range" for more details + if (errno == ERANGE || errno == EDOM) { + tok_msg_error(floating_out_of_range, tokstart, + "Floating point constant out of range"); + } else { + tok_msg_bug(strtold_unknown, tokstart, + "Unknown error returned by strtold"); + } + } + + if (s != e) { + tok_msg_error(floating_invalid_digits, tokstart, + "Floating point constant contains invalid digits"); + } + + out->suffix = + read_number_suffix(sn->suffix, sn->end, TOK_FLOATING, mq); +} + +char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq) { + struct scan_number sn; + + tok->type = scan_number(&sn, s, e); + if (tok->type == TOK_INTEGER) + read_integer(&tok->integer, &sn, mq); + else + read_floating(&tok->floating, &sn, mq); + + return (char*)sn.end; +} + +#undef MESSAGE_PATH