Make tokenizer throw error on empty char literal, along with some slight cleanups...

author Joey Adams <joeyadams3.14159@gmail.com>

Sun, 12 Jul 2009 03:10:39 +0000 (23:10 -0400)

committer Joey Adams <joeyadams3.14159@gmail.com>

Sun, 12 Jul 2009 03:10:39 +0000 (23:10 -0400)
author Joey Adams <joeyadams3.14159@gmail.com>
Sun, 12 Jul 2009 03:10:39 +0000 (23:10 -0400)
committer Joey Adams <joeyadams3.14159@gmail.com>
Sun, 12 Jul 2009 03:10:39 +0000 (23:10 -0400)
diff --git a/ccan/ccan_tokenizer/ccan_tokenizer.c b/ccan/ccan_tokenizer/ccan_tokenizer.c

index 7d29e025552ba93e9a6aec802db00911b49534cb..52858feaa8797a80422a032d0f7f12ead5ff09e6 100644 (file)
--- a/ccan/ccan_tokenizer/ccan_tokenizer.c
+++ b/ccan/ccan_tokenizer/ccan_tokenizer.c
@@ -473,12 +473,19 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
                                 
                                 add(.type = type,
                                         {.include = include});
+                               
                         } else if (c=='\'' || c=='\"') { //character or string literal
                                 array_char string = array_new(tl);
                                 s = read_cstring(&string, s, e, c, mq);
                                 if (s<e) s++; //advance past endquote (if available)
                                 add(.type = c=='\'' ? TOK_CHAR : TOK_STRING,
                                     {.string = string});
+                               
+                               if (c=='\'' && string.size==0) {
+                                       tok_msg_error(empty_char_constant, orig,
+                                               "Empty character constant");
+                               }
+                               
                         } else if (c=='/' && s<e && (*s=='*' || *s=='/')) { //comment
                                 if (*s++ == '*') { /* C-style comment */
                                         const char *comment_start = s-2;
diff --git a/ccan/ccan_tokenizer/ccan_tokenizer.h b/ccan/ccan_tokenizer/ccan_tokenizer.h

index 7634501f84f455774364dfe9aabb0961c0f087a7..eb541679da5bc832534f271345e31f75d2c5ff2b 100644 (file)
--- a/ccan/ccan_tokenizer/ccan_tokenizer.h
+++ b/ccan/ccan_tokenizer/ccan_tokenizer.h
@@ -41,8 +41,11 @@ enum token_type {
         TOK_INTEGER,       //integer (e.g. 5, 1000L, 0x5)
         TOK_FLOATING,      //floating point number (e.g. 5.0, 7.0f, etc.)
         TOK_OPERATOR,      //operator (e.g. +, -, (, ), ++, etc.)
+       
+       #define token_type_is_identifier(type) ((type)>=TOK_KEYWORD && (type)<=TOK_IDENTIFIER)
         TOK_KEYWORD,       //keyword (e.g. char, _Bool, ifdef)
         TOK_IDENTIFIER,    //identifier or unprocessed keyword (e.g. int, token, pp_conditions)
+       
         TOK_CHAR,          //character literal (e.g. 'a' or even '1234')
         TOK_STRING,        //string literal (e.g. "hello" or "zero\0inside")
         TOK_LEADING_POUND, //leading # in a preprocessor directive (e.g. # include)
@@ -54,6 +57,7 @@ enum token_type {
         TOK_CCOMMENT, //C comment (e.g. /* comment */)
         TOK_CPPCOMMENT, //C++ comment (e.g. //comment )
         TOK_WHITE, //whitespace (span of \t\n\v\f\r and space)
+       
         TOK_STARTLINE,  //beginning of line (txt/txtsize is always empty)
         TOK_STRAY, //control characters, weird characters, and extended characters where they shouldn't be
  };
@@ -192,6 +196,11 @@ struct token {
         size_t line, col;
  };
  
+//keywords such as int, long, etc. may be redefined (e.g. via #define), so they count as identifiers here too
+static inline int token_is_identifier(const struct token *tok) {
+       return token_type_is_identifier(tok->type);
+}
+
  static inline int token_is_ignored(const struct token *tok) {
         return token_type_is_ignored(tok->type);
  }
@@ -204,6 +213,11 @@ static inline int token_is_kw(const struct token *tok, int opkw) {
         return tok->type==TOK_KEYWORD && tok->opkw==opkw;
  }
  
+static inline int token_txt_is(const struct token *tok, const char *str) {
+       size_t len = strlen(str);
+       return tok->txt_size==len && !memcmp(tok->txt, str, len);
+}
+
  struct token_list {
         struct token *first, *last;
         
diff --git a/ccan/ccan_tokenizer/todo b/ccan/ccan_tokenizer/todo

index 0203d054baae9f4c6c403436b4854db961f54d6d..4b6d90f01f4f193cfd91b79034f6ccd52f4c611e 100644 (file)
--- a/ccan/ccan_tokenizer/todo
+++ b/ccan/ccan_tokenizer/todo
@@ -1,3 +1,8 @@
+Write test for empty_char_constant
+
+"defined" cannot be used as a macro name
+<strike>Add "defined" and only accept it in appropriate circumstances</strike>
+
  Update that simple tokenizer compulsory test so things will compile
  
  Handle cases like escaped question marks and pound symbols that I don't understand yet.
author	Joey Adams <joeyadams3.14159@gmail.com>
	Sun, 12 Jul 2009 03:10:39 +0000 (23:10 -0400)
committer	Joey Adams <joeyadams3.14159@gmail.com>
	Sun, 12 Jul 2009 03:10:39 +0000 (23:10 -0400)
ccan/ccan_tokenizer/ccan_tokenizer.c		patch \| blob \| history
ccan/ccan_tokenizer/ccan_tokenizer.h		patch \| blob \| history
ccan/ccan_tokenizer/todo		patch \| blob \| history