]> git.ozlabs.org Git - ccan/blob - ccan/ccan_tokenizer/read_cstring.c
base64: fix for unsigned chars (e.g. ARM).
[ccan] / ccan / ccan_tokenizer / read_cstring.c
1 #include "ccan_tokenizer.h"
2
/*
 * Duplicate the byte range [s, e) into a freshly malloc'd, NUL-terminated
 * string.  The caller owns the result and must free() it.
 *
 * Returns NULL if the allocation fails.
 */
static char *strdup_rng(const char *s, const char *e) {
	const size_t len = (size_t)(e - s);
	char *ret = malloc(len + 1);

	if (ret == NULL)
		return NULL;
	memcpy(ret, s, len);
	ret[len] = 0;
	return ret;
}
9
10 #define MESSAGE_PATH "tokenize/read_cstring/"
11
12 //Reads a C string starting at s until quoteChar is found or e is reached
13 //  Returns the pointer to the terminating quote character or e if none was found
14 char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
15         const char * const tokstart = s;
16         const char *p;
17         int has_endquote=0, has_newlines=0;
18         
19         //tok_msg_debug(called, s, "Called read_cstring on `%s`", s);
20         
21         #define append(startptr,endptr) darray_append_items(*out, startptr, (endptr)-(startptr))
22         #define append_char(theChar) darray_append(*out, theChar)
23         #define append_zero() do {darray_append(*out, 0); out->size--;} while(0)
24         
25         p = s;
26         while (p<e) {
27                 char c = *p++;
28                 if (c == '\\') {
29                         append(s, p-1);
30                         s = p;
31                         if (p >= e) {
32                                 append_char('\\');
33                                 tok_msg_error(ended_in_backslash, p-1,
34                                         "read_cstring input ended in backslash");
35                                 break;
36                         }
37                         c = *p++;
38                         if (c>='0' && c<='9') {
39                                 unsigned int octal = c-'0';
40                                 size_t digit_count = 0;
41                                 while (p<e && *p>='0' && *p<='9') {
42                                         octal <<= 3;
43                                         octal += (*p++) - '0';
44                                         if (++digit_count >= 2)
45                                                 break;
46                                 }
47                                 if (p<e && *p>='0' && *p<='9') {
48                                         tok_msg_info(ambiguous_octal, s-2,
49                                                 "Octal followed by digit");
50                                 }
51                                 if (octal > 0xFF) {
52                                         tok_msg_warn(octal_overflow, s-2,
53                                                 "Octal out of range");
54                                 }
55                                 c = octal;
56                         } else {
57                                 switch (c) {
58                                         case 'x': {
59                                                 size_t digit_count = 0;
60                                                 size_t zero_count = 0;
61                                                 unsigned int hex = 0;
62                                                 while (p<e && *p=='0') p++, zero_count++;
63                                                 for (;p<e;digit_count++) {
64                                                         c = *p++;
65                                                         if (c>='0' && c<='9')
66                                                                 c -= '0';
67                                                         else if (c>='A' && c<='F')
68                                                                 c -= 'A'-10;
69                                                         else if (c>='a' && c<='f')
70                                                                 c -= 'a'-10;
71                                                         else {
72                                                                 p--;
73                                                                 break;
74                                                         }
75                                                         hex <<= 4;
76                                                         hex += c;
77                                                 }
78                                                 if (zero_count+digit_count > 2) {
79                                                         char *hex_string = strdup_rng(s-2, p);
80                                                         tok_msg_warn(ambiguous_hex, s-2,
81                                                                 "Hex escape '%s' is ambiguous", hex_string);
82                                                         if (digit_count > 2)
83                                                                 tok_msg_warn(hex_overflow, s-2,
84                                                                         "Hex escape '%s' out of range", hex_string);
85                                                         free(hex_string);
86                                                 }
87                                                 c = hex & 0xFF;
88                                         }       break;
89                                         case 'a':
90                                                 c=0x7;
91                                                 break;
92                                         case 'b':
93                                                 c=0x8;
94                                                 break;
95                                         case 'e':
96                                                 c=0x1B;
97                                                 break;
98                                         case 'f':
99                                                 c=0xC;
100                                                 break;
101                                         case 'n':
102                                                 c=0xA;
103                                                 break;
104                                         case 'r':
105                                                 c=0xD;
106                                                 break;
107                                         case 't':
108                                                 c=0x9;
109                                                 break;
110                                         case 'v':
111                                                 c=0xB;
112                                                 break;
113                                         case '\\':
114                                                 break;
115                                         default:
116                                                 if (c == quoteChar)
117                                                         break;
118                                                 if (c=='\'' && quoteChar=='"') {
119                                                         /* tok_msg_info(escaped_single_quote, s-2,
120                                                                 "Single quote characters need not be escaped within double quotes"); */
121                                                         break;
122                                                 }
123                                                 if (c=='"' && quoteChar=='\'') {
124                                                         /* tok_msg_info(escaped_double_quote, s-2,
125                                                                 "Double quote characters need not be escaped within single quotes"); */
126                                                         break;
127                                                 }
128                                                 if (c=='?') // \? is needed in some situations to avoid building a trigraph
129                                                         break;
130                                                 tok_msg_warn(unknown_escape, s-2,
131                                                         "Unknown escape sequence '\\%c'", c);
132                                                 break;
133                                 }
134                         }
135                         s = p;
136                         append_char(c);
137                 } else if (c == quoteChar) {
138                         p--;
139                         has_endquote = 1;
140                         break;
141                 } else if (creturn(c)) {
142                         has_newlines = 1;
143                 }
144         }
145         append(s, p);
146         append_zero();
147         if (!has_endquote) {
148                 tok_msg_error(missing_endquote, tokstart,
149                         "Missing endquote on %s literal",
150                         quoteChar=='\'' ? "character" : "string");
151         } else if (has_newlines) {
152                 tok_msg_warn(quote_newlines, tokstart,
153                         "%s literal contains newline character(s)",
154                         quoteChar=='\'' ? "Character" : "String");
155         }
156         return (char*)p;
157         
158         #undef append
159         #undef append_char
160         #undef append_zero
161 }
162
163 #undef MESSAGE_PATH