]> git.ozlabs.org Git - ccan/blob - ccan/graphql/graphql.c
fix warnings
[ccan] / ccan / graphql / graphql.c
1 /* MIT (BSD) license - see LICENSE file for details */
2 #include "graphql.h"
3
4 #include "ccan/tal/str/str.h"
5 #include "ccan/utf8/utf8.h"
6
7
// GraphQL character classes
//
// Each macro classifies a single character code `c`.  Arguments may be
// evaluated more than once, so only pass side-effect-free expressions.

// Tab, LF, CR, or anything from space up to 65535.
#define SOURCE_CHAR(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || ((c) >= ' ' && (c) <= 65535))
// Tab or space.
#define WHITE_SPACE(c) ((c) == '\t' || (c) == ' ')
// LF or CR.
#define LINE_TERMINATOR(c) ((c) == '\n' || (c) == '\r')
// The '#' that introduces a comment.
#define COMMENT(c) ((c) == '#')
// Any source character that does not end the line.
#define COMMENT_CHAR(c) (SOURCE_CHAR(c) && !LINE_TERMINATOR(c))
// Characters allowed unescaped inside a quoted string.
#define STRING_CHAR(c) (SOURCE_CHAR(c) && !LINE_TERMINATOR(c) && (c)!='"' && (c)!='\\')
// Characters allowed inside a block string.
#define BLOCK_STRING_CHAR(c) (SOURCE_CHAR(c))
#define COMMA(c) ((c) == ',')
// End of input: NUL or code 4.
#define EOF_CHAR(c) ((c) == 0 || (c) == 4)
// One of the single-character punctuators.  strchr() converts its argument to
// char and also matches the terminating NUL, so the range guard is required to
// keep 0 and values outside 7-bit ASCII from being reported as punctuators.
#define PUNCTUATOR(c) ((c) > 0 && (c) < 128 && strchr("!$&().:=@[]{|}", (int)(c)) != NULL)
#define DIGIT(c) ((c) >= '0' && (c) <= '9')
#define HEX_DIGIT(c) (DIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
// Letters and underscore may start a name; digits may follow.
#define NAME_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_')
#define NAME_CONTINUE(c) (NAME_START(c) || DIGIT(c))
24
// Copy helper: copies the NUL-terminated string `s` to `d` front to back,
// terminator included.  No bounds are checked; `d` must be large enough.
#define cpystr(d,s) { \
	char *cpystr_src = (s); \
	char *cpystr_dst = (d); \
	while (*cpystr_src) \
		*cpystr_dst++ = *cpystr_src++; \
	*cpystr_dst = *cpystr_src; \
}
27
// Parser shorthands
//
// These macros form the small DSL used by the parse_*() functions.  They
// expect the names `tokens`, `used`, `err`, `obj`, `rollback_top` and the
// label `exit_label` to be in scope; PARAMS, INIT and EXIT provide them.

/* Common return type of the parser functions. */
#define RET void *

/* Common parameter list: pending tokens, consumed tokens, error slot. */
#define PARAMS \
	struct list_head *tokens, struct list_head *used, const char **err

/* Passes the common parameters on to a nested parser call. */
#define ARGS tokens, used, err

/*
 * Prologue: records the current head of `tokens` as the rollback point and
 * allocates a zero-filled struct graphql_<type> called `obj`.
 */
#define INIT(type) \
	struct graphql_token *rollback_top = \
		list_top(tokens, struct graphql_token, list); \
	struct graphql_##type *obj = tal(tokens, struct graphql_##type); \
	memset(obj, 0, sizeof(*obj));

/*
 * Epilogue: defines exit_label, frees `obj` when an error is pending and
 * returns `obj`.  The self-assignment keeps rollback_top "used".
 */
#define EXIT \
	goto exit_label; \
	exit_label: \
	rollback_top = rollback_top; \
	if (*err) \
		obj = tal_free(obj); \
	return obj;

/* Moves the head of `tokens` onto the head of `used`. */
#define CONSUME_ONE { \
	list_add(used, \
		 (struct list_node *)list_pop(tokens, struct graphql_token, list)); \
}

/* Moves the head of `used` back onto the head of `tokens`. */
#define RESTORE_ONE { \
	list_add(tokens, \
		 (struct list_node *)list_pop(used, struct graphql_token, list)); \
}

/* Restores tokens until the head of `tokens` is the rollback point again. */
#define ROLLBACK(args) { \
	while (list_top(tokens, struct graphql_token, list) != rollback_top) { \
		RESTORE_ONE; \
	} \
}

/* Alternative: leave on success, otherwise clear the error and try the next. */
#define OR if (!*err) goto exit_label; *err = NULL;

/* Required element: on error, roll back and leave. */
#define REQ if (*err) { ROLLBACK(args); goto exit_label; }

/* Optional element: discard any error. */
#define OPT *err = NULL;

/* Closes a do-loop that repeats until an error, then discards that error. */
#define WHILE_OPT while(!*err); *err = NULL;

/* Declares `tok` as the head of `tokens` without consuming it. */
#define LOOKAHEAD(args, tok) \
	struct graphql_token *tok = list_top(tokens, struct graphql_token, list);

/* Replaces a pending error message with `msg`. */
#define MSG(msg) if (*err) *err = msg;
53
54
55 // Parser functions
56
57 RET parse_document(PARAMS);
58 RET parse_definition(PARAMS);
59 RET parse_executable_document(PARAMS);
60 RET parse_executable_definition(PARAMS);
61 RET parse_operation_definition(PARAMS);
62 RET parse_operation_type(PARAMS);
63 RET parse_selection_set(PARAMS);
64 RET parse_selection(PARAMS);
65 RET parse_field(PARAMS);
66 RET parse_arguments(PARAMS);
67 RET parse_argument(PARAMS);
68 RET parse_alias(PARAMS);
69 RET parse_fragment_spread(PARAMS);
70 RET parse_fragment_definition(PARAMS);
71 RET parse_fragment_name(PARAMS);
72 RET parse_type_condition(PARAMS);
73 RET parse_inline_fragment(PARAMS);
74 RET parse_value(PARAMS);
75 RET parse_int_value(PARAMS);
76 RET parse_negative_sign(PARAMS);
77 RET parse_non_zero_digit(PARAMS);
78 RET parse_float_value(PARAMS);
79 RET parse_boolean_value(PARAMS);
80 RET parse_string_value(PARAMS);
81 RET parse_string_character(PARAMS);
82 RET parse_escaped_unicode(PARAMS);
83 RET parse_escaped_character(PARAMS);
84 RET parse_block_string_character(PARAMS);
85 RET parse_null_value(PARAMS);
86 RET parse_enum_value(PARAMS);
87 RET parse_list_value(PARAMS);
88 RET parse_object_value(PARAMS);
89 RET parse_object_field(PARAMS);
90 RET parse_variable(PARAMS);
91 RET parse_variable_definitions(PARAMS);
92 RET parse_variable_definition(PARAMS);
93 RET parse_default_value(PARAMS);
94 RET parse_type(PARAMS);
95 RET parse_named_type(PARAMS);
96 RET parse_list_type(PARAMS);
97 RET parse_non_null_type(PARAMS);
98 RET parse_non_null_type_1(PARAMS);
99 RET parse_non_null_type_2(PARAMS);
100 RET parse_directives(PARAMS);
101 RET parse_directive(PARAMS);
102 RET parse_type_system_document(PARAMS);
103 RET parse_type_system_definition(PARAMS);
104 RET parse_type_system_extension_document(PARAMS);
105 RET parse_type_system_definition_or_extension(PARAMS);
106 RET parse_type_system_extension(PARAMS);
107 RET parse_description(PARAMS);
108 RET parse_schema_definition(PARAMS);
109 RET parse_root_operation_type_definition(PARAMS);
110 RET parse_schema_extension(PARAMS);
111 RET parse_type_definition(PARAMS);
112 RET parse_type_extension(PARAMS);
113 RET parse_scalar_type_definition(PARAMS);
114 RET parse_scalar_type_extension(PARAMS);
115 RET parse_object_type_definition(PARAMS);
116 RET parse_implements_interfaces(PARAMS);
117 RET parse_fields_definition(PARAMS);
118 RET parse_field_definition(PARAMS);
119 RET parse_arguments_definition(PARAMS);
120 RET parse_input_value_definition(PARAMS);
121 RET parse_object_type_extension(PARAMS);
122 RET parse_interface_type_definition(PARAMS);
123 RET parse_interface_type_extension(PARAMS);
124 RET parse_union_type_definition(PARAMS);
125 RET parse_union_member_types(PARAMS);
126 RET parse_union_type_extension(PARAMS);
127 RET parse_enum_type_definition(PARAMS);
128 RET parse_enum_values_definition(PARAMS);
129 RET parse_enum_value_definition(PARAMS);
130 RET parse_enum_type_extension(PARAMS);
131 RET parse_input_object_type_definition(PARAMS);
132 RET parse_input_fields_definition(PARAMS);
133 RET parse_directive_definition(PARAMS);
134 RET parse_directive_locations(PARAMS);
135 RET parse_directive_location(PARAMS);
136 RET parse_executable_directive_location(PARAMS);
137 RET parse_type_system_directive_location(PARAMS);
138
139 RET parse_keyword(PARAMS, const char *keyword, const char *errmsg);
140 RET parse_punct(PARAMS, int punct);
141 RET parse_name(PARAMS);
142 RET parse_int(PARAMS);
143 RET parse_float(PARAMS);
144 RET parse_string(PARAMS);
145
146 // Convert input string into AST.
147 const char *graphql_lexparse(const char *input, const tal_t *ctx, struct list_head **tokens, struct graphql_executable_document **doc) {
148         const char *err = graphql_lex(input, ctx, tokens);
149         if (!err)
150                 err = graphql_parse(*tokens, doc);
151         return err;
152 }
153
154 // Convert lexed tokens into AST.
155 const char *graphql_parse(struct list_head *tokens, struct graphql_executable_document **doc) {
156         struct list_head used = LIST_HEAD_INIT(used);
157         const char *err = NULL;
158         *doc = parse_executable_document(tokens, &used, &err);
159         return err;
160 }
161
162 /* The following parser functions follow special rules:
163  *      - The declaration is standardized with RET and PARAMS
164  *      - The "err" argument is assumed to be NULL upon entrance
165  *      - The "err" argument is set on failure
166  *      - If the function fails to parse, then "tokens" shall be as it was upon entrance
167  *      - INIT and EXIT macros are used
168  *      - Macros such as REQ and OPT facilitate readability and conciseness
169  */
170
171 RET parse_document(PARAMS) {
172         INIT(document);
173         obj->first_def = parse_definition(ARGS); REQ
174         struct graphql_definition *p = obj->first_def;
175         do {
176                 p->next_def = parse_definition(ARGS);
177                 p = p->next_def;
178         } WHILE_OPT;
179         EXIT;
180 }
181
182 RET parse_definition(PARAMS) {
183         INIT(definition);
184         obj->executable_def = parse_executable_definition(ARGS);
185 /*      OR
186         obj->type_system_def = parse_type_system_definition_or_extension(ARGS);
187         // NOTE: Optional type system is not (yet) implemented.
188 */
189         EXIT;
190 }
191
192 RET parse_executable_document(PARAMS) {
193         INIT(executable_document);
194         obj->first_def = parse_executable_definition(ARGS); REQ
195         struct graphql_executable_definition *p = obj->first_def;
196         do {
197                 p->next_def = parse_executable_definition(ARGS);
198                 p = p->next_def;
199         } WHILE_OPT;
200         EXIT;
201 }
202
203 RET parse_executable_definition(PARAMS) {
204         INIT(executable_definition);
205         obj->op_def = parse_operation_definition(ARGS); MSG("invalid operation or fragment definition"); OR
206         obj->frag_def = parse_fragment_definition(ARGS); MSG("invalid operation or fragment definition"); 
207         EXIT;
208 }
209
210 RET parse_operation_definition(PARAMS) {
211         INIT(operation_definition);
212         obj->op_type = parse_operation_type(ARGS);
213         if (!*err) {
214                 obj->op_name = parse_name(ARGS); OPT
215                 obj->vars = parse_variable_definitions(ARGS); OPT
216                 obj->directives = parse_directives(ARGS); OPT
217         } else
218                 *err = NULL;
219         obj->sel_set = parse_selection_set(ARGS);
220         if (*err) ROLLBACK(ARGS);
221         EXIT;
222 }
223
224 RET parse_operation_type(PARAMS) {
225         INIT(operation_type);
226         const char *errmsg = "expected: query, mutation, or subscription";
227         obj->op_type = parse_keyword(ARGS, "query", errmsg); OR
228         obj->op_type = parse_keyword(ARGS, "mutation", errmsg); OR
229         obj->op_type = parse_keyword(ARGS, "subscription", errmsg);
230         EXIT;
231 }
232
233 RET parse_selection_set(PARAMS) {
234         INIT(selection_set);
235         parse_punct(ARGS, '{'); REQ;
236         obj->first = parse_selection(ARGS); REQ;
237         struct graphql_selection *p = obj->first;
238         parse_punct(ARGS, '}');
239         while (*err) {
240                 *err = NULL;
241                 p->next = parse_selection(ARGS); MSG("expected: selection or '}'"); REQ;
242                 p = p->next;
243                 parse_punct(ARGS, '}');
244         }
245         EXIT;
246 }
247
248 RET parse_selection(PARAMS) {
249         INIT(selection);
250         obj->field = parse_field(ARGS); OR
251         obj->frag_spread = parse_fragment_spread(ARGS); OR
252         obj->inline_frag = parse_inline_fragment(ARGS);
253         MSG("expected: field, fragment spread, or inline fragment");
254         EXIT;
255 }
256
257 RET parse_field(PARAMS) {
258         INIT(field);
259         obj->alias = parse_alias(ARGS); OPT
260         obj->name = parse_name(ARGS); REQ
261         obj->args = parse_arguments(ARGS); OPT
262         obj->directives = parse_directives(ARGS); OPT
263         obj->sel_set = parse_selection_set(ARGS); OPT
264         EXIT;
265 }
266
267 RET parse_arguments(PARAMS) {
268         INIT(arguments);
269         parse_punct(ARGS, '('); REQ
270         obj->first = parse_argument(ARGS); REQ
271         struct graphql_argument *p = obj->first;
272         parse_punct(ARGS, ')');
273         while (*err) {
274                 *err = NULL;
275                 p->next = parse_argument(ARGS); MSG("expected: argument or ')'"); REQ;
276                 p = p->next;
277                 parse_punct(ARGS, ')');
278         }
279         EXIT;
280 }
281
282 RET parse_argument(PARAMS) {
283         INIT(argument);
284         obj->name = parse_name(ARGS); REQ
285         parse_punct(ARGS, ':'); REQ
286         obj->val = parse_value(ARGS); REQ
287         EXIT;
288 }
289
290 RET parse_alias(PARAMS) {
291         INIT(alias);
292         obj->name = parse_name(ARGS); REQ
293         parse_punct(ARGS, ':'); REQ
294         EXIT;
295 }
296
297 RET parse_fragment_spread(PARAMS) {
298         INIT(fragment_spread);
299         parse_punct(ARGS, 0x2026); REQ // ...
300         obj->name = parse_fragment_name(ARGS); REQ
301         obj->directives = parse_directives(ARGS); OPT
302         EXIT;
303 }
304
305 RET parse_fragment_definition(PARAMS) {
306         INIT(fragment_definition);
307         parse_keyword(ARGS, "fragment", "fragment expected"); REQ
308         obj->name = parse_fragment_name(ARGS); REQ
309         obj->type_cond = parse_type_condition(ARGS); REQ
310         obj->directives = parse_directives(ARGS); OPT
311         obj->sel_set = parse_selection_set(ARGS); REQ
312         EXIT;
313 }
314
315 RET parse_fragment_name(PARAMS) {
316         INIT(fragment_name);
317         obj->name = parse_name(ARGS); REQ
318         struct graphql_token *tok = list_top(used, struct graphql_token, list);
319         if (streq(tok->token_string, "on")) {
320                 *err = "invalid fragment name";
321                 ROLLBACK(ARGS);
322         }
323         EXIT;
324 }
325
326 RET parse_type_condition(PARAMS) {
327         INIT(type_condition);
328         parse_keyword(ARGS, "on", "expected: 'on'"); REQ
329         obj->named_type = parse_named_type(ARGS); REQ
330         EXIT;
331 }
332
333 RET parse_inline_fragment(PARAMS) {
334         INIT(inline_fragment);
335         parse_punct(ARGS, 0x2026); REQ // ...
336         obj->type_cond = parse_type_condition(ARGS); OPT
337         obj->directives = parse_directives(ARGS); OPT
338         obj->sel_set = parse_selection_set(ARGS); REQ
339         EXIT;
340 }
341
342 RET parse_value(PARAMS) {
343         INIT(value);
344         obj->var = parse_variable(ARGS); // FIXME: if not const
345         OR
346         obj->int_val = parse_int_value(ARGS); OR
347         obj->float_val = parse_float_value(ARGS); OR
348         obj->str_val = parse_string_value(ARGS); OR
349         obj->bool_val = parse_boolean_value(ARGS); OR
350         obj->null_val = parse_null_value(ARGS); OR
351         obj->enum_val = parse_enum_value(ARGS); OR
352         obj->list_val = parse_list_value(ARGS); OR
353         obj->obj_val = parse_object_value(ARGS);
354         EXIT;
355 }
356
357 RET parse_int_value(PARAMS) {
358         INIT(int_value);
359         obj->val = parse_int(ARGS);
360         EXIT;
361 }
362
363 RET parse_float_value(PARAMS) {
364         INIT(float_value);
365         obj->val = parse_float(ARGS);
366         EXIT;
367 }
368
369 RET parse_boolean_value(PARAMS) {
370         INIT(boolean_value);
371         obj->val = parse_keyword(ARGS, "true", "invalid boolean value"); OR
372         obj->val = parse_keyword(ARGS, "false", "invalid boolean value");
373         EXIT;
374 }
375
376 RET parse_string_value(PARAMS) {
377         INIT(string_value);
378         obj->val = parse_string(ARGS);
379         EXIT;
380 }
381
382 RET parse_null_value(PARAMS) {
383         INIT(null_value);
384         obj->val = parse_keyword(ARGS, "null", "null expected");
385         EXIT;
386 }
387
388 RET parse_enum_value(PARAMS) {
389         INIT(enum_value);
390         obj->val = parse_name(ARGS); REQ
391         struct graphql_token *tok = list_top(used, struct graphql_token, list);
392         if (streq(tok->token_string, "true")
393          || streq(tok->token_string, "false")
394          || streq(tok->token_string, "null")) {
395                 *err = "enum value cannot be true, false, or null";
396                 ROLLBACK(ARGS);
397         }
398         EXIT;
399 }
400
401 RET parse_list_value(PARAMS) {
402         INIT(list_value);
403         parse_punct(ARGS, '['); REQ
404         parse_punct(ARGS, ']');
405         while (*err) {
406                 *err = NULL;
407                 parse_value(ARGS); MSG("expected: value or ']'"); REQ
408                 parse_punct(ARGS, ']');
409         }
410         EXIT;
411 }
412
413 RET parse_object_value(PARAMS) {
414         INIT(object_value);
415         parse_punct(ARGS, '{'); REQ
416         parse_punct(ARGS, '}');
417         struct graphql_object_field *p = NULL;
418         while (*err) {
419                 *err = NULL;
420                 if (!p) {
421                         obj->first = p = parse_object_field(ARGS); MSG("expected: object field or '}'"); REQ
422                 } else {
423                         p->next = parse_object_field(ARGS); MSG("expected: object field or '}'"); REQ
424                         p = p->next;
425                 }
426                 parse_punct(ARGS, '}');
427         }
428         EXIT;
429 }
430
431 RET parse_object_field(PARAMS) {
432         INIT(object_field);
433         obj->name = parse_name(ARGS); REQ
434         parse_punct(ARGS, ':'); REQ
435         obj->val = parse_value(ARGS); REQ
436         EXIT;
437 }
438
439 RET parse_variable(PARAMS) {
440         INIT(variable);
441         parse_punct(ARGS, '$'); REQ
442         obj->name = parse_name(ARGS); REQ
443         EXIT;
444 }
445
446 RET parse_variable_definitions(PARAMS) {
447         INIT(variable_definitions);
448         parse_punct(ARGS, '('); REQ
449         obj->first = parse_variable_definition(ARGS); REQ
450         struct graphql_variable_definition *p = obj->first;
451         parse_punct(ARGS, ')');
452         while (*err) {
453                 *err = NULL;
454                 p->next = parse_variable_definition(ARGS); MSG("expected: variable definition or ')'"); REQ
455                 p = p->next;
456                 parse_punct(ARGS, ')');
457         }
458         EXIT;
459 }
460
461 RET parse_variable_definition(PARAMS) {
462         INIT(variable_definition);
463         obj->var = parse_variable(ARGS); REQ
464         parse_punct(ARGS, ':'); REQ
465         obj->type = parse_type(ARGS); REQ
466         obj->default_val = parse_default_value(ARGS); OPT
467         obj->directives = parse_directives(ARGS); OPT
468         EXIT;
469 }
470
471 RET parse_default_value(PARAMS) {
472         INIT(default_value);
473         parse_punct(ARGS, '='); REQ
474         obj->val = parse_value(ARGS); REQ
475         EXIT;
476 }
477
478 RET parse_type(PARAMS) {
479         INIT(type);
480         obj->named = parse_named_type(ARGS);
481 /*
482         OR
483         obj->list = parse_list_type(ARGS); OR
484         obj->non_null = parse_non_null_type(ARGS);
485 */
486         EXIT;
487 }
488
489 RET parse_named_type(PARAMS) {
490         INIT(named_type);
491         obj->name = parse_name(ARGS);
492         EXIT;
493 }
494
495 /*
496 RET parse_list_type(PARAMS) {
497         INIT(list_type);
498         parse_punct(ARGS, '['); REQ
499         parse_type(ARGS); REQ
500         parse_punct(ARGS, ']'); REQ
501         EXIT;
502 }
503
504 RET parse_non_null_type(PARAMS) {
505         INIT(non_null_type);
506         parse_non_null_type_1(ARGS); OR
507         parse_non_null_type_2(ARGS);
508         EXIT;
509 }
510
511 RET parse_non_null_type_1(PARAMS) {
512         INIT(non_null_type);
513         parse_named_type(ARGS); REQ;
514         parse_punct(ARGS, '!'); REQ;
515         EXIT;
516 }
517
518 RET parse_non_null_type_2(PARAMS) {
519         INIT(non_null_type);
520         parse_list_type(ARGS); REQ;
521         parse_punct(ARGS, '!'); REQ;
522         EXIT;
523 }
524 */
525
526 RET parse_directives(PARAMS) {
527         INIT(directives);
528         obj->first = parse_directive(ARGS); REQ
529         struct graphql_directive *p = obj->first;
530         do {
531                 p->next = parse_directive(ARGS);
532                 p = p->next;
533         } WHILE_OPT;
534         EXIT;
535 }
536
537 RET parse_directive(PARAMS) {
538         INIT(directive);
539         parse_punct(ARGS, '@'); REQ
540         obj->name = parse_name(ARGS); REQ
541         obj->args = parse_arguments(ARGS); OPT
542         EXIT;
543 }
544
545
546 /* The following functions construct the "leaves" of the abstract syntax tree. */
547
548 RET parse_keyword(PARAMS, const char *keyword, const char *errmsg) {
549         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
550         if (!tok || tok->token_type != 'a') {
551                 *err = errmsg; return NULL;
552         }
553         if (!streq(tok->token_string, keyword)) {
554                 *err = errmsg; return NULL;
555         }
556         CONSUME_ONE;
557         return tok;
558 }
559
560 // Note: a static buffer is used here.
561 RET parse_punct(PARAMS, int punct) {
562         static char punctbuf[16];
563         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
564         if (!tok || tok->token_type != punct) {
565                 if (punct == 0x2026)
566                         sprintf(punctbuf, "expected: '...'");
567                 else
568                         sprintf(punctbuf, "expected: '%c'", punct);
569                 *err = punctbuf; return NULL;
570         }
571         CONSUME_ONE;
572         return tok;
573 }
574
575 RET parse_name(PARAMS) {
576         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
577         if (!tok || tok->token_type != 'a') {
578                 *err = "name expected"; return NULL;
579         }
580         CONSUME_ONE;
581         return tok;
582 }
583
584 RET parse_int(PARAMS) {
585         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
586         if (!tok || tok->token_type != 'i') {
587                 *err = "integer expected"; return NULL;
588         }
589         CONSUME_ONE;
590         return tok;
591 }
592
593 RET parse_float(PARAMS) {
594         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
595         if (!tok || tok->token_type != 'f') {
596                 *err = "float expected"; return NULL;
597         }
598         CONSUME_ONE;
599         return tok;
600 }
601
602 RET parse_string(PARAMS) {
603         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
604         if (!tok || tok->token_type != 's') {
605                 *err = "string expected"; return NULL;
606         }
607         CONSUME_ONE;
608         return tok;
609 }
610
611
612 // Convert input string into tokens.
613 const char *graphql_lex(const char *input, const tal_t *ctx, struct list_head **tokens) {
614
615         unsigned int c;
616         const char *p, *line_beginning;
617         unsigned int line_num = 1;
618         struct list_head *tok_list;
619         struct graphql_token *tok;
620
621         // Initialize token output list.
622         tok_list = tal(ctx, struct list_head);
623         if (tokens)
624                 *tokens = tok_list;
625         list_head_init(tok_list);
626
627         // Note: label and goto are used here like a continue statement except that
628         // it skips iteration, for when characters are fetched in the loop body.
629         p = input;
630         line_beginning = p;
631         do {
632                 c = *p++;
633 newchar:
634                 // Consume line terminators and increment line counter.
635                 if (LINE_TERMINATOR(c)) {
636                         unsigned int c0 = c;
637                         c = *p++;
638                         if (c0 == 10 || c0 == 13)
639                                 line_num++;
640                         if (c0 == 13 && c == 10)
641                                 c = *p++;
642                         line_beginning = p - 1;
643                         goto newchar;
644                 }
645
646                 // Consume other ignored tokens.
647                 if (COMMA(c) || WHITE_SPACE(c)) {
648                         c = *p++;
649                         goto newchar;
650                 }
651                 if (COMMENT(c)) {
652                         while ((c = *p++) != EOF && !EOF_CHAR(c) && COMMENT_CHAR(c))
653                                 ; // No-op
654                         goto newchar;
655                 }
656
657                 // Return success when end is reached.
658                 if (EOF_CHAR(c))
659                         return GRAPHQL_SUCCESS;
660
661                 // Punctuator tokens.
662                 if (PUNCTUATOR(c)) {
663
664                         // Note beginning of token in input.
665                         const char *start = p - 1;
666
667                         // Handle the ... multi-character case.
668                         if (c == '.') {
669                                 c = *p++;
670                                 if (c != '.')
671                                         return "unrecognized punctuator";
672                                 c = *p++;
673                                 if (c != '.')
674                                         return "unrecognized punctuator";
675                                 c = 0x2026;
676                         }
677
678                         tok = tal(tok_list, struct graphql_token);
679                         list_add_tail(tok_list, &tok->list);
680                         tok->token_type = c;
681                         tok->token_specific = c;
682                         tok->token_string = NULL;
683                         tok->source_line = line_num;
684                         tok->source_column = start - line_beginning + 1;
685                         tok->source_len = p - start;
686
687                 } else if (NAME_START(c)) {
688
689                         // Name/identifier tokens.
690                         tok = tal(tok_list, struct graphql_token);
691                         list_add_tail(tok_list, &tok->list);
692                         tok->token_type = 'a';
693                         tok->token_specific = 'a';
694                         // tok->token_string updated below.
695                         tok->source_line = line_num;
696                         tok->source_column = p - line_beginning;
697                         // tok->source_len updated below.
698
699                         // Note the beginning of the name.
700                         const char *name_begin = p - 1;
701                         const char *name_end;
702                         int name_len;
703
704                         // Consume the rest of the token.
705                         do {
706                                 c = *p++;
707                         } while (NAME_CONTINUE(c));
708
709                         // Note the end of the name and calculate the length.
710                         name_end = p - 1;
711                         name_len = name_end - name_begin;
712                         tok->source_len = name_len;
713
714                         // Copy the token string.
715                         tok->token_string = tal_strndup(tok, name_begin, name_len);
716
717                         goto newchar;
718
719                 } else if (DIGIT(c) || c == '-') {
720
721                         // Number tokens.
722                         const char *num_start = p - 1;
723                         char type = 'i';
724
725                         if (c == '-') {
726                                 c = *p++;
727                                 if (!DIGIT(c))
728                                         return "negative sign must precede a number";
729                         }
730
731                         if (c == '0') {
732                                 c = *p++;
733                                 if (DIGIT(c))
734                                         return "leading zeros are not allowed";
735                         } else {
736                                 do {
737                                         c = *p++;
738                                 } while(DIGIT(c));
739                         }
740
741                         if (c == '.') {
742                                 type = 'f';
743                                 if (!DIGIT(*p))
744                                         return "invalid float value fractional part";
745                                 do {
746                                         c = *p++;
747                                 } while(DIGIT(c));
748                         }
749
750                         if (c == 'e' || c == 'E') {
751                                 type = 'f';
752                                 c = *p++;
753                                 if (c == '+' || c == '-')
754                                         c = *p++;
755                                 if (!DIGIT(*p))
756                                         return "invalid float value exponent part";
757                                 do {
758                                         c = *p++;
759                                 } while(DIGIT(c));
760                         }
761
762                         if (c == '.' || NAME_START(c))
763                                 return "invalid numeric value";
764
765                         const char *num_end = p - 1;
766                         int num_len = num_end - num_start;
767
768                         tok = tal(tok_list, struct graphql_token);
769                         list_add_tail(tok_list, &tok->list);
770                         tok->token_type = type;
771                         tok->token_string = tal_strndup(tok, num_start, num_len);
772                         tok->source_line = line_num;
773                         tok->source_column = num_start - line_beginning + 1;
774                         tok->source_len = num_len;
775
776                         goto newchar;
777
778                 } else if (c == '"') {
779
780                         // String tokens.
781                         c = *p++;
782                         const char *str_begin = p - 1;
783                         const char *str_end;
784                         bool str_block = false;
785                         if (c == '"') {
786                                 c = *p++;
787                                 if (c == '"') {
788                                         // block string
789                                         str_block = true;
790                                         str_begin += 2;
791                                         int quotes = 0;
792                                         do {
793                                                 c = *p++;
794                                                 if (c == '\"') quotes++; else quotes = 0;
795                                                 if (quotes == 3 && *(p-4) == '\\') quotes = 0;
796                                         } while (BLOCK_STRING_CHAR(c) && quotes < 3);
797                                         if (quotes == 3) {
798                                                 c = *--p;
799                                                 c = *--p;
800                                         }
801                                         str_end = p - 1;
802                                         if (c != '"')
803                                                 return "unterminated string or invalid character";
804                                         c = *p++;
805                                         if (c != '"')
806                                                 return "invalid string termination";
807                                         c = *p++;
808                                         if (c != '"')
809                                                 return "invalid string termination";
810                                 } else {
811                                         // empty string
812                                         str_end = str_begin;
813                                         --p;
814                                 }
815                         } else {
816                                 // normal string
817                                 --p;
818                                 do {
819                                         c = *p++;
820                                         if (c == '\\') {
821                                                 c = *p++;
822                                                 if (strchr("\"\\/bfnrtu", c)) {
823                                                         if (c == 'u') {
824                                                                 c = *p++;
825                                                                 if (!HEX_DIGIT(c))
826                                                                         return "invalid unicode escape sequence";
827                                                                 c = *p++;
828                                                                 if (!HEX_DIGIT(c))
829                                                                         return "invalid unicode escape sequence";
830                                                                 c = *p++;
831                                                                 if (!HEX_DIGIT(c))
832                                                                         return "invalid unicode escape sequence";
833                                                                 c = *p++;
834                                                                 if (!HEX_DIGIT(c))
835                                                                         return "invalid unicode escape sequence";
836                                                         } else {
837                                                                 c = 'a'; // anything besides a quote to let the loop continue
838                                                         }
839                                                 } else {
840                                                         return "invalid string escape sequence";
841                                                 }
842                                         }
843                                 } while (STRING_CHAR(c));
844                                 if (c != '"')
845                                         return "unterminated string or invalid character";
846                                 str_end = p - 1;
847                         }
848                         int str_len = str_end - str_begin;
849
850                         tok = tal(tok_list, struct graphql_token);
851                         list_add_tail(tok_list, &tok->list);
852                         tok->token_type = 's';
853                         tok->token_specific = 's';
854                         tok->token_string = tal_strndup(tok, str_begin, str_len);
855                         tok->source_line = line_num;
856                         tok->source_column = str_begin - line_beginning + 1;
857                         tok->source_len = str_len;
858
859                         // Process escape sequences. These always shorten the string (so the memory allocation is always enough).
860                         char d;
861                         char *q = tok->token_string;
862                         char *rewrite_dest;
863                         int quotes = 0;
864                         while ((d = *q++)) {
865                                 if (str_block) {
866                                         if (d == '\"') quotes++; else quotes = 0;
867                                         if (quotes == 3 && *(q-4) == '\\') {
868                                                 quotes = 0;
869                                                 rewrite_dest = q - 4;
870                                                 cpystr(rewrite_dest, q - 3);
871                                         }
872                                 } else {
873                                         if (d == '\\') {
874                                                 rewrite_dest = q - 1;
875                                                 d = *q++;
876                                                 switch (d) {
877                                                 case '\"':
878                                                         *rewrite_dest++ = '\"';
879                                                         cpystr(rewrite_dest, q--);
880                                                         break;
881                                                 case 'b':
882                                                         *rewrite_dest++ = '\b';
883                                                         cpystr(rewrite_dest, q--);
884                                                         break;
885                                                 case 'f':
886                                                         *rewrite_dest++ = '\f';
887                                                         cpystr(rewrite_dest, q--);
888                                                         break;
889                                                 case 'n':
890                                                         *rewrite_dest++ = '\n';
891                                                         cpystr(rewrite_dest, q--);
892                                                         break;
893                                                 case 'r':
894                                                         *rewrite_dest++ = '\r';
895                                                         cpystr(rewrite_dest, q--);
896                                                         break;
897                                                 case 't':
898                                                         *rewrite_dest++ = '\t';
899                                                         cpystr(rewrite_dest, q--);
900                                                         break;
901                                                 case 'u': {
902                                                                 // Insert escaped character using UTF-8 multi-byte encoding.
903                                                                 char buf[] = {*q++, *q++, *q++, *q++, 0};
904                                                                 int code_point = strtol(buf, 0, 16);
905                                                                 int bytes = utf8_encode(code_point, rewrite_dest);
906                                                                 rewrite_dest += bytes;
907                                                                 cpystr(rewrite_dest, q--);
908                                                         }
909                                                         break;
910                                                 default:
911                                                         cpystr(rewrite_dest, --q);
912                                                 }
913                                         }
914                                 }
915                         }
916                         if (str_block) {
917                                 // Strip leading lines.
918                                 q = tok->token_string;
919                                 for (;;) {
920                                         d = *q++;
921                                         while (WHITE_SPACE(d))
922                                                 d = *q++;
923                                         if (LINE_TERMINATOR(d)) {
924                                                 while (LINE_TERMINATOR(d))
925                                                         d = *q++;
926                                                 cpystr(tok->token_string, q - 1);
927                                                 q = tok->token_string;
928                                         } else
929                                                 break;
930                                 }
931
932                                 // Strip trailing lines.
933                                 q = tok->token_string + strlen(tok->token_string);
934                                 for (;;) {
935                                         d = *--q;
936                                         while (WHITE_SPACE(d))
937                                                 d = *--q;
938                                         if (LINE_TERMINATOR(d)) {
939                                                 while (LINE_TERMINATOR(d))
940                                                         d = *--q;
941                                                 *++q = 0;
942                                         } else
943                                                 break;
944                                 }
945
946                                 // Look for common indentation.
947                                 char *this_indent_start;
948                                 const char *this_indent_end;
949                                 const char *common_indent_start = NULL;
950                                 const char *common_indent_end;
951                                 const char *r;
952                                 q = tok->token_string;
953                                 do {
954                                         d = *q++;
955                                         this_indent_start = q - 1;
956                                         while (WHITE_SPACE(d))
957                                                 d = *q++;
958                                         this_indent_end = q - 1;
959                                         if (LINE_TERMINATOR(d)) {
960                                                 while (LINE_TERMINATOR(d))
961                                                         d = *q++;
962                                                 continue;
963                                         }
964                                         if (EOF_CHAR(d))
965                                                 continue;
966
967                                         if (common_indent_start == NULL) {
968                                                 common_indent_start = this_indent_start;
969                                                 common_indent_end = this_indent_end;
970                                         }
971                                         for (r = this_indent_start; r < this_indent_end && (r - this_indent_start + common_indent_start < common_indent_end); r++) {
972                                                 if (*r != *(r - this_indent_start + common_indent_start))
973                                                         break;
974                                         }
975                                         common_indent_end = r - this_indent_start + common_indent_start;
976
977                                         while (!LINE_TERMINATOR(d) && !EOF_CHAR(d))
978                                                 d = *q++;
979                                         while (LINE_TERMINATOR(d))
980                                                 d = *q++;
981                                         --q;
982
983                                 } while (d);
984
985                                 // Remove common indentation.
986                                 int common_indent_len = common_indent_end - common_indent_start;
987                                 if (common_indent_len > 0) {
988                                         q = tok->token_string;
989                                         do {
990                                                 d = *q++;
991                                                 this_indent_start = q - 1;
992                                                 while (WHITE_SPACE(d))
993                                                         d = *q++;
994                                                 this_indent_end = q - 1;
995                                                 if (LINE_TERMINATOR(d)) {
996                                                         while (LINE_TERMINATOR(d))
997                                                                 d = *q++;
998                                                         continue;
999                                                 }
1000                                                 if (EOF_CHAR(d))
1001                                                         continue;
1002
1003                                                 while (!LINE_TERMINATOR(d) && !EOF_CHAR(d))
1004                                                         d = *q++;
1005                                                 --q;
1006
1007                                                 cpystr(this_indent_start, this_indent_start + common_indent_len);
1008                                                 q -= common_indent_len;
1009                                                 d = *q++;
1010
1011                                                 while (LINE_TERMINATOR(d))
1012                                                         d = *q++;
1013                                                 --q;
1014
1015                                         } while (d);
1016                                 }
1017                         }
1018                         c = *p++;
1019                         goto newchar;
1020
1021                 } else {
1022                         return "invalid source character encountered";
1023                 }
1024
1025         } while (!EOF_CHAR(c));
1026
1027         return "unexpected end-of-input encountered";
1028 }
1029
1030
1031