]> git.ozlabs.org Git - ccan/blob - ccan/graphql/graphql.c
e2b70ef01789ff41ff19cb1dbc37ef6e99f04726
[ccan] / ccan / graphql / graphql.c
1
2 #include "graphql.h"
3
4 #include "ccan/tal/str/str.h"
5 #include "ccan/utf8/utf8.h"
6
7
// GraphQL character classes
//
// Each macro evaluates to non-zero when the code point `c` belongs to the
// class.  The argument may be evaluated more than once, so pass only
// side-effect-free expressions.

// Tab, LF, CR, or anything from U+0020 to U+FFFF.
#define SOURCE_CHAR(c) ((c) == 9 || (c) == 10 || (c) == 13 || ((c) >= 32 && (c) <= 65535))
// Tab or space.
#define WHITE_SPACE(c) ((c) == 9 || (c) == 32)
// LF or CR.
#define LINE_TERMINATOR(c) ((c) == 10 || (c) == 13)
// '#', which introduces a comment.
#define COMMENT(c) ((c) == 35)
// Any source character that does not end the line.
#define COMMENT_CHAR(c) (SOURCE_CHAR(c) && !LINE_TERMINATOR(c))
// Characters allowed unescaped inside a "normal" string.
#define STRING_CHAR(c) (SOURCE_CHAR(c) && !LINE_TERMINATOR(c) && (c)!='"' && (c)!='\\')
#define BLOCK_STRING_CHAR(c) (SOURCE_CHAR(c))
#define COMMA(c) ((c) == 44)
// NUL or EOT (0x04) is treated as end of input.
#define EOF_CHAR(c) ((c) == 0 || (c) == 4)
// Single-character punctuators (and '.', the first character of "...").
// The range check matters: strchr() also matches the terminating NUL and
// converts its argument to char, so without it 0 and e.g. 0x2026 (which
// truncates to '&') would be reported as punctuators.
#define PUNCTUATOR(c) ((c) > 0 && (c) < 0x80 && strchr("!$&().:=@[]{|}", (c)) != NULL)
#define DIGIT(c) ((c) >= 0x30 && (c) <= 0x39)
#define HEX_DIGIT(c) (DIGIT(c) || ((c) >= 0x61 && (c) <= 0x66) || ((c) >= 0x41 && (c) <= 0x46))
// [A-Za-z_]
#define NAME_START(c) (((c) >= 0x61 && (c) <= 0x7A) || ((c) >= 0x41 && (c) <= 0x5A) || (c) == 0x5F)
// [A-Za-z_0-9]
#define NAME_CONTINUE(c) (NAME_START(c) || DIGIT(c))
24
25
// Parser shorthands
//
// These macros form a small DSL used by the parse_*() functions below.  They
// depend on the parameter names fixed by PARAMS (tokens, used, err) and on the
// locals introduced by INIT (rollback_top, obj).

// Common return type of the parser functions.
#define RET void *
// tokens: tokens not yet consumed; used: stack of consumed tokens;
// err: out-parameter holding the current error message (NULL when none).
#define PARAMS struct list_head *tokens, struct list_head *used, const char **err
#define ARGS tokens, used, err

// Function prologue: remember the current head of `tokens` so ROLLBACK can
// restore it, and allocate a zero-filled result node as a tal child of `tokens`.
#define INIT(type) \
        struct graphql_token *rollback_top = list_top(tokens, struct graphql_token, list); \
        struct graphql_##type *obj = talz(tokens, struct graphql_##type);

// Function epilogue: on error free the result node (tal_free yields NULL) and
// return it; otherwise return the populated node.
#define EXIT \
        exit_label: \
        if (*err) obj = tal_free(obj); \
        return obj;

// Move one token from the head of `tokens` to the head of `used`, and back.
// Both are object-like macros: an argument list written after them at the use
// site is a separate, discarded expression statement.
#define CONSUME_ONE { list_add(used, list_pop(tokens, struct graphql_token, list)); }
#define RESTORE_ONE { list_add(tokens, list_pop(used, struct graphql_token, list)); }
// Un-consume tokens until `tokens` starts where it did when INIT ran.
#define ROLLBACK(args) { while (list_top(tokens, struct graphql_token, list) != rollback_top) { RESTORE_ONE; } }
// Alternative: if the previous parse succeeded, finish; otherwise clear the
// error and fall through to the next alternative.
#define OR if (!*err) goto exit_label; *err = NULL;
// The previous parse was required: on error, roll back and finish.
#define REQ if (*err) { ROLLBACK(args); goto exit_label; }
// The previous parse was optional: discard any error.
#define OPT *err = NULL;
// Closes a `do { ... }` body that repeats while parsing succeeds, then
// discards the error that ended the loop.
#define WHILE_OPT while(!*err); *err = NULL;
// Declare `tok` as the next unconsumed token without consuming it.
#define LOOKAHEAD(args, tok) struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
// Replace a pending error message with `msg`; no effect when there is no error.
// Braced so that the `if` cannot capture a following `else`.
#define MSG(msg) { if (*err) *err = (msg); }
49
50
51 // Parser functions
52
53 RET parse_document(PARAMS);
54 RET parse_definition(PARAMS);
55 RET parse_executable_document(PARAMS);
56 RET parse_executable_definition(PARAMS);
57 RET parse_operation_definition(PARAMS);
58 RET parse_operation_type(PARAMS);
59 RET parse_selection_set(PARAMS);
60 RET parse_selection(PARAMS);
61 RET parse_field(PARAMS);
62 RET parse_arguments(PARAMS);
63 RET parse_argument(PARAMS);
64 RET parse_alias(PARAMS);
65 RET parse_fragment_spread(PARAMS);
66 RET parse_fragment_definition(PARAMS);
67 RET parse_fragment_name(PARAMS);
68 RET parse_type_condition(PARAMS);
69 RET parse_inline_fragment(PARAMS);
70 RET parse_value(PARAMS);
71 RET parse_int_value(PARAMS);
72 RET parse_negative_sign(PARAMS);
73 RET parse_non_zero_digit(PARAMS);
74 RET parse_float_value(PARAMS);
75 RET parse_boolean_value(PARAMS);
76 RET parse_string_value(PARAMS);
77 RET parse_string_character(PARAMS);
78 RET parse_escaped_unicode(PARAMS);
79 RET parse_escaped_character(PARAMS);
80 RET parse_block_string_character(PARAMS);
81 RET parse_null_value(PARAMS);
82 RET parse_enum_value(PARAMS);
83 RET parse_list_value(PARAMS);
84 RET parse_object_value(PARAMS);
85 RET parse_object_field(PARAMS);
86 RET parse_variable(PARAMS);
87 RET parse_variable_definitions(PARAMS);
88 RET parse_variable_definition(PARAMS);
89 RET parse_default_value(PARAMS);
90 RET parse_type(PARAMS);
91 RET parse_named_type(PARAMS);
92 RET parse_list_type(PARAMS);
93 RET parse_non_null_type(PARAMS);
94 RET parse_non_null_type_1(PARAMS);
95 RET parse_non_null_type_2(PARAMS);
96 RET parse_directives(PARAMS);
97 RET parse_directive(PARAMS);
98 RET parse_type_system_document(PARAMS);
99 RET parse_type_system_definition(PARAMS);
100 RET parse_type_system_extension_document(PARAMS);
101 RET parse_type_system_definition_or_extension(PARAMS);
102 RET parse_type_system_extension(PARAMS);
103 RET parse_description(PARAMS);
104 RET parse_schema_definition(PARAMS);
105 RET parse_root_operation_type_definition(PARAMS);
106 RET parse_schema_extension(PARAMS);
107 RET parse_type_definition(PARAMS);
108 RET parse_type_extension(PARAMS);
109 RET parse_scalar_type_definition(PARAMS);
110 RET parse_scalar_type_extension(PARAMS);
111 RET parse_object_type_definition(PARAMS);
112 RET parse_implements_interfaces(PARAMS);
113 RET parse_fields_definition(PARAMS);
114 RET parse_field_definition(PARAMS);
115 RET parse_arguments_definition(PARAMS);
116 RET parse_input_value_definition(PARAMS);
117 RET parse_object_type_extension(PARAMS);
118 RET parse_interface_type_definition(PARAMS);
119 RET parse_interface_type_extension(PARAMS);
120 RET parse_union_type_definition(PARAMS);
121 RET parse_union_member_types(PARAMS);
122 RET parse_union_type_extension(PARAMS);
123 RET parse_enum_type_definition(PARAMS);
124 RET parse_enum_values_definition(PARAMS);
125 RET parse_enum_value_definition(PARAMS);
126 RET parse_enum_type_extension(PARAMS);
127 RET parse_input_object_type_definition(PARAMS);
128 RET parse_input_fields_definition(PARAMS);
129 RET parse_directive_definition(PARAMS);
130 RET parse_directive_locations(PARAMS);
131 RET parse_directive_location(PARAMS);
132 RET parse_executable_directive_location(PARAMS);
133 RET parse_type_system_directive_location(PARAMS);
134
135 RET parse_keyword(PARAMS, const char *keyword, const char *errmsg);
136 RET parse_punct(PARAMS, int punct);
137 RET parse_name(PARAMS);
138 RET parse_int(PARAMS);
139 RET parse_float(PARAMS);
140 RET parse_string(PARAMS);
141
142 // Convert input string into AST.
143 const char *graphql_lexparse(const char *input, const tal_t *ctx, struct list_head **tokens, struct graphql_executable_document **doc) {
144         const char *err = graphql_lex(input, ctx, tokens);
145         if (!err)
146                 err = graphql_parse(*tokens, doc);
147         return err;
148 }
149
150 // Convert lexed tokens into AST.
151 const char *graphql_parse(struct list_head *tokens, struct graphql_executable_document **doc) {
152         struct list_head used = LIST_HEAD_INIT(used);
153         const char *err = NULL;
154         *doc = parse_executable_document(tokens, &used, &err);
155         if (err)
156                 return err;
157 }
158
159 /* The following parser functions follow special rules:
160  *      - The declaration is standardized with RET and PARAMS
161  *      - The "err" argument is assumed to be NULL upon entrance
162  *      - The "err" argument is set on failure
163  *      - If the function fails to parse, then "tokens" shall be as it was upon entrance
164  *      - INIT and EXIT macros are used
165  *      - Macros such as REQ and OPT facilitate readability and conciseness
166  */
167
168 RET parse_document(PARAMS) {
169         INIT(document);
170         obj->first_def = parse_definition(ARGS); REQ
171         struct graphql_definition *p = obj->first_def;
172         do {
173                 p->next_def = parse_definition(ARGS);
174                 p = p->next_def;
175         } WHILE_OPT;
176         EXIT;
177 }
178
179 RET parse_definition(PARAMS) {
180         INIT(definition);
181         obj->executable_def = parse_executable_definition(ARGS);
182 /*      OR
183         obj->type_system_def = parse_type_system_definition_or_extension(ARGS);
184         // NOTE: Optional type system is not (yet) implemented.
185 */
186         EXIT;
187 }
188
189 RET parse_executable_document(PARAMS) {
190         INIT(executable_document);
191         obj->first_def = parse_executable_definition(ARGS); REQ
192         struct graphql_executable_definition *p = obj->first_def;
193         do {
194                 p->next_def = parse_executable_definition(ARGS);
195                 p = p->next_def;
196         } WHILE_OPT;
197         EXIT;
198 }
199
200 RET parse_executable_definition(PARAMS) {
201         INIT(executable_definition);
202         obj->op_def = parse_operation_definition(ARGS); MSG("invalid operation or fragment definition"); OR
203         obj->frag_def = parse_fragment_definition(ARGS); MSG("invalid operation or fragment definition"); 
204         EXIT;
205 }
206
207 RET parse_operation_definition(PARAMS) {
208         INIT(operation_definition);
209         obj->op_type = parse_operation_type(ARGS);
210         if (!*err) {
211                 obj->op_name = parse_name(ARGS); OPT
212                 obj->vars = parse_variable_definitions(ARGS); OPT
213                 obj->directives = parse_directives(ARGS); OPT
214         } else
215                 *err = NULL;
216         obj->sel_set = parse_selection_set(ARGS);
217         if (*err) ROLLBACK(ARGS);
218         EXIT;
219 }
220
221 RET parse_operation_type(PARAMS) {
222         INIT(operation_type);
223         const char *errmsg = "expected: query, mutation, or subscription";
224         obj->op_type = parse_keyword(ARGS, "query", errmsg); OR
225         obj->op_type = parse_keyword(ARGS, "mutation", errmsg); OR
226         obj->op_type = parse_keyword(ARGS, "subscription", errmsg);
227         EXIT;
228 }
229
230 RET parse_selection_set(PARAMS) {
231         INIT(selection_set);
232         parse_punct(ARGS, '{'); REQ;
233         obj->first = parse_selection(ARGS); REQ;
234         struct graphql_selection *p = obj->first;
235         parse_punct(ARGS, '}');
236         while (*err) {
237                 *err = NULL;
238                 p->next = parse_selection(ARGS); MSG("expected: selection or '}'"); REQ;
239                 p = p->next;
240                 parse_punct(ARGS, '}');
241         }
242         EXIT;
243 }
244
245 RET parse_selection(PARAMS) {
246         INIT(selection);
247         obj->field = parse_field(ARGS); OR
248         obj->frag_spread = parse_fragment_spread(ARGS); OR
249         obj->inline_frag = parse_inline_fragment(ARGS);
250         MSG("expected: field, fragment spread, or inline fragment");
251         EXIT;
252 }
253
254 RET parse_field(PARAMS) {
255         INIT(field);
256         obj->alias = parse_alias(ARGS); OPT
257         obj->name = parse_name(ARGS); REQ
258         obj->args = parse_arguments(ARGS); OPT
259         obj->directives = parse_directives(ARGS); OPT
260         obj->sel_set = parse_selection_set(ARGS); OPT
261         EXIT;
262 }
263
264 RET parse_arguments(PARAMS) {
265         INIT(arguments);
266         parse_punct(ARGS, '('); REQ
267         obj->first = parse_argument(ARGS); REQ
268         struct graphql_argument *p = obj->first;
269         parse_punct(ARGS, ')');
270         while (*err) {
271                 *err = NULL;
272                 p->next = parse_argument(ARGS); MSG("expected: argument or ')'"); REQ;
273                 p = p->next;
274                 parse_punct(ARGS, ')');
275         }
276         EXIT;
277 }
278
279 RET parse_argument(PARAMS) {
280         INIT(argument);
281         obj->name = parse_name(ARGS); REQ
282         parse_punct(ARGS, ':'); REQ
283         obj->val = parse_value(ARGS); REQ
284         EXIT;
285 }
286
287 RET parse_alias(PARAMS) {
288         INIT(alias);
289         obj->name = parse_name(ARGS); REQ
290         parse_punct(ARGS, ':'); REQ
291         EXIT;
292 }
293
294 RET parse_fragment_spread(PARAMS) {
295         INIT(fragment_spread);
296         parse_punct(ARGS, 0x2026); REQ // ...
297         obj->name = parse_fragment_name(ARGS); REQ
298         obj->directives = parse_directives(ARGS); OPT
299         EXIT;
300 }
301
302 RET parse_fragment_definition(PARAMS) {
303         INIT(fragment_definition);
304         parse_keyword(ARGS, "fragment", "fragment expected"); REQ
305         obj->name = parse_fragment_name(ARGS); REQ
306         obj->type_cond = parse_type_condition(ARGS); REQ
307         obj->directives = parse_directives(ARGS); OPT
308         obj->sel_set = parse_selection_set(ARGS); REQ
309         EXIT;
310 }
311
312 RET parse_fragment_name(PARAMS) {
313         INIT(fragment_name);
314         obj->name = parse_name(ARGS); REQ
315         struct graphql_token *tok = list_top(used, struct graphql_token, list);
316         if (streq(tok->token_string, "on")) {
317                 *err = "invalid fragment name";
318                 ROLLBACK(ARGS);
319         }
320         EXIT;
321 }
322
323 RET parse_type_condition(PARAMS) {
324         INIT(type_condition);
325         parse_keyword(ARGS, "on", "expected: 'on'"); REQ
326         obj->named_type = parse_named_type(ARGS); REQ
327         EXIT;
328 }
329
330 RET parse_inline_fragment(PARAMS) {
331         INIT(inline_fragment);
332         parse_punct(ARGS, 0x2026); REQ // ...
333         obj->type_cond = parse_type_condition(ARGS); OPT
334         obj->directives = parse_directives(ARGS); OPT
335         obj->sel_set = parse_selection_set(ARGS); REQ
336         EXIT;
337 }
338
339 RET parse_value(PARAMS) {
340         INIT(value);
341         obj->var = parse_variable(ARGS); // FIXME: if not const
342         OR
343         obj->int_val = parse_int_value(ARGS); OR
344         obj->float_val = parse_float_value(ARGS); OR
345         obj->str_val = parse_string_value(ARGS); OR
346         obj->bool_val = parse_boolean_value(ARGS); OR
347         obj->null_val = parse_null_value(ARGS); OR
348         obj->enum_val = parse_enum_value(ARGS); OR
349         obj->list_val = parse_list_value(ARGS); OR
350         obj->obj_val = parse_object_value(ARGS);
351         EXIT;
352 }
353
354 RET parse_int_value(PARAMS) {
355         INIT(int_value);
356         obj->val = parse_int(ARGS);
357         EXIT;
358 }
359
360 RET parse_float_value(PARAMS) {
361         INIT(float_value);
362         obj->val = parse_float(ARGS);
363         EXIT;
364 }
365
366 RET parse_boolean_value(PARAMS) {
367         INIT(boolean_value);
368         obj->val = parse_keyword(ARGS, "true", "invalid boolean value"); OR
369         obj->val = parse_keyword(ARGS, "false", "invalid boolean value");
370         EXIT;
371 }
372
373 RET parse_string_value(PARAMS) {
374         INIT(string_value);
375         obj->val = parse_string(ARGS);
376         EXIT;
377 }
378
379 RET parse_null_value(PARAMS) {
380         INIT(null_value);
381         obj->val = parse_keyword(ARGS, "null", "null expected");
382         EXIT;
383 }
384
385 RET parse_enum_value(PARAMS) {
386         INIT(enum_value);
387         obj->val = parse_name(ARGS); REQ
388         struct graphql_token *tok = list_top(used, struct graphql_token, list);
389         if (streq(tok->token_string, "true")
390          || streq(tok->token_string, "false")
391          || streq(tok->token_string, "null")) {
392                 *err = "enum value cannot be true, false, or null";
393                 ROLLBACK(ARGS);
394         }
395         EXIT;
396 }
397
398 RET parse_list_value(PARAMS) {
399         INIT(list_value);
400         parse_punct(ARGS, '['); REQ
401         parse_punct(ARGS, ']');
402         while (*err) {
403                 *err = NULL;
404                 parse_value(ARGS); MSG("expected: value or ']'"); REQ
405                 parse_punct(ARGS, ']');
406         }
407         EXIT;
408 }
409
410 RET parse_object_value(PARAMS) {
411         INIT(object_value);
412         parse_punct(ARGS, '{'); REQ
413         parse_punct(ARGS, '}');
414         struct graphql_object_field *p = NULL;
415         while (*err) {
416                 *err = NULL;
417                 if (!p) {
418                         obj->first = p = parse_object_field(ARGS); MSG("expected: object field or '}'"); REQ
419                 } else {
420                         p->next = parse_object_field(ARGS); MSG("expected: object field or '}'"); REQ
421                         p = p->next;
422                 }
423                 parse_punct(ARGS, '}');
424         }
425         EXIT;
426 }
427
428 RET parse_object_field(PARAMS) {
429         INIT(object_field);
430         obj->name = parse_name(ARGS); REQ
431         parse_punct(ARGS, ':'); REQ
432         obj->val = parse_value(ARGS); REQ
433         EXIT;
434 }
435
436 RET parse_variable(PARAMS) {
437         INIT(variable);
438         parse_punct(ARGS, '$'); REQ
439         obj->name = parse_name(ARGS); REQ
440         EXIT;
441 }
442
443 RET parse_variable_definitions(PARAMS) {
444         INIT(variable_definitions);
445         parse_punct(ARGS, '('); REQ
446         obj->first = parse_variable_definition(ARGS); REQ
447         struct graphql_variable_definition *p = obj->first;
448         parse_punct(ARGS, ')');
449         while (*err) {
450                 *err = NULL;
451                 p->next = parse_variable_definition(ARGS); MSG("expected: variable definition or ')'"); REQ
452                 p = p->next;
453                 parse_punct(ARGS, ')');
454         }
455         EXIT;
456 }
457
458 RET parse_variable_definition(PARAMS) {
459         INIT(variable_definition);
460         obj->var = parse_variable(ARGS); REQ
461         parse_punct(ARGS, ':'); REQ
462         obj->type = parse_type(ARGS); REQ
463         obj->default_val = parse_default_value(ARGS); OPT
464         obj->directives = parse_directives(ARGS); OPT
465         EXIT;
466 }
467
468 RET parse_default_value(PARAMS) {
469         INIT(default_value);
470         parse_punct(ARGS, '='); REQ
471         obj->val = parse_value(ARGS); REQ
472         EXIT;
473 }
474
475 RET parse_type(PARAMS) {
476         INIT(type);
477         obj->named = parse_named_type(ARGS);
478 /*
479         OR
480         obj->list = parse_list_type(ARGS); OR
481         obj->non_null = parse_non_null_type(ARGS);
482 */
483         EXIT;
484 }
485
486 RET parse_named_type(PARAMS) {
487         INIT(named_type);
488         obj->name = parse_name(ARGS);
489         EXIT;
490 }
491
492 /*
493 RET parse_list_type(PARAMS) {
494         INIT(list_type);
495         parse_punct(ARGS, '['); REQ
496         parse_type(ARGS); REQ
497         parse_punct(ARGS, ']'); REQ
498         EXIT;
499 }
500
501 RET parse_non_null_type(PARAMS) {
502         INIT(non_null_type);
503         parse_non_null_type_1(ARGS); OR
504         parse_non_null_type_2(ARGS);
505         EXIT;
506 }
507
508 RET parse_non_null_type_1(PARAMS) {
509         INIT(non_null_type);
510         parse_named_type(ARGS); REQ;
511         parse_punct(ARGS, '!'); REQ;
512         EXIT;
513 }
514
515 RET parse_non_null_type_2(PARAMS) {
516         INIT(non_null_type);
517         parse_list_type(ARGS); REQ;
518         parse_punct(ARGS, '!'); REQ;
519         EXIT;
520 }
521 */
522
523 RET parse_directives(PARAMS) {
524         INIT(directives);
525         obj->first = parse_directive(ARGS); REQ
526         struct graphql_directive *p = obj->first;
527         do {
528                 p->next = parse_directive(ARGS);
529                 p = p->next;
530         } WHILE_OPT;
531         EXIT;
532 }
533
534 RET parse_directive(PARAMS) {
535         INIT(directive);
536         parse_punct(ARGS, '@'); REQ
537         obj->name = parse_name(ARGS); REQ
538         obj->args = parse_arguments(ARGS); OPT
539         EXIT;
540 }
541
542
543 /* The following functions construct the "leaves" of the abstract syntax tree. */
544
545 RET parse_keyword(PARAMS, const char *keyword, const char *errmsg) {
546         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
547         if (!tok || tok->token_type != 'a') {
548                 *err = errmsg; return;
549         }
550         if (!streq(tok->token_string, keyword)) {
551                 *err = errmsg; return;
552         }
553         CONSUME_ONE(ARGS);
554         return tok;
555 }
556
557 // Note: a static buffer is used here.
558 RET parse_punct(PARAMS, int punct) {
559         static char punctbuf[16];
560         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
561         if (!tok || tok->token_type != punct) {
562                 if (punct == 0x2026)
563                         sprintf(punctbuf, "expected: '...'");
564                 else
565                         sprintf(punctbuf, "expected: '%c'", punct);
566                 *err = punctbuf; return;
567         }
568         CONSUME_ONE(ARGS);
569         return tok;
570 }
571
572 RET parse_name(PARAMS) {
573         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
574         if (!tok || tok->token_type != 'a') {
575                 *err = "name expected"; return 0;
576         }
577         CONSUME_ONE(ARGS);
578         return tok;
579 }
580
581 RET parse_int(PARAMS) {
582         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
583         if (!tok || tok->token_type != 'i') {
584                 *err = "integer expected"; return;
585         }
586         CONSUME_ONE(ARGS);
587         return tok;
588 }
589
590 RET parse_float(PARAMS) {
591         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
592         if (!tok || tok->token_type != 'f') {
593                 *err = "float expected"; return;
594         }
595         CONSUME_ONE(ARGS);
596         return tok;
597 }
598
599 RET parse_string(PARAMS) {
600         struct graphql_token *tok = list_top(tokens, struct graphql_token, list);
601         if (!tok || tok->token_type != 's') {
602                 *err = "string expected"; return;
603         }
604         CONSUME_ONE(ARGS);
605         return tok;
606 }
607
608
609 // Convert input string into tokens.
610 const char *graphql_lex(const char *input, const tal_t *ctx, struct list_head **tokens) {
611
612         unsigned int c;
613         const char *p, *line_beginning;
614         unsigned int line_num = 1;
615         struct list_head *tok_list;
616         struct graphql_token *tok;
617
618         // Initialize token output list.
619         tok_list = tal(ctx, struct list_head);
620         if (tokens)
621                 *tokens = tok_list;
622         list_head_init(tok_list);
623
624         // Note: label and goto are used here like a continue statement except that
625         // it skips iteration, for when characters are fetched in the loop body.
626         p = input;
627         line_beginning = p;
628         do {
629                 c = *p++;
630 newchar:
631                 // Consume line terminators and increment line counter.
632                 if (LINE_TERMINATOR(c)) {
633                         unsigned int c0 = c;
634                         c = *p++;
635                         if (c0 == 10 || c0 == 13)
636                                 line_num++;
637                         if (c0 == 13 && c == 10)
638                                 c = *p++;
639                         line_beginning = p - 1;
640                         goto newchar;
641                 }
642
643                 // Consume other ignored tokens.
644                 if (COMMA(c) || WHITE_SPACE(c)) {
645                         c = *p++;
646                         goto newchar;
647                 }
648                 if (COMMENT(c)) {
649                         while ((c = *p++) != EOF && !EOF_CHAR(c) && COMMENT_CHAR(c))
650                                 ; // No-op
651                         goto newchar;
652                 }
653
654                 // Return success when end is reached.
655                 if (EOF_CHAR(c))
656                         return GRAPHQL_SUCCESS;
657
658                 // Punctuator tokens.
659                 if (PUNCTUATOR(c)) {
660
661                         // Note beginning of token in input.
662                         const char *start = p - 1;
663
664                         // Handle the ... multi-character case.
665                         if (c == '.') {
666                                 c = *p++;
667                                 if (c != '.')
668                                         return "unrecognized punctuator";
669                                 c = *p++;
670                                 if (c != '.')
671                                         return "unrecognized punctuator";
672                                 c = 0x2026;
673                         }
674
675                         tok = tal(tok_list, struct graphql_token);
676                         list_add_tail(tok_list, tok);
677                         tok->token_type = c;
678                         tok->token_specific = c;
679                         tok->token_string = NULL;
680                         tok->source_line = line_num;
681                         tok->source_column = start - line_beginning + 1;
682                         tok->source_len = p - start;
683
684                 } else if (NAME_START(c)) {
685
686                         // Name/identifier tokens.
687                         tok = tal(tok_list, struct graphql_token);
688                         list_add_tail(tok_list, tok);
689                         tok->token_type = 'a';
690                         tok->token_specific = 'a';
691                         // tok->token_string updated below.
692                         tok->source_line = line_num;
693                         tok->source_column = p - line_beginning;
694                         // tok->source_len updated below.
695
696                         // Note the beginning of the name.
697                         const char *name_begin = p - 1;
698                         const char *name_end;
699                         int name_len;
700
701                         // Consume the rest of the token.
702                         do {
703                                 c = *p++;
704                         } while (NAME_CONTINUE(c));
705
706                         // Note the end of the name and calculate the length.
707                         name_end = p - 1;
708                         name_len = name_end - name_begin;
709                         tok->source_len = name_len;
710
711                         // Copy the token string.
712                         tok->token_string = tal_strndup(tok, name_begin, name_len);
713
714                         goto newchar;
715
716                 } else if (DIGIT(c) || c == '-') {
717
718                         // Number tokens.
719                         const char *num_start = p - 1;
720                         char type = 'i';
721
722                         if (c == '-') {
723                                 c = *p++;
724                                 if (!DIGIT(c))
725                                         return "negative sign must precede a number";
726                         }
727
728                         if (c == '0') {
729                                 c = *p++;
730                                 if (DIGIT(c))
731                                         return "leading zeros are not allowed";
732                         } else {
733                                 do {
734                                         c = *p++;
735                                 } while(DIGIT(c));
736                         }
737
738                         if (c == '.') {
739                                 type = 'f';
740                                 if (!DIGIT(*p))
741                                         return "invalid float value fractional part";
742                                 do {
743                                         c = *p++;
744                                 } while(DIGIT(c));
745                         }
746
747                         if (c == 'e' || c == 'E') {
748                                 type = 'f';
749                                 c = *p++;
750                                 if (c == '+' || c == '-')
751                                         c = *p++;
752                                 if (!DIGIT(*p))
753                                         return "invalid float value exponent part";
754                                 do {
755                                         c = *p++;
756                                 } while(DIGIT(c));
757                         }
758
759                         if (c == '.' || NAME_START(c))
760                                 return "invalid numeric value";
761
762                         const char *num_end = p - 1;
763                         int num_len = num_end - num_start;
764
765                         tok = tal(tok_list, struct graphql_token);
766                         list_add_tail(tok_list, tok);
767                         tok->token_type = type;
768                         tok->token_string = tal_strndup(tok, num_start, num_len);
769                         tok->source_line = line_num;
770                         tok->source_column = num_start - line_beginning + 1;
771                         tok->source_len = num_len;
772
773                         goto newchar;
774
775                 } else if (c == '"') {
776
777                         // String tokens.
778                         c = *p++;
779                         const char *str_begin = p - 1;
780                         const char *str_end;
781                         bool str_block = false;
782                         if (c == '"') {
783                                 c = *p++;
784                                 if (c == '"') {
785                                         // block string
786                                         str_block = true;
787                                         str_begin += 2;
788                                         int quotes = 0;
789                                         do {
790                                                 c = *p++;
791                                                 if (c == '\"') quotes++; else quotes = 0;
792                                                 if (quotes == 3 && *(p-4) == '\\') quotes = 0;
793                                         } while (BLOCK_STRING_CHAR(c) && quotes < 3);
794                                         if (quotes == 3) {
795                                                 c = *--p;
796                                                 c = *--p;
797                                         }
798                                         str_end = p - 1;
799                                         if (c != '"')
800                                                 return "unterminated string or invalid character";
801                                         c = *p++;
802                                         if (c != '"')
803                                                 return "invalid string termination";
804                                         c = *p++;
805                                         if (c != '"')
806                                                 return "invalid string termination";
807                                 } else {
808                                         // empty string
809                                         str_end = str_begin;
810                                         --p;
811                                 }
812                         } else {
813                                 // normal string
814                                 --p;
815                                 do {
816                                         c = *p++;
817                                         if (c == '\\') {
818                                                 c = *p++;
819                                                 if (strchr("\"\\/bfnrtu", c)) {
820                                                         if (c == 'u') {
821                                                                 c = *p++;
822                                                                 if (!HEX_DIGIT(c))
823                                                                         return "invalid unicode escape sequence";
824                                                                 c = *p++;
825                                                                 if (!HEX_DIGIT(c))
826                                                                         return "invalid unicode escape sequence";
827                                                                 c = *p++;
828                                                                 if (!HEX_DIGIT(c))
829                                                                         return "invalid unicode escape sequence";
830                                                                 c = *p++;
831                                                                 if (!HEX_DIGIT(c))
832                                                                         return "invalid unicode escape sequence";
833                                                         } else {
834                                                                 c = 'a'; // anything besides a quote to let the loop continue
835                                                         }
836                                                 } else {
837                                                         return "invalid string escape sequence";
838                                                 }
839                                         }
840                                 } while (STRING_CHAR(c));
841                                 if (c != '"')
842                                         return "unterminated string or invalid character";
843                                 str_end = p - 1;
844                         }
845                         int str_len = str_end - str_begin;
846
847                         tok = tal(tok_list, struct graphql_token);
848                         list_add_tail(tok_list, tok);
849                         tok->token_type = 's';
850                         tok->token_specific = 's';
851                         tok->token_string = tal_strndup(tok, str_begin, str_len);
852                         tok->source_line = line_num;
853                         tok->source_column = str_begin - line_beginning + 1;
854                         tok->source_len = str_len;
855
856                         // Process escape sequences. These always shorten the string (so the memory allocation is always enough).
857                         char d;
858                         char *q = tok->token_string;
859                         char *rewrite_dest;
860                         int quotes = 0;
861                         while (d = *q++) {
862                                 if (str_block) {
863                                         if (d == '\"') quotes++; else quotes = 0;
864                                         if (quotes == 3 && *(q-4) == '\\') {
865                                                 quotes = 0;
866                                                 rewrite_dest = q - 4;
867                                                 strcpy(rewrite_dest, q - 3);
868                                         }
869                                 } else {
870                                         if (d == '\\') {
871                                                 rewrite_dest = q - 1;
872                                                 d = *q++;
873                                                 switch (d) {
874                                                 case '\"':
875                                                         *rewrite_dest++ = '\"';
876                                                         strcpy(rewrite_dest, q--);
877                                                         break;
878                                                 case 'b':
879                                                         *rewrite_dest++ = '\b';
880                                                         strcpy(rewrite_dest, q--);
881                                                         break;
882                                                 case 'f':
883                                                         *rewrite_dest++ = '\f';
884                                                         strcpy(rewrite_dest, q--);
885                                                         break;
886                                                 case 'n':
887                                                         *rewrite_dest++ = '\n';
888                                                         strcpy(rewrite_dest, q--);
889                                                         break;
890                                                 case 'r':
891                                                         *rewrite_dest++ = '\r';
892                                                         strcpy(rewrite_dest, q--);
893                                                         break;
894                                                 case 't':
895                                                         *rewrite_dest++ = '\t';
896                                                         strcpy(rewrite_dest, q--);
897                                                         break;
898                                                 case 'u': {
899                                                                 // Insert escaped character using UTF-8 multi-byte encoding.
900                                                                 char buf[] = {*q++, *q++, *q++, *q++, 0};
901                                                                 int code_point = strtol(buf, 0, 16);
902                                                                 int bytes = utf8_encode(code_point, rewrite_dest);
903                                                                 rewrite_dest += bytes;
904                                                                 strcpy(rewrite_dest, q--);
905                                                         }
906                                                         break;
907                                                 default:
908                                                         strcpy(rewrite_dest, --q);
909                                                 }
910                                         }
911                                 }
912                         }
913                         if (str_block) {
914                                 // Strip leading blank lines (lines containing only whitespace).
915                                 q = tok->token_string;
916                                 for (;;) {
917                                         d = *q++;
918                                         while (WHITE_SPACE(d))
919                                                 d = *q++;
920                                         if (LINE_TERMINATOR(d)) {
921                                                 while (LINE_TERMINATOR(d))
922                                                         d = *q++;
923                                                 strcpy(tok->token_string, q - 1);
924                                                 q = tok->token_string;
925                                         } else
926                                                 break;
927                                 }
928
929                                 // Strip trailing blank lines (lines containing only whitespace).
930                                 q = tok->token_string + strlen(tok->token_string);
931                                 for (;;) {
932                                         d = *--q;
933                                         while (WHITE_SPACE(d))
934                                                 d = *--q;
935                                         if (LINE_TERMINATOR(d)) {
936                                                 while (LINE_TERMINATOR(d))
937                                                         d = *--q;
938                                                 *++q = 0;
939                                         } else
940                                                 break;
941                                 }
942
943                                 // Look for common indentation.
944                                 char *this_indent_start;
945                                 const char *this_indent_end;
946                                 const char *common_indent_start = NULL;
947                                 const char *common_indent_end;
948                                 const char *r;
949                                 q = tok->token_string;
950                                 do {
951                                         d = *q++;
952                                         this_indent_start = q - 1;
953                                         while (WHITE_SPACE(d))
954                                                 d = *q++;
955                                         this_indent_end = q - 1;
956                                         if (LINE_TERMINATOR(d)) {
957                                                 while (LINE_TERMINATOR(d))
958                                                         d = *q++;
959                                                 continue;
960                                         }
961                                         if (EOF_CHAR(d))
962                                                 continue;
963
964                                         if (common_indent_start == NULL) {
965                                                 common_indent_start = this_indent_start;
966                                                 common_indent_end = this_indent_end;
967                                         }
968                                         for (r = this_indent_start; r < this_indent_end && (r - this_indent_start + common_indent_start < common_indent_end); r++) {
969                                                 if (*r != *(r - this_indent_start + common_indent_start))
970                                                         break;
971                                         }
972                                         common_indent_end = r - this_indent_start + common_indent_start;
973
974                                         while (!LINE_TERMINATOR(d) && !EOF_CHAR(d))
975                                                 d = *q++;
976                                         while (LINE_TERMINATOR(d))
977                                                 d = *q++;
978                                         --q;
979
980                                 } while (d);
981
982                                 // Remove common indentation.
983                                 int common_indent_len = common_indent_end - common_indent_start;
984                                 if (common_indent_len > 0) {
985                                         q = tok->token_string;
986                                         do {
987                                                 d = *q++;
988                                                 this_indent_start = q - 1;
989                                                 while (WHITE_SPACE(d))
990                                                         d = *q++;
991                                                 this_indent_end = q - 1;
992                                                 if (LINE_TERMINATOR(d)) {
993                                                         while (LINE_TERMINATOR(d))
994                                                                 d = *q++;
995                                                         continue;
996                                                 }
997                                                 if (EOF_CHAR(d))
998                                                         continue;
999
1000                                                 while (!LINE_TERMINATOR(d) && !EOF_CHAR(d))
1001                                                         d = *q++;
1002                                                 --q;
1003
1004                                                 strcpy(this_indent_start, this_indent_start + common_indent_len);
1005                                                 q -= common_indent_len;
1006                                                 d = *q++;
1007
1008                                                 while (LINE_TERMINATOR(d))
1009                                                         d = *q++;
1010                                                 --q;
1011
1012                                         } while (d);
1013                                 }
1014                         }
1015                         c = *p++;
1016                         goto newchar;
1017
1018                 } else {
1019                         return "invalid source character encountered";
1020                 }
1021
1022         } while (!EOF_CHAR(c));
1023
1024         return "unexpected end-of-input encountered";
1025 }
1026
1027
1028