]> git.ozlabs.org Git - ccan/blob - ccan/graphql/graphql.c
base64: fix for unsigned chars (e.g. ARM).
[ccan] / ccan / graphql / graphql.c
1 /* MIT (BSD) license - see LICENSE file for details */
2 #include "graphql.h"
3
4 #include "ccan/tal/str/str.h"
5 #include "ccan/utf8/utf8.h"
6
7
/* GraphQL character classes
 *
 * These definitions are meant to reflect the GraphQL specification as
 * literally as possible.
 *
 * Every macro takes a character code and yields a truth value. Arguments may
 * be evaluated more than once, so pass only side-effect-free expressions.
 */
#define SOURCE_CHAR(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || ((c) >= 32 && (c) <= 65535))
#define WHITE_SPACE(c) ((c) == '\t' || (c) == ' ')
#define LINE_TERMINATOR(c) ((c) == '\n' || (c) == '\r')
#define COMMENT(c) ((c) == '#')
#define COMMENT_CHAR(c) (SOURCE_CHAR(c) && !LINE_TERMINATOR(c))
#define STRING_CHAR(c) (SOURCE_CHAR(c) && !LINE_TERMINATOR(c) && (c)!='"' && (c)!='\\')
#define BLOCK_STRING_CHAR(c) (SOURCE_CHAR(c))
#define COMMA(c) ((c) == ',')
/* NUL and EOT (code 4) both count as end of input. */
#define EOF_CHAR(c) ((c) == 0 || (c) == 4)
/* strchr() also matches the NUL that terminates its search string, so NUL is
 * excluded explicitly; the result is normalized to 0 or 1. */
#define PUNCTUATOR(c) ((c) != 0 && strchr("!$&().:=@[]{|}", (int)(c)) != NULL)
#define HEX_DIGIT(c) (DIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
#define DIGIT(c) ((c) >= '0' && (c) <= '9')
#define NAME_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_')
#define NAME_CONTINUE(c) (NAME_START(c) || DIGIT(c))
27
// Overlap-tolerant stand-in for strcpy(): copies the NUL-terminated string at
// s to d byte by byte in ascending order, terminator included. Each argument
// is evaluated exactly once (s first, then d).
#define cpystr(d,s) { char *cpystr_src = (s); char *cpystr_dst = (d); while (*cpystr_src) *cpystr_dst++ = *cpystr_src++; *cpystr_dst = *cpystr_src; }
30
/* Parser shorthands
 *
 * These shorthands are motivated by the parser functions, so they can be
 * written in a format that corresponds closely to the specification.
 * They expect the names `tokens`, `used`, `err`, `obj`, `rollback_top` and
 * `exit_label` to be in scope; PARAMS, INIT and EXIT supply them.
 */

/* Uniform return type, parameter list, and the matching argument list. */
#define RET static void *
#define PARAMS struct list_head *tokens, struct list_head *used, const char **err
#define ARGS tokens, used, err

/* Record the current top token for ROLLBACK and allocate the zeroed result
 * node `obj` (struct graphql_<type>) with `tokens` as its tal context. */
#define INIT(type) \
	struct graphql_token *rollback_top = list_top(tokens, struct graphql_token, node); \
	struct graphql_##type *obj = talz(tokens, struct graphql_##type); \
	(void)rollback_top; /* avoids unused variable warning */

/* Common function tail: when an error is pending, free obj and keep
 * tal_free()'s result in its place; then return obj. */
#define EXIT \
	goto exit_label; /* avoids unused label warning */ \
	exit_label: \
	if (*err) { obj = tal_free(obj); } \
	return obj;

/* Move one token from `tokens` to `used`, or back again. */
#define CONSUME_ONE list_add(used, &list_pop(tokens, struct graphql_token, node)->node);
#define RESTORE_ONE list_add(tokens, &list_pop(used, struct graphql_token, node)->node);

/* Un-consume tokens until the top of `tokens` is the token INIT recorded.
 * The macro argument is ignored. */
#define ROLLBACK(args) while (list_top(tokens, struct graphql_token, node) != rollback_top) { RESTORE_ONE }

/* Alternation: leave if the preceding alternative succeeded, otherwise clear
 * the error so that the next alternative can be tried. */
#define OR if (!*err) { goto exit_label; } *err = NULL;

/* Required element: on error, restore the consumed tokens and leave. */
#define REQ if (*err) { ROLLBACK(args); goto exit_label; }

/* Optional element: discard any error from the preceding call. */
#define OPT *err = NULL;

/* Closes a `do { ... }` block: repeat until an error occurs, then clear it. */
#define WHILE_OPT while (!*err); *err = NULL;

/* Declare `tok` as the current top token without consuming it; the first
 * argument is ignored. */
#define LOOKAHEAD(args, tok) struct graphql_token *tok = list_top(tokens, struct graphql_token, node);

/* Replace a pending error message with `msg`; no effect without an error. */
#define MSG(msg) if (*err) { *err = (msg); }
59
60
61 /* The following parser functions are written in a way that corresponds to the
62  * grammar defined in the GraphQL specification. The code is not intended to
63  * look like normal C code; it's designed for parsing clarity rather than C
64  * style. Think of it as something generated rather than something to read.
65  * For that reason, the functions follow special rules:
66  *
67  *      - The declaration is standardized with RET and PARAMS
68  *      - The "err" argument is assumed to be NULL upon entrance
69  *      - The "err" argument is set on failure
70  *      - If the function fails to parse, then "tokens" shall be as it was upon entrance
71  *      - INIT and EXIT macros are used
72  *      - Macros such as REQ and OPT facilitate readability and conciseness
73  */
74
75 /* The following functions construct the "leaves" of the abstract syntax tree. */
76
77 RET parse_keyword(PARAMS, const char *keyword, const char *errmsg) {
78         struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
79         if (!tok || tok->token_type != 'a') {
80                 *err = errmsg; return NULL;
81         }
82         if (!streq(tok->token_string, keyword)) {
83                 *err = errmsg; return NULL;
84         }
85         CONSUME_ONE;
86         return tok;
87 }
88
89 // Note: a static buffer is used here.
90 RET parse_punct(PARAMS, int punct) {
91         static char punctbuf[16];
92         struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
93         if (!tok || tok->token_type != punct) {
94                 if (punct == PUNCT_SPREAD)
95                         sprintf(punctbuf, "expected: '...'");
96                 else
97                         sprintf(punctbuf, "expected: '%c'", punct);
98                 *err = punctbuf; return NULL;
99         }
100         CONSUME_ONE;
101         return tok;
102 }
103
104 RET parse_name(PARAMS) {
105         struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
106         if (!tok || tok->token_type != 'a') {
107                 *err = "name expected"; return NULL;
108         }
109         CONSUME_ONE;
110         return tok;
111 }
112
113 RET parse_int(PARAMS) {
114         struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
115         if (!tok || tok->token_type != 'i') {
116                 *err = "integer expected"; return NULL;
117         }
118         CONSUME_ONE;
119         return tok;
120 }
121
122 RET parse_float(PARAMS) {
123         struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
124         if (!tok || tok->token_type != 'f') {
125                 *err = "float expected"; return NULL;
126         }
127         CONSUME_ONE;
128         return tok;
129 }
130
131 RET parse_string(PARAMS) {
132         struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
133         if (!tok || tok->token_type != 's') {
134                 *err = "string expected"; return NULL;
135         }
136         CONSUME_ONE;
137         return tok;
138 }
139
140 // The following functions create the branches of the AST.
141
142 /*
143 RET parse_non_null_type_2(PARAMS) {
144         INIT(non_null_type);
145         parse_list_type(ARGS); REQ;
146         parse_punct(ARGS, '!'); REQ;
147         EXIT;
148 }
149
150 RET parse_non_null_type_1(PARAMS) {
151         INIT(non_null_type);
152         parse_named_type(ARGS); REQ;
153         parse_punct(ARGS, '!'); REQ;
154         EXIT;
155 }
156
157 RET parse_non_null_type(PARAMS) {
158         INIT(non_null_type);
159         parse_non_null_type_1(ARGS); OR
160         parse_non_null_type_2(ARGS);
161         EXIT;
162 }
163
164 RET parse_list_type(PARAMS) {
165         INIT(list_type);
166         parse_punct(ARGS, '['); REQ
167         parse_type(ARGS); REQ
168         parse_punct(ARGS, ']'); REQ
169         EXIT;
170 }
171 */
172
173 RET parse_named_type(PARAMS) {
174         INIT(named_type);
175         obj->name = parse_name(ARGS);
176         EXIT;
177 }
178
179 RET parse_type(PARAMS) {
180         INIT(type);
181         obj->named = parse_named_type(ARGS);
182 /*
183         OR
184         obj->list = parse_list_type(ARGS); OR
185         obj->non_null = parse_non_null_type(ARGS);
186 */
187         EXIT;
188 }
189
190 RET parse_variable(PARAMS) {
191         INIT(variable);
192         parse_punct(ARGS, '$'); REQ
193         obj->name = parse_name(ARGS); REQ
194         EXIT;
195 }
196
197 RET parse_value(PARAMS);
198
199 RET parse_list_value(PARAMS) {
200         INIT(list_value);
201         parse_punct(ARGS, '['); REQ
202         parse_punct(ARGS, ']');
203         while (*err) {
204                 *err = NULL;
205                 parse_value(ARGS); MSG("expected: value or ']'"); REQ
206                 parse_punct(ARGS, ']');
207         }
208         EXIT;
209 }
210
211 RET parse_enum_value(PARAMS) {
212         INIT(enum_value);
213         obj->val = parse_name(ARGS); REQ
214         struct graphql_token *tok = list_top(used, struct graphql_token, node);
215         if (streq(tok->token_string, "true")
216          || streq(tok->token_string, "false")
217          || streq(tok->token_string, "null")) {
218                 *err = "enum value cannot be true, false, or null";
219                 ROLLBACK(ARGS);
220         }
221         EXIT;
222 }
223
224 RET parse_null_value(PARAMS) {
225         INIT(null_value);
226         obj->val = parse_keyword(ARGS, "null", "null expected");
227         EXIT;
228 }
229
230 RET parse_string_value(PARAMS) {
231         INIT(string_value);
232         obj->val = parse_string(ARGS);
233         EXIT;
234 }
235
236 RET parse_boolean_value(PARAMS) {
237         INIT(boolean_value);
238         obj->val = parse_keyword(ARGS, "true", "invalid boolean value"); OR
239         obj->val = parse_keyword(ARGS, "false", "invalid boolean value");
240         EXIT;
241 }
242
243 RET parse_float_value(PARAMS) {
244         INIT(float_value);
245         obj->val = parse_float(ARGS);
246         EXIT;
247 }
248
249 RET parse_int_value(PARAMS) {
250         INIT(int_value);
251         obj->val = parse_int(ARGS);
252         EXIT;
253 }
254
255 RET parse_object_field(PARAMS) {
256         INIT(object_field);
257         obj->name = parse_name(ARGS); REQ
258         parse_punct(ARGS, ':'); REQ
259         obj->val = parse_value(ARGS); REQ
260         EXIT;
261 }
262
263 RET parse_object_value(PARAMS) {
264         INIT(object_value);
265         parse_punct(ARGS, '{'); REQ
266         parse_punct(ARGS, '}');
267         struct graphql_object_field *p = NULL;
268         while (*err) {
269                 *err = NULL;
270                 if (!p) {
271                         obj->first = p = parse_object_field(ARGS); MSG("expected: object field or '}'"); REQ
272                 } else {
273                         p->next = parse_object_field(ARGS); MSG("expected: object field or '}'"); REQ
274                         p = p->next;
275                 }
276                 parse_punct(ARGS, '}');
277         }
278         EXIT;
279 }
280
281 RET parse_default_value(PARAMS) {
282         INIT(default_value);
283         parse_punct(ARGS, '='); REQ
284         obj->val = parse_value(ARGS); REQ
285         EXIT;
286 }
287
288 RET parse_value(PARAMS) {
289         INIT(value);
290         obj->var = parse_variable(ARGS); // FIXME: if not const
291         OR
292         obj->int_val = parse_int_value(ARGS); OR
293         obj->float_val = parse_float_value(ARGS); OR
294         obj->str_val = parse_string_value(ARGS); OR
295         obj->bool_val = parse_boolean_value(ARGS); OR
296         obj->null_val = parse_null_value(ARGS); OR
297         obj->enum_val = parse_enum_value(ARGS); OR
298         obj->list_val = parse_list_value(ARGS); OR
299         obj->obj_val = parse_object_value(ARGS);
300         EXIT;
301 }
302
303 RET parse_type_condition(PARAMS) {
304         INIT(type_condition);
305         parse_keyword(ARGS, "on", "expected: 'on'"); REQ
306         obj->named_type = parse_named_type(ARGS); REQ
307         EXIT;
308 }
309
310 RET parse_fragment_name(PARAMS) {
311         INIT(fragment_name);
312         obj->name = parse_name(ARGS); REQ
313         struct graphql_token *tok = list_top(used, struct graphql_token, node);
314         if (streq(tok->token_string, "on")) {
315                 *err = "invalid fragment name";
316                 ROLLBACK(ARGS);
317         }
318         EXIT;
319 }
320
321 RET parse_alias(PARAMS) {
322         INIT(alias);
323         obj->name = parse_name(ARGS); REQ
324         parse_punct(ARGS, ':'); REQ
325         EXIT;
326 }
327
328 RET parse_argument(PARAMS) {
329         INIT(argument);
330         obj->name = parse_name(ARGS); REQ
331         parse_punct(ARGS, ':'); REQ
332         obj->val = parse_value(ARGS); REQ
333         EXIT;
334 }
335
336 RET parse_arguments(PARAMS) {
337         INIT(arguments);
338         parse_punct(ARGS, '('); REQ
339         obj->first = parse_argument(ARGS); REQ
340         struct graphql_argument *p = obj->first;
341         parse_punct(ARGS, ')');
342         while (*err) {
343                 *err = NULL;
344                 p->next = parse_argument(ARGS); MSG("expected: argument or ')'"); REQ;
345                 p = p->next;
346                 parse_punct(ARGS, ')');
347         }
348         EXIT;
349 }
350
351 RET parse_directive(PARAMS) {
352         INIT(directive);
353         parse_punct(ARGS, '@'); REQ
354         obj->name = parse_name(ARGS); REQ
355         obj->args = parse_arguments(ARGS); OPT
356         EXIT;
357 }
358
359 RET parse_directives(PARAMS) {
360         INIT(directives);
361         obj->first = parse_directive(ARGS); REQ
362         struct graphql_directive *p = obj->first;
363         do {
364                 p->next = parse_directive(ARGS);
365                 p = p->next;
366         } WHILE_OPT;
367         EXIT;
368 }
369
370 RET parse_fragment_spread(PARAMS) {
371         INIT(fragment_spread);
372         parse_punct(ARGS, PUNCT_SPREAD); REQ
373         obj->name = parse_fragment_name(ARGS); REQ
374         obj->directives = parse_directives(ARGS); OPT
375         EXIT;
376 }
377
378 RET parse_variable_definition(PARAMS) {
379         INIT(variable_definition);
380         obj->var = parse_variable(ARGS); REQ
381         parse_punct(ARGS, ':'); REQ
382         obj->type = parse_type(ARGS); REQ
383         obj->default_val = parse_default_value(ARGS); OPT
384         obj->directives = parse_directives(ARGS); OPT
385         EXIT;
386 }
387
388 RET parse_variable_definitions(PARAMS) {
389         INIT(variable_definitions);
390         parse_punct(ARGS, '('); REQ
391         obj->first = parse_variable_definition(ARGS); REQ
392         struct graphql_variable_definition *p = obj->first;
393         parse_punct(ARGS, ')');
394         while (*err) {
395                 *err = NULL;
396                 p->next = parse_variable_definition(ARGS); MSG("expected: variable definition or ')'"); REQ
397                 p = p->next;
398                 parse_punct(ARGS, ')');
399         }
400         EXIT;
401 }
402
403 RET parse_selection_set(PARAMS);
404
405 RET parse_fragment_definition(PARAMS) {
406         INIT(fragment_definition);
407         parse_keyword(ARGS, "fragment", "fragment expected"); REQ
408         obj->name = parse_fragment_name(ARGS); REQ
409         obj->type_cond = parse_type_condition(ARGS); REQ
410         obj->directives = parse_directives(ARGS); OPT
411         obj->sel_set = parse_selection_set(ARGS); REQ
412         EXIT;
413 }
414
415 RET parse_inline_fragment(PARAMS) {
416         INIT(inline_fragment);
417         parse_punct(ARGS, PUNCT_SPREAD); REQ
418         obj->type_cond = parse_type_condition(ARGS); OPT
419         obj->directives = parse_directives(ARGS); OPT
420         obj->sel_set = parse_selection_set(ARGS); REQ
421         EXIT;
422 }
423
424 RET parse_field(PARAMS) {
425         INIT(field);
426         obj->alias = parse_alias(ARGS); OPT
427         obj->name = parse_name(ARGS); REQ
428         obj->args = parse_arguments(ARGS); OPT
429         obj->directives = parse_directives(ARGS); OPT
430         obj->sel_set = parse_selection_set(ARGS); OPT
431         EXIT;
432 }
433
434 RET parse_selection(PARAMS) {
435         INIT(selection);
436         obj->field = parse_field(ARGS); OR
437         obj->frag_spread = parse_fragment_spread(ARGS); OR
438         obj->inline_frag = parse_inline_fragment(ARGS);
439         MSG("expected: field, fragment spread, or inline fragment");
440         EXIT;
441 }
442
443 RET parse_selection_set(PARAMS) {
444         INIT(selection_set);
445         parse_punct(ARGS, '{'); REQ;
446         obj->first = parse_selection(ARGS); REQ;
447         struct graphql_selection *p = obj->first;
448         parse_punct(ARGS, '}');
449         while (*err) {
450                 *err = NULL;
451                 p->next = parse_selection(ARGS); MSG("expected: selection or '}'"); REQ;
452                 p = p->next;
453                 parse_punct(ARGS, '}');
454         }
455         EXIT;
456 }
457
458 RET parse_operation_type(PARAMS) {
459         INIT(operation_type);
460         const char *errmsg = "expected: query, mutation, or subscription";
461         obj->op_type = parse_keyword(ARGS, "query", errmsg); OR
462         obj->op_type = parse_keyword(ARGS, "mutation", errmsg); OR
463         obj->op_type = parse_keyword(ARGS, "subscription", errmsg);
464         EXIT;
465 }
466
467 RET parse_operation_definition(PARAMS) {
468         INIT(operation_definition);
469         obj->op_type = parse_operation_type(ARGS);
470         if (!*err) {
471                 obj->op_name = parse_name(ARGS); OPT
472                 obj->vars = parse_variable_definitions(ARGS); OPT
473                 obj->directives = parse_directives(ARGS); OPT
474         } else
475                 *err = NULL;
476         obj->sel_set = parse_selection_set(ARGS);
477         if (*err) ROLLBACK(ARGS);
478         EXIT;
479 }
480
481 RET parse_executable_definition(PARAMS) {
482         INIT(executable_definition);
483         obj->op_def = parse_operation_definition(ARGS); MSG("invalid operation or fragment definition"); OR
484         obj->frag_def = parse_fragment_definition(ARGS); MSG("invalid operation or fragment definition"); 
485         EXIT;
486 }
487
488 RET parse_executable_document(PARAMS) {
489         INIT(executable_document);
490         obj->first_def = parse_executable_definition(ARGS); REQ
491         struct graphql_executable_definition *p = obj->first_def;
492         do {
493                 p->next_def = parse_executable_definition(ARGS);
494                 p = p->next_def;
495         } WHILE_OPT;
496         EXIT;
497 }
498
499 RET parse_definition(PARAMS) {
500         INIT(definition);
501         obj->executable_def = parse_executable_definition(ARGS);
502 /*      OR
503         obj->type_system_def = parse_type_system_definition_or_extension(ARGS);
504         // NOTE: Optional type system is not (yet) implemented.
505 */
506         EXIT;
507 }
508
509 RET parse_document(PARAMS) {
510         INIT(document);
511         obj->first_def = parse_definition(ARGS); REQ
512         struct graphql_definition *p = obj->first_def;
513         do {
514                 p->next_def = parse_definition(ARGS);
515                 p = p->next_def;
516         } WHILE_OPT;
517         EXIT;
518 }
519 void *currently_unused = parse_document; // to hide the warning till this is used
520
521 /* Convert input string into tokens.
522  *
523  * All data (i.e. the list and the tokens it contains) are allocated to the
524  * specified tal context.
525  */
526 const char *graphql_lex(const tal_t *ctx, const char *input, struct list_head **tokens) {
527
528         unsigned int c;
529         const char *p, *line_beginning;
530         unsigned int line_num = 1;
531         struct list_head *tok_list;
532         struct graphql_token *tok;
533
534         // Initialize token output list.
535         tok_list = tal(ctx, struct list_head);
536         if (tokens)
537                 *tokens = tok_list;
538         list_head_init(tok_list);
539
540         // Note: label and goto are used here like a continue statement except that
541         // it skips iteration, for when characters are fetched in the loop body.
542         p = input;
543         line_beginning = p;
544         do {
545                 c = *p++;
546 newchar:
547                 // Consume line terminators and increment line counter.
548                 if (LINE_TERMINATOR(c)) {
549                         unsigned int c0 = c;
550                         c = *p++;
551                         if (c0 == 10 || c0 == 13)
552                                 line_num++;
553                         if (c0 == 13 && c == 10)
554                                 c = *p++;
555                         line_beginning = p - 1;
556                         goto newchar;
557                 }
558
559                 // Consume other ignored tokens.
560                 if (COMMA(c) || WHITE_SPACE(c)) {
561                         c = *p++;
562                         goto newchar;
563                 }
564                 if (COMMENT(c)) {
565                         while (!EOF_CHAR(c) && COMMENT_CHAR(c))
566                                 c = *p++;
567                         goto newchar;
568                 }
569
570                 // Return success when end is reached.
571                 if (EOF_CHAR(c))
572                         return GRAPHQL_SUCCESS;
573
574                 // Punctuator tokens.
575                 if (PUNCTUATOR(c)) {
576
577                         // Note beginning of token in input.
578                         const char *start = p - 1;
579
580                         // Handle the ... multi-character case.
581                         if (c == '.') {
582                                 c = *p++;
583                                 if (c != '.')
584                                         return "unrecognized punctuator";
585                                 c = *p++;
586                                 if (c != '.')
587                                         return "unrecognized punctuator";
588                                 c = PUNCT_SPREAD;
589                         }
590
591                         tok = talz(tok_list, struct graphql_token);
592                         list_add_tail(tok_list, &tok->node);
593                         tok->token_type = c;
594                         tok->token_string = NULL;
595                         tok->source_line = line_num;
596                         tok->source_column = start - line_beginning + 1;
597                         tok->source_offset = start - input;
598                         tok->source_len = p - start;
599
600                 } else if (NAME_START(c)) {
601
602                         // Name/identifier tokens.
603                         tok = talz(tok_list, struct graphql_token);
604                         list_add_tail(tok_list, &tok->node);
605                         tok->token_type = 'a';
606                         // tok->token_string updated below.
607                         tok->source_line = line_num;
608                         tok->source_column = p - line_beginning;
609                         // tok->source_len updated below.
610
611                         // Note the beginning of the name.
612                         const char *name_begin = p - 1;
613                         const char *name_end;
614                         int name_len;
615
616                         // Consume the rest of the token.
617                         do {
618                                 c = *p++;
619                         } while (NAME_CONTINUE(c));
620
621                         // Note the end of the name and calculate the length.
622                         name_end = p - 1;
623                         name_len = name_end - name_begin;
624                         tok->source_offset = name_begin - input;
625                         tok->source_len = name_len;
626
627                         // Copy the token string.
628                         tok->token_string = tal_strndup(tok, name_begin, name_len);
629
630                         goto newchar;
631
632                 } else if (DIGIT(c) || c == '-') {
633
634                         // Number tokens.
635                         const char *num_start = p - 1;
636                         char type = 'i';
637
638                         if (c == '-') {
639                                 c = *p++;
640                                 if (!DIGIT(c))
641                                         return "negative sign must precede a number";
642                         }
643
644                         if (c == '0') {
645                                 c = *p++;
646                                 if (DIGIT(c))
647                                         return "leading zeros are not allowed";
648                         } else {
649                                 do {
650                                         c = *p++;
651                                 } while(DIGIT(c));
652                         }
653
654                         if (c == '.') {
655                                 type = 'f';
656                                 if (!DIGIT(*p))
657                                         return "invalid float value fractional part";
658                                 do {
659                                         c = *p++;
660                                 } while(DIGIT(c));
661                         }
662
663                         if (c == 'e' || c == 'E') {
664                                 type = 'f';
665                                 c = *p++;
666                                 if (c == '+' || c == '-')
667                                         c = *p++;
668                                 if (!DIGIT(*p))
669                                         return "invalid float value exponent part";
670                                 do {
671                                         c = *p++;
672                                 } while(DIGIT(c));
673                         }
674
675                         if (c == '.' || NAME_START(c))
676                                 return "invalid numeric value";
677
678                         const char *num_end = p - 1;
679                         int num_len = num_end - num_start;
680
681                         tok = talz(tok_list, struct graphql_token);
682                         list_add_tail(tok_list, &tok->node);
683                         tok->token_type = type;
684                         tok->token_string = tal_strndup(tok, num_start, num_len);
685                         tok->source_line = line_num;
686                         tok->source_column = num_start - line_beginning + 1;
687                         tok->source_offset = num_start - input;
688                         tok->source_len = num_len;
689
690                         goto newchar;
691
692                 } else if (c == '"') {
693
694                         // String tokens.
695                         c = *p++;
696                         const char *str_begin = p - 1;
697                         const char *str_end;
698                         bool str_block = false;
699                         if (c == '"') {
700                                 c = *p++;
701                                 if (c == '"') {
702                                         // block string
703                                         str_block = true;
704                                         str_begin += 2;
705                                         int quotes = 0;
706                                         do {
707                                                 c = *p++;
708                                                 if (c == '\"') quotes++; else quotes = 0;
709                                                 if (quotes == 3 && *(p-4) == '\\') quotes = 0;
710                                         } while (BLOCK_STRING_CHAR(c) && quotes < 3);
711                                         if (quotes == 3) {
712                                                 c = *--p;
713                                                 c = *--p;
714                                         }
715                                         str_end = p - 1;
716                                         if (c != '"')
717                                                 return "unterminated string or invalid character";
718                                         c = *p++;
719                                         if (c != '"')
720                                                 return "invalid string termination";
721                                         c = *p++;
722                                         if (c != '"')
723                                                 return "invalid string termination";
724                                 } else {
725                                         // empty string
726                                         str_end = str_begin;
727                                         --p;
728                                 }
729                         } else {
730                                 // normal string
731                                 --p;
732                                 do {
733                                         c = *p++;
734                                         if (c == '\\') {
735                                                 c = *p++;
736                                                 if (strchr("\"\\/bfnrtu", c)) {
737                                                         if (c == 'u') {
738                                                                 c = *p++;
739                                                                 if (!HEX_DIGIT(c))
740                                                                         return "invalid unicode escape sequence";
741                                                                 c = *p++;
742                                                                 if (!HEX_DIGIT(c))
743                                                                         return "invalid unicode escape sequence";
744                                                                 c = *p++;
745                                                                 if (!HEX_DIGIT(c))
746                                                                         return "invalid unicode escape sequence";
747                                                                 c = *p++;
748                                                                 if (!HEX_DIGIT(c))
749                                                                         return "invalid unicode escape sequence";
750                                                         } else {
751                                                                 c = 'a'; // anything besides a quote to let the loop continue
752                                                         }
753                                                 } else {
754                                                         return "invalid string escape sequence";
755                                                 }
756                                         }
757                                 } while (STRING_CHAR(c));
758                                 if (c != '"')
759                                         return "unterminated string or invalid character";
760                                 str_end = p - 1;
761                         }
762                         int str_len = str_end - str_begin;
763
764                         tok = talz(tok_list, struct graphql_token);
765                         list_add_tail(tok_list, &tok->node);
766                         tok->token_type = 's';
767                         tok->token_string = tal_strndup(tok, str_begin, str_len);
768                         tok->source_line = line_num;
769                         tok->source_column = str_begin - line_beginning + 1;
770                         tok->source_offset = str_begin - input;
771                         tok->source_len = str_len;
772
773                         // Process escape sequences. These always shorten the string (so the memory allocation is always enough).
774                         char d;
775                         char *q = tok->token_string;
776                         char *rewrite_dest;
777                         int quotes = 0;
778                         while ((d = *q++)) {
779                                 if (str_block) {
780                                         if (d == '\"') quotes++; else quotes = 0;
781                                         if (quotes == 3 && *(q-4) == '\\') {
782                                                 quotes = 0;
783                                                 rewrite_dest = q - 4;
784                                                 cpystr(rewrite_dest, q - 3);
785                                         }
786                                 } else {
787                                         if (d == '\\') {
788                                                 rewrite_dest = q - 1;
789                                                 d = *q++;
790                                                 switch (d) {
791                                                 case '\"':
792                                                         *rewrite_dest++ = '\"';
793                                                         cpystr(rewrite_dest, q--);
794                                                         break;
795                                                 case 'b':
796                                                         *rewrite_dest++ = '\b';
797                                                         cpystr(rewrite_dest, q--);
798                                                         break;
799                                                 case 'f':
800                                                         *rewrite_dest++ = '\f';
801                                                         cpystr(rewrite_dest, q--);
802                                                         break;
803                                                 case 'n':
804                                                         *rewrite_dest++ = '\n';
805                                                         cpystr(rewrite_dest, q--);
806                                                         break;
807                                                 case 'r':
808                                                         *rewrite_dest++ = '\r';
809                                                         cpystr(rewrite_dest, q--);
810                                                         break;
811                                                 case 't':
812                                                         *rewrite_dest++ = '\t';
813                                                         cpystr(rewrite_dest, q--);
814                                                         break;
815                                                 case 'u': {
816                                                                 // Insert escaped character using UTF-8 multi-byte encoding.
817                                                                 char buf[5], *b = buf;
818                                                                 for (int i = 0; i < 4; i++)
819                                                                         *b++ = *q++;
820                                                                 *b = 0;
821                                                                 int code_point = strtol(buf, 0, 16);
822                                                                 int bytes = utf8_encode(code_point, rewrite_dest);
823                                                                 // note: if bytes == 0
824                                                                 // due to encoding failure,
825                                                                 // the following will safely
826                                                                 // eliminate the invalid char.
827                                                                 rewrite_dest += bytes;
828                                                                 cpystr(rewrite_dest, q--);
829                                                         }
830                                                         break;
831                                                 default:
832                                                         cpystr(rewrite_dest, --q);
833                                                 }
834                                         }
835                                 }
836                         }
837                         if (str_block) {
838                                 // Strip leading lines.
839                                 q = tok->token_string;
840                                 for (;;) {
841                                         d = *q++;
842                                         while (WHITE_SPACE(d))
843                                                 d = *q++;
844                                         if (LINE_TERMINATOR(d)) {
845                                                 while (LINE_TERMINATOR(d))
846                                                         d = *q++;
847                                                 cpystr(tok->token_string, q - 1);
848                                                 q = tok->token_string;
849                                         } else
850                                                 break;
851                                 }
852
853                                 // Strip trailing lines.
854                                 q = tok->token_string + strlen(tok->token_string);
855                                 for (;;) {
856                                         d = *--q;
857                                         while (WHITE_SPACE(d))
858                                                 d = *--q;
859                                         if (LINE_TERMINATOR(d)) {
860                                                 while (LINE_TERMINATOR(d))
861                                                         d = *--q;
862                                                 *++q = 0;
863                                         } else
864                                                 break;
865                                 }
866
867                                 // Look for common indentation.
868                                 char *this_indent_start;
869                                 const char *this_indent_end;
870                                 const char *common_indent_start = NULL;
871                                 const char *common_indent_end = common_indent_start;
872                                 const char *r;
873                                 q = tok->token_string;
874                                 do {
875                                         d = *q++;
876                                         this_indent_start = q - 1;
877                                         while (WHITE_SPACE(d))
878                                                 d = *q++;
879                                         this_indent_end = q - 1;
880                                         if (LINE_TERMINATOR(d)) {
881                                                 while (LINE_TERMINATOR(d))
882                                                         d = *q++;
883                                                 continue;
884                                         }
885                                         if (EOF_CHAR(d))
886                                                 continue;
887
888                                         if (common_indent_start == NULL) {
889                                                 common_indent_start = this_indent_start;
890                                                 common_indent_end = this_indent_end;
891                                         }
892                                         for (r = this_indent_start; r < this_indent_end && (r - this_indent_start + common_indent_start < common_indent_end); r++) {
893                                                 if (*r != *(r - this_indent_start + common_indent_start))
894                                                         break;
895                                         }
896                                         common_indent_end = r - this_indent_start + common_indent_start;
897
898                                         while (!LINE_TERMINATOR(d) && !EOF_CHAR(d))
899                                                 d = *q++;
900                                         while (LINE_TERMINATOR(d))
901                                                 d = *q++;
902                                         --q;
903
904                                 } while (d);
905
906                                 // Remove common indentation.
907                                 int common_indent_len = common_indent_end - common_indent_start;
908                                 if (common_indent_len > 0) {
909                                         q = tok->token_string;
910                                         do {
911                                                 d = *q++;
912                                                 this_indent_start = q - 1;
913                                                 while (WHITE_SPACE(d))
914                                                         d = *q++;
915                                                 this_indent_end = q - 1;
916                                                 if (LINE_TERMINATOR(d)) {
917                                                         while (LINE_TERMINATOR(d))
918                                                                 d = *q++;
919                                                         continue;
920                                                 }
921                                                 if (EOF_CHAR(d))
922                                                         continue;
923
924                                                 while (!LINE_TERMINATOR(d) && !EOF_CHAR(d))
925                                                         d = *q++;
926                                                 --q;
927
928                                                 cpystr(this_indent_start, this_indent_start + common_indent_len);
929                                                 q -= common_indent_len;
930                                                 d = *q++;
931
932                                                 while (LINE_TERMINATOR(d))
933                                                         d = *q++;
934                                                 --q;
935
936                                         } while (d);
937                                 }
938                         }
939                         c = *p++;
940                         goto newchar;
941
942                 } else {
943                         return "invalid source character encountered";
944                 }
945
946         } while (!EOF_CHAR(c));
947
948         return "unexpected end-of-input encountered";
949 }
950
951 // Convert lexed tokens into AST.
952 const char *graphql_parse(struct list_head *tokens, struct graphql_executable_document **doc) {
953         struct list_head used = LIST_HEAD_INIT(used);
954         const char *err = NULL;
955         *doc = parse_executable_document(tokens, &used, &err);
956         return err;
957 }
958
959 // Convert input string into AST.
960 const char *graphql_lexparse(const tal_t *ctx, const char *input, struct list_head **tokens, struct graphql_executable_document **doc) {
961         const char *err = graphql_lex(ctx, input, tokens);
962         if (!err)
963                 err = graphql_parse(*tokens, doc);
964         return err;
965 }
966
967
968