1 /* MIT (BSD) license - see LICENSE file for details */
4 #include "ccan/tal/str/str.h"
5 #include "ccan/utf8/utf8.h"
8 /* GraphQL character classes
10 * These definitions are meant to reflect the GraphQL specification as
11 * literally as possible.
13 #define SOURCE_CHAR(c) ((c) == '\t' || (c) == '\n' || (c) == '\r' || ((c) >= 32 && (c) <= 65535))
14 #define WHITE_SPACE(c) ((c) == '\t' || (c) == ' ')
15 #define LINE_TERMINATOR(c) ((c) == '\n' || (c) == '\r')
16 #define COMMENT(c) ((c) == '#')
17 #define COMMENT_CHAR(c) (SOURCE_CHAR(c) && !LINE_TERMINATOR(c))
18 #define STRING_CHAR(c) (SOURCE_CHAR(c) && !LINE_TERMINATOR(c) && (c)!='"' && (c)!='\\')
19 #define BLOCK_STRING_CHAR(c) (SOURCE_CHAR(c))
20 #define COMMA(c) ((c) == ',')
21 #define EOF_CHAR(c) ((c) == 0 || (c) == 4)
22 #define PUNCTUATOR(c) (strchr("!$&().:=@[]{|}", c))
23 #define HEX_DIGIT(c) (DIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
24 #define DIGIT(c) ((c) >= '0' && (c) <= '9')
25 #define NAME_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_')
26 #define NAME_CONTINUE(c) (NAME_START(c) || DIGIT(c))
28 // Helper for copying an overlapping string, since strcpy() is not safe for that
29 #define cpystr(d,s) { char *cpystr_p; char *cpystr_q; for(cpystr_p = (s), cpystr_q = (d); *cpystr_p;) *cpystr_q++ = *cpystr_p++; *cpystr_q++ = *cpystr_p++; }
33 * These shorthands are motivated by the parser functions, so they can be
34 * written in a format that corresponds closely to the specification.
36 #define RET static void *
37 #define PARAMS struct list_head *tokens, struct list_head *used, const char **err
38 #define ARGS tokens, used, err
40 struct graphql_token *rollback_top = list_top(tokens, struct graphql_token, node); \
41 struct graphql_##type *obj = talz(tokens, struct graphql_##type); \
42 (void)rollback_top; /* avoids unused variable warning */ \
45 goto exit_label; /* avoids unused label warning */ \
47 if (*err) obj = tal_free(obj); \
50 #define CONSUME_ONE list_add(used, &list_pop(tokens, struct graphql_token, node)->node);
51 #define RESTORE_ONE list_add(tokens, &list_pop(used, struct graphql_token, node)->node);
52 #define ROLLBACK(args) while (list_top(tokens, struct graphql_token, node) != rollback_top) { RESTORE_ONE; }
53 #define OR if (!*err) goto exit_label; *err = NULL;
54 #define REQ if (*err) { ROLLBACK(args); goto exit_label; }
55 #define OPT *err = NULL;
56 #define WHILE_OPT while(!*err); *err = NULL;
57 #define LOOKAHEAD(args, tok) struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
58 #define MSG(msg) if (*err) *err = msg;
61 /* The following parser functions are written in a way that corresponds to the
62 * grammar defined in the GraphQL specification. The code is not intended to
63 * look like normal C code; it's designed for parsing clarity rather than C
64 * style. Think of it as something generated rather than something to read.
65 * For that reason, the functions follow special rules:
67 * - The declaration is standardized with RET and PARAMS
68 * - The "err" argument is assumed to be NULL upon entrance
69 * - The "err" argument is set on failure
70 * - If the function fails to parse, then "tokens" shall be as it was upon entrance
71 * - INIT and EXIT macros are used
72 * - Macros such as REQ and OPT facilitate readability and conciseness
75 /* The following functions construct the "leaves" of the abstract syntax tree. */
77 RET parse_keyword(PARAMS, const char *keyword, const char *errmsg) {
78 struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
79 if (!tok || tok->token_type != 'a') {
80 *err = errmsg; return NULL;
82 if (!streq(tok->token_string, keyword)) {
83 *err = errmsg; return NULL;
89 // Note: a static buffer is used here.
90 RET parse_punct(PARAMS, int punct) {
91 static char punctbuf[16];
92 struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
93 if (!tok || tok->token_type != punct) {
94 if (punct == PUNCT_SPREAD)
95 sprintf(punctbuf, "expected: '...'");
97 sprintf(punctbuf, "expected: '%c'", punct);
98 *err = punctbuf; return NULL;
104 RET parse_name(PARAMS) {
105 struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
106 if (!tok || tok->token_type != 'a') {
107 *err = "name expected"; return NULL;
113 RET parse_int(PARAMS) {
114 struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
115 if (!tok || tok->token_type != 'i') {
116 *err = "integer expected"; return NULL;
122 RET parse_float(PARAMS) {
123 struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
124 if (!tok || tok->token_type != 'f') {
125 *err = "float expected"; return NULL;
131 RET parse_string(PARAMS) {
132 struct graphql_token *tok = list_top(tokens, struct graphql_token, node);
133 if (!tok || tok->token_type != 's') {
134 *err = "string expected"; return NULL;
140 // The following functions create the branches of the AST.
143 RET parse_non_null_type_2(PARAMS) {
145 parse_list_type(ARGS); REQ;
146 parse_punct(ARGS, '!'); REQ;
150 RET parse_non_null_type_1(PARAMS) {
152 parse_named_type(ARGS); REQ;
153 parse_punct(ARGS, '!'); REQ;
157 RET parse_non_null_type(PARAMS) {
159 parse_non_null_type_1(ARGS); OR
160 parse_non_null_type_2(ARGS);
164 RET parse_list_type(PARAMS) {
166 parse_punct(ARGS, '['); REQ
167 parse_type(ARGS); REQ
168 parse_punct(ARGS, ']'); REQ
173 RET parse_named_type(PARAMS) {
175 obj->name = parse_name(ARGS);
179 RET parse_type(PARAMS) {
181 obj->named = parse_named_type(ARGS);
184 obj->list = parse_list_type(ARGS); OR
185 obj->non_null = parse_non_null_type(ARGS);
190 RET parse_variable(PARAMS) {
192 parse_punct(ARGS, '$'); REQ
193 obj->name = parse_name(ARGS); REQ
197 RET parse_value(PARAMS);
199 RET parse_list_value(PARAMS) {
201 parse_punct(ARGS, '['); REQ
202 parse_punct(ARGS, ']');
205 parse_value(ARGS); MSG("expected: value or ']'"); REQ
206 parse_punct(ARGS, ']');
211 RET parse_enum_value(PARAMS) {
213 obj->val = parse_name(ARGS); REQ
214 struct graphql_token *tok = list_top(used, struct graphql_token, node);
215 if (streq(tok->token_string, "true")
216 || streq(tok->token_string, "false")
217 || streq(tok->token_string, "null")) {
218 *err = "enum value cannot be true, false, or null";
224 RET parse_null_value(PARAMS) {
226 obj->val = parse_keyword(ARGS, "null", "null expected");
230 RET parse_string_value(PARAMS) {
232 obj->val = parse_string(ARGS);
236 RET parse_boolean_value(PARAMS) {
238 obj->val = parse_keyword(ARGS, "true", "invalid boolean value"); OR
239 obj->val = parse_keyword(ARGS, "false", "invalid boolean value");
243 RET parse_float_value(PARAMS) {
245 obj->val = parse_float(ARGS);
249 RET parse_int_value(PARAMS) {
251 obj->val = parse_int(ARGS);
255 RET parse_object_field(PARAMS) {
257 obj->name = parse_name(ARGS); REQ
258 parse_punct(ARGS, ':'); REQ
259 obj->val = parse_value(ARGS); REQ
263 RET parse_object_value(PARAMS) {
265 parse_punct(ARGS, '{'); REQ
266 parse_punct(ARGS, '}');
267 struct graphql_object_field *p = NULL;
271 obj->first = p = parse_object_field(ARGS); MSG("expected: object field or '}'"); REQ
273 p->next = parse_object_field(ARGS); MSG("expected: object field or '}'"); REQ
276 parse_punct(ARGS, '}');
281 RET parse_default_value(PARAMS) {
283 parse_punct(ARGS, '='); REQ
284 obj->val = parse_value(ARGS); REQ
288 RET parse_value(PARAMS) {
290 obj->var = parse_variable(ARGS); // FIXME: if not const
292 obj->int_val = parse_int_value(ARGS); OR
293 obj->float_val = parse_float_value(ARGS); OR
294 obj->str_val = parse_string_value(ARGS); OR
295 obj->bool_val = parse_boolean_value(ARGS); OR
296 obj->null_val = parse_null_value(ARGS); OR
297 obj->enum_val = parse_enum_value(ARGS); OR
298 obj->list_val = parse_list_value(ARGS); OR
299 obj->obj_val = parse_object_value(ARGS);
303 RET parse_type_condition(PARAMS) {
304 INIT(type_condition);
305 parse_keyword(ARGS, "on", "expected: 'on'"); REQ
306 obj->named_type = parse_named_type(ARGS); REQ
310 RET parse_fragment_name(PARAMS) {
312 obj->name = parse_name(ARGS); REQ
313 struct graphql_token *tok = list_top(used, struct graphql_token, node);
314 if (streq(tok->token_string, "on")) {
315 *err = "invalid fragment name";
321 RET parse_alias(PARAMS) {
323 obj->name = parse_name(ARGS); REQ
324 parse_punct(ARGS, ':'); REQ
328 RET parse_argument(PARAMS) {
330 obj->name = parse_name(ARGS); REQ
331 parse_punct(ARGS, ':'); REQ
332 obj->val = parse_value(ARGS); REQ
336 RET parse_arguments(PARAMS) {
338 parse_punct(ARGS, '('); REQ
339 obj->first = parse_argument(ARGS); REQ
340 struct graphql_argument *p = obj->first;
341 parse_punct(ARGS, ')');
344 p->next = parse_argument(ARGS); MSG("expected: argument or ')'"); REQ;
346 parse_punct(ARGS, ')');
351 RET parse_directive(PARAMS) {
353 parse_punct(ARGS, '@'); REQ
354 obj->name = parse_name(ARGS); REQ
355 obj->args = parse_arguments(ARGS); OPT
359 RET parse_directives(PARAMS) {
361 obj->first = parse_directive(ARGS); REQ
362 struct graphql_directive *p = obj->first;
364 p->next = parse_directive(ARGS);
370 RET parse_fragment_spread(PARAMS) {
371 INIT(fragment_spread);
372 parse_punct(ARGS, PUNCT_SPREAD); REQ
373 obj->name = parse_fragment_name(ARGS); REQ
374 obj->directives = parse_directives(ARGS); OPT
378 RET parse_variable_definition(PARAMS) {
379 INIT(variable_definition);
380 obj->var = parse_variable(ARGS); REQ
381 parse_punct(ARGS, ':'); REQ
382 obj->type = parse_type(ARGS); REQ
383 obj->default_val = parse_default_value(ARGS); OPT
384 obj->directives = parse_directives(ARGS); OPT
388 RET parse_variable_definitions(PARAMS) {
389 INIT(variable_definitions);
390 parse_punct(ARGS, '('); REQ
391 obj->first = parse_variable_definition(ARGS); REQ
392 struct graphql_variable_definition *p = obj->first;
393 parse_punct(ARGS, ')');
396 p->next = parse_variable_definition(ARGS); MSG("expected: variable definition or ')'"); REQ
398 parse_punct(ARGS, ')');
403 RET parse_selection_set(PARAMS);
405 RET parse_fragment_definition(PARAMS) {
406 INIT(fragment_definition);
407 parse_keyword(ARGS, "fragment", "fragment expected"); REQ
408 obj->name = parse_fragment_name(ARGS); REQ
409 obj->type_cond = parse_type_condition(ARGS); REQ
410 obj->directives = parse_directives(ARGS); OPT
411 obj->sel_set = parse_selection_set(ARGS); REQ
415 RET parse_inline_fragment(PARAMS) {
416 INIT(inline_fragment);
417 parse_punct(ARGS, PUNCT_SPREAD); REQ
418 obj->type_cond = parse_type_condition(ARGS); OPT
419 obj->directives = parse_directives(ARGS); OPT
420 obj->sel_set = parse_selection_set(ARGS); REQ
424 RET parse_field(PARAMS) {
426 obj->alias = parse_alias(ARGS); OPT
427 obj->name = parse_name(ARGS); REQ
428 obj->args = parse_arguments(ARGS); OPT
429 obj->directives = parse_directives(ARGS); OPT
430 obj->sel_set = parse_selection_set(ARGS); OPT
434 RET parse_selection(PARAMS) {
436 obj->field = parse_field(ARGS); OR
437 obj->frag_spread = parse_fragment_spread(ARGS); OR
438 obj->inline_frag = parse_inline_fragment(ARGS);
439 MSG("expected: field, fragment spread, or inline fragment");
443 RET parse_selection_set(PARAMS) {
445 parse_punct(ARGS, '{'); REQ;
446 obj->first = parse_selection(ARGS); REQ;
447 struct graphql_selection *p = obj->first;
448 parse_punct(ARGS, '}');
451 p->next = parse_selection(ARGS); MSG("expected: selection or '}'"); REQ;
453 parse_punct(ARGS, '}');
458 RET parse_operation_type(PARAMS) {
459 INIT(operation_type);
460 const char *errmsg = "expected: query, mutation, or subscription";
461 obj->op_type = parse_keyword(ARGS, "query", errmsg); OR
462 obj->op_type = parse_keyword(ARGS, "mutation", errmsg); OR
463 obj->op_type = parse_keyword(ARGS, "subscription", errmsg);
467 RET parse_operation_definition(PARAMS) {
468 INIT(operation_definition);
469 obj->op_type = parse_operation_type(ARGS);
471 obj->op_name = parse_name(ARGS); OPT
472 obj->vars = parse_variable_definitions(ARGS); OPT
473 obj->directives = parse_directives(ARGS); OPT
476 obj->sel_set = parse_selection_set(ARGS);
477 if (*err) ROLLBACK(ARGS);
481 RET parse_executable_definition(PARAMS) {
482 INIT(executable_definition);
483 obj->op_def = parse_operation_definition(ARGS); MSG("invalid operation or fragment definition"); OR
484 obj->frag_def = parse_fragment_definition(ARGS); MSG("invalid operation or fragment definition");
488 RET parse_executable_document(PARAMS) {
489 INIT(executable_document);
490 obj->first_def = parse_executable_definition(ARGS); REQ
491 struct graphql_executable_definition *p = obj->first_def;
493 p->next_def = parse_executable_definition(ARGS);
499 RET parse_definition(PARAMS) {
501 obj->executable_def = parse_executable_definition(ARGS);
503 obj->type_system_def = parse_type_system_definition_or_extension(ARGS);
504 // NOTE: Optional type system is not (yet) implemented.
509 RET parse_document(PARAMS) {
511 obj->first_def = parse_definition(ARGS); REQ
512 struct graphql_definition *p = obj->first_def;
514 p->next_def = parse_definition(ARGS);
519 void *currently_unused = parse_document; // to hide the warning till this is used
521 /* Convert input string into tokens.
523 * All data (i.e. the list and the tokens it contains) are allocated to the
524 * specified tal context.
526 const char *graphql_lex(const tal_t *ctx, const char *input, struct list_head **tokens) {
529 const char *p, *line_beginning;
530 unsigned int line_num = 1;
531 struct list_head *tok_list;
532 struct graphql_token *tok;
534 // Initialize token output list.
535 tok_list = tal(ctx, struct list_head);
538 list_head_init(tok_list);
540 // Note: label and goto are used here like a continue statement except that
541 // it skips iteration, for when characters are fetched in the loop body.
547 // Consume line terminators and increment line counter.
548 if (LINE_TERMINATOR(c)) {
551 if (c0 == 10 || c0 == 13)
553 if (c0 == 13 && c == 10)
555 line_beginning = p - 1;
559 // Consume other ignored tokens.
560 if (COMMA(c) || WHITE_SPACE(c)) {
565 while (!EOF_CHAR(c) && COMMENT_CHAR(c))
570 // Return success when end is reached.
572 return GRAPHQL_SUCCESS;
574 // Punctuator tokens.
577 // Note beginning of token in input.
578 const char *start = p - 1;
580 // Handle the ... multi-character case.
584 return "unrecognized punctuator";
587 return "unrecognized punctuator";
591 tok = talz(tok_list, struct graphql_token);
592 list_add_tail(tok_list, &tok->node);
594 tok->token_string = NULL;
595 tok->source_line = line_num;
596 tok->source_column = start - line_beginning + 1;
597 tok->source_offset = start - input;
598 tok->source_len = p - start;
600 } else if (NAME_START(c)) {
602 // Name/identifier tokens.
603 tok = talz(tok_list, struct graphql_token);
604 list_add_tail(tok_list, &tok->node);
605 tok->token_type = 'a';
606 // tok->token_string updated below.
607 tok->source_line = line_num;
608 tok->source_column = p - line_beginning;
609 // tok->source_len updated below.
611 // Note the beginning of the name.
612 const char *name_begin = p - 1;
613 const char *name_end;
616 // Consume the rest of the token.
619 } while (NAME_CONTINUE(c));
621 // Note the end of the name and calculate the length.
623 name_len = name_end - name_begin;
624 tok->source_offset = name_begin - input;
625 tok->source_len = name_len;
627 // Copy the token string.
628 tok->token_string = tal_strndup(tok, name_begin, name_len);
632 } else if (DIGIT(c) || c == '-') {
635 const char *num_start = p - 1;
641 return "negative sign must precede a number";
647 return "leading zeros are not allowed";
657 return "invalid float value fractional part";
663 if (c == 'e' || c == 'E') {
666 if (c == '+' || c == '-')
669 return "invalid float value exponent part";
675 if (c == '.' || NAME_START(c))
676 return "invalid numeric value";
678 const char *num_end = p - 1;
679 int num_len = num_end - num_start;
681 tok = talz(tok_list, struct graphql_token);
682 list_add_tail(tok_list, &tok->node);
683 tok->token_type = type;
684 tok->token_string = tal_strndup(tok, num_start, num_len);
685 tok->source_line = line_num;
686 tok->source_column = num_start - line_beginning + 1;
687 tok->source_offset = num_start - input;
688 tok->source_len = num_len;
692 } else if (c == '"') {
696 const char *str_begin = p - 1;
698 bool str_block = false;
708 if (c == '\"') quotes++; else quotes = 0;
709 if (quotes == 3 && *(p-4) == '\\') quotes = 0;
710 } while (BLOCK_STRING_CHAR(c) && quotes < 3);
717 return "unterminated string or invalid character";
720 return "invalid string termination";
723 return "invalid string termination";
736 if (strchr("\"\\/bfnrtu", c)) {
740 return "invalid unicode escape sequence";
743 return "invalid unicode escape sequence";
746 return "invalid unicode escape sequence";
749 return "invalid unicode escape sequence";
751 c = 'a'; // anything besides a quote to let the loop continue
754 return "invalid string escape sequence";
757 } while (STRING_CHAR(c));
759 return "unterminated string or invalid character";
762 int str_len = str_end - str_begin;
764 tok = talz(tok_list, struct graphql_token);
765 list_add_tail(tok_list, &tok->node);
766 tok->token_type = 's';
767 tok->token_string = tal_strndup(tok, str_begin, str_len);
768 tok->source_line = line_num;
769 tok->source_column = str_begin - line_beginning + 1;
770 tok->source_offset = str_begin - input;
771 tok->source_len = str_len;
773 // Process escape sequences. These always shorten the string (so the memory allocation is always enough).
775 char *q = tok->token_string;
780 if (d == '\"') quotes++; else quotes = 0;
781 if (quotes == 3 && *(q-4) == '\\') {
783 rewrite_dest = q - 4;
784 cpystr(rewrite_dest, q - 3);
788 rewrite_dest = q - 1;
792 *rewrite_dest++ = '\"';
793 cpystr(rewrite_dest, q--);
796 *rewrite_dest++ = '\b';
797 cpystr(rewrite_dest, q--);
800 *rewrite_dest++ = '\f';
801 cpystr(rewrite_dest, q--);
804 *rewrite_dest++ = '\n';
805 cpystr(rewrite_dest, q--);
808 *rewrite_dest++ = '\r';
809 cpystr(rewrite_dest, q--);
812 *rewrite_dest++ = '\t';
813 cpystr(rewrite_dest, q--);
816 // Insert escaped character using UTF-8 multi-byte encoding.
817 char buf[5], *b = buf;
818 for (int i = 0; i < 4; i++)
821 int code_point = strtol(buf, 0, 16);
822 int bytes = utf8_encode(code_point, rewrite_dest);
823 // note: if bytes == 0
824 // due to encoding failure,
825 // the following will safely
826 // eliminate the invalid char.
827 rewrite_dest += bytes;
828 cpystr(rewrite_dest, q--);
832 cpystr(rewrite_dest, --q);
838 // Strip leading lines.
839 q = tok->token_string;
842 while (WHITE_SPACE(d))
844 if (LINE_TERMINATOR(d)) {
845 while (LINE_TERMINATOR(d))
847 cpystr(tok->token_string, q - 1);
848 q = tok->token_string;
853 // Strip trailing lines.
854 q = tok->token_string + strlen(tok->token_string);
857 while (WHITE_SPACE(d))
859 if (LINE_TERMINATOR(d)) {
860 while (LINE_TERMINATOR(d))
867 // Look for common indentation.
868 char *this_indent_start;
869 const char *this_indent_end;
870 const char *common_indent_start = NULL;
871 const char *common_indent_end = common_indent_start;
873 q = tok->token_string;
876 this_indent_start = q - 1;
877 while (WHITE_SPACE(d))
879 this_indent_end = q - 1;
880 if (LINE_TERMINATOR(d)) {
881 while (LINE_TERMINATOR(d))
888 if (common_indent_start == NULL) {
889 common_indent_start = this_indent_start;
890 common_indent_end = this_indent_end;
892 for (r = this_indent_start; r < this_indent_end && (r - this_indent_start + common_indent_start < common_indent_end); r++) {
893 if (*r != *(r - this_indent_start + common_indent_start))
896 common_indent_end = r - this_indent_start + common_indent_start;
898 while (!LINE_TERMINATOR(d) && !EOF_CHAR(d))
900 while (LINE_TERMINATOR(d))
906 // Remove common indentation.
907 int common_indent_len = common_indent_end - common_indent_start;
908 if (common_indent_len > 0) {
909 q = tok->token_string;
912 this_indent_start = q - 1;
913 while (WHITE_SPACE(d))
915 this_indent_end = q - 1;
916 if (LINE_TERMINATOR(d)) {
917 while (LINE_TERMINATOR(d))
924 while (!LINE_TERMINATOR(d) && !EOF_CHAR(d))
928 cpystr(this_indent_start, this_indent_start + common_indent_len);
929 q -= common_indent_len;
932 while (LINE_TERMINATOR(d))
943 return "invalid source character encountered";
946 } while (!EOF_CHAR(c));
948 return "unexpected end-of-input encountered";
951 // Convert lexed tokens into AST.
952 const char *graphql_parse(struct list_head *tokens, struct graphql_executable_document **doc) {
953 struct list_head used = LIST_HEAD_INIT(used);
954 const char *err = NULL;
955 *doc = parse_executable_document(tokens, &used, &err);
959 // Convert input string into AST.
960 const char *graphql_lexparse(const tal_t *ctx, const char *input, struct list_head **tokens, struct graphql_executable_document **doc) {
961 const char *err = graphql_lex(ctx, input, tokens);
963 err = graphql_parse(*tokens, doc);