From fa64b4599366818ea546c7db026f37d987d181a8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 8 Jan 2011 13:15:35 +1030 Subject: [PATCH] str_talloc: strreg Useful wrapper for extended POSIX regular expressions. --- ccan/str_talloc/_info | 1 + ccan/str_talloc/str_talloc.c | 48 ++++++++++++++++++++ ccan/str_talloc/str_talloc.h | 43 ++++++++++++++++++ ccan/str_talloc/test/run-strreg.c | 75 +++++++++++++++++++++++++++++++ tools/Makefile | 2 +- 5 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 ccan/str_talloc/test/run-strreg.c diff --git a/ccan/str_talloc/_info b/ccan/str_talloc/_info index 186075f2..f6214ce7 100644 --- a/ccan/str_talloc/_info +++ b/ccan/str_talloc/_info @@ -44,6 +44,7 @@ int main(int argc, char *argv[]) return 1; if (strcmp(argv[1], "depends") == 0) { + printf("ccan/str\n"); printf("ccan/talloc\n"); printf("ccan/noerr\n"); return 0; diff --git a/ccan/str_talloc/str_talloc.c b/ccan/str_talloc/str_talloc.c index d1d2f44d..e2d12df8 100644 --- a/ccan/str_talloc/str_talloc.c +++ b/ccan/str_talloc/str_talloc.c @@ -4,7 +4,12 @@ #include #include #include "str_talloc.h" +#include +#include +#include +#include #include +#include char **strsplit(const void *ctx, const char *string, const char *delims, unsigned int *nump) @@ -41,3 +46,46 @@ char *strjoin(const void *ctx, char *strings[], const char *delim) } return ret; } + +bool strreg(const void *ctx, const char *string, const char *regex, ...) +{ + size_t nmatch = 1 + strcount(regex, "("); + regmatch_t *matches = talloc_array(ctx, regmatch_t, nmatch); + regex_t r; + bool ret; + + if (!matches || regcomp(&r, regex, REG_EXTENDED) != 0) + return false; + + if (regexec(&r, string, nmatch, matches, 0) == 0) { + unsigned int i; + va_list ap; + + ret = true; + va_start(ap, regex); + for (i = 1; i < nmatch; i++) { + char **arg; + arg = va_arg(ap, char **); + if (arg) { + /* An optional group, eg. ([a-z])?, can give "no match" (rm_so is -1). 
*/ + if (matches[i].rm_so == -1) + *arg = NULL; + else { + *arg = talloc_strndup(ctx, + string + matches[i].rm_so, + matches[i].rm_eo + - matches[i].rm_so); + if (!*arg) { + ret = false; + break; + } + } + } + } + va_end(ap); + } else + ret = false; + talloc_free(matches); + regfree(&r); + return ret; +} diff --git a/ccan/str_talloc/str_talloc.h b/ccan/str_talloc/str_talloc.h index 50cea2dc..3c65f9f8 100644 --- a/ccan/str_talloc/str_talloc.h +++ b/ccan/str_talloc/str_talloc.h @@ -63,4 +63,47 @@ char **strsplit(const void *ctx, const char *string, const char *delims, * } */ char *strjoin(const void *ctx, char *strings[], const char *delim); + +/** + * strreg - match and extract from a string via (extended) regular expressions. + * @ctx: the context to tallocate from (often NULL) + * @string: the string to try to match. + * @regex: the regular expression to match. + * ...: pointers to strings to allocate for subexpressions. + * + * Returns true if we matched, in which case any parenthesized + * expressions in @regex are allocated and placed in the char ** + * arguments following @regex. NULL arguments mean the match is not + * saved. The order of the strings is the order + * of opening parentheses in the expression: in the case of repeated + * expressions (eg "([a-z])*") the last one is saved, in the case of + * non-existent matches (eg "([a-z]*)?") the pointer is set to NULL. + * + * Allocation failures or malformed regular expressions return false. NOTE(review): the regcomp() failure path in str_talloc.c returns before talloc_free(matches); confirm whether that is intended. + * + * See Also: + * regcomp(3), regex(3). + * + * Example: + * // Given 'My name is Rusty' outputs 'Hello Rusty!' + * // Given 'my first name is Rusty Russell' outputs 'Hello Rusty Russell!' + * // Given 'My name isnt Rusty Russell' outputs 'Hello there!' + * int main(int argc, char *argv[]) + * { + * char *person, *input; + * + * // Join args and trim trailing space. 
+ * input = strjoin(NULL, argv+1, " "); + * if (strlen(input) != 0) + * input[strlen(input)-1] = '\0'; + * + * if (strreg(NULL, input, "[Mm]y (first )?name is ([A-Za-z ]+)", + * NULL, &person)) + * printf("Hello %s!\n", person); + * else + * printf("Hello there!\n"); + * return 0; + * } + */ +bool strreg(const void *ctx, const char *string, const char *regex, ...); #endif /* CCAN_STR_TALLOC_H */ diff --git a/ccan/str_talloc/test/run-strreg.c b/ccan/str_talloc/test/run-strreg.c new file mode 100644 index 00000000..8cfa2d4c --- /dev/null +++ b/ccan/str_talloc/test/run-strreg.c @@ -0,0 +1,75 @@ +#include +#include +#include + +int main(int argc, char *argv[]) +{ + void *ctx = talloc_init("toplevel"); + unsigned int top_blocks = talloc_total_blocks(ctx); + char *a, *b; + /* Deliberately invalid pointer: if strreg() accesses this, it will crash. */ + char **invalid = (char **)1L; + + plan_tests(25); + /* Simple matching. */ + ok1(strreg(ctx, "hello world!", "hello") == true); + ok1(strreg(ctx, "hello world!", "hi") == false); + + /* No parentheses means we don't use any extra args. */ + ok1(strreg(ctx, "hello world!", "hello", invalid) == true); + ok1(strreg(ctx, "hello world!", "hi", invalid) == false); + + ok1(strreg(ctx, "hello world!", "[a-z]+", invalid) == true); + ok1(strreg(ctx, "hello world!", "([a-z]+)", &a, invalid) == true); + /* Found string */ + ok1(streq(a, "hello")); + /* Allocated off ctx */ + ok1(talloc_find_parent_byname(a, "toplevel") == ctx); + talloc_free(a); + + ok1(strreg(ctx, "hello world!", "([a-z]*) ([a-z]+)", + &a, &b, invalid) == true); + ok1(streq(a, "hello")); + ok1(streq(b, "world")); + ok1(talloc_find_parent_byname(a, "toplevel") == ctx); + ok1(talloc_find_parent_byname(b, "toplevel") == ctx); + talloc_free(a); + talloc_free(b); + + /* A * quantifier after parentheses returns the last match. 
*/ + ok1(strreg(ctx, "hello world!", "([a-z])* ([a-z]+)", + &a, &b, invalid) == true); + ok1(streq(a, "o")); + ok1(streq(b, "world")); + talloc_free(a); + talloc_free(b); + + /* Nested parentheses are ordered by opening parenthesis. */ + ok1(strreg(ctx, "hello world!", "(([a-z]*) world)", + &a, &b, invalid) == true); + ok1(streq(a, "hello world")); + ok1(streq(b, "hello")); + talloc_free(a); + talloc_free(b); + + /* Nested parentheses are ordered by opening parenthesis (repeat of the previous check). */ + ok1(strreg(ctx, "hello world!", "(([a-z]*) world)", + &a, &b, invalid) == true); + ok1(streq(a, "hello world")); + ok1(streq(b, "hello")); + talloc_free(a); + talloc_free(b); + + /* NULL means we're not interested. */ + ok1(strreg(ctx, "hello world!", "((hello|goodbye) world)", + &a, NULL, invalid) == true); + ok1(streq(a, "hello world")); + talloc_free(a); + + /* No leaks! */ + ok1(talloc_total_blocks(ctx) == top_blocks); + talloc_free(ctx); + talloc_disable_null_tracking(); + + return exit_status(); +} diff --git a/tools/Makefile b/tools/Makefile index d2c18aa1..a981e404 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,6 +1,6 @@ ALL_TOOLS = tools/configurator/configurator tools/ccan_depends tools/doc_extract tools/namespacize tools/ccanlint/ccanlint -DEP_OBJS = tools/depends.o tools/compile.o tools/tools.o ccan/str_talloc/str_talloc.o ccan/grab_file/grab_file.o ccan/talloc/talloc.o ccan/noerr/noerr.o ccan/read_write_all/read_write_all.o +DEP_OBJS = tools/depends.o tools/compile.o tools/tools.o ccan/str_talloc/str_talloc.o ccan/str/str.o ccan/grab_file/grab_file.o ccan/talloc/talloc.o ccan/noerr/noerr.o ccan/read_write_all/read_write_all.o .PHONY: tools tools: $(ALL_TOOLS) -- 2.39.2