From d873aaec1339baf45c37db7cbaa2d687656343ba Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 22 Nov 2012 11:42:22 +1030
Subject: [PATCH] tal/str: new module.

Tal-enhanced string routines, copied from str_talloc (which I also wrote).

Signed-off-by: Rusty Russell
---
 Makefile                       |   3 +
 Makefile-ccan                  |   1 +
 ccan/tal/str/LICENSE           |   1 +
 ccan/tal/str/_info             |  52 ++++++++++++++
 ccan/tal/str/str.c             | 142 +++++++++++++++++++++++++++++++++++++++
 ccan/tal/str/str.h             | 119 +++++++++++++++++++++++++++++++++
 ccan/tal/str/test/run-strreg.c |  95 ++++++++++++++++++++++++++
 ccan/tal/str/test/run.c        |  65 ++++++++++++++++++
 8 files changed, 478 insertions(+)
 create mode 120000 ccan/tal/str/LICENSE
 create mode 100644 ccan/tal/str/_info
 create mode 100644 ccan/tal/str/str.c
 create mode 100644 ccan/tal/str/str.h
 create mode 100644 ccan/tal/str/test/run-strreg.c
 create mode 100644 ccan/tal/str/test/run.c

diff --git a/Makefile b/Makefile
index 1e089827..b885875a 100644
--- a/Makefile
+++ b/Makefile
@@ -59,6 +59,9 @@ summary-fastcheck-%: tools/ccanlint/ccanlint $(OBJFILES)
 summary-fastcheck-antithread/%: tools/ccanlint/ccanlint $(OBJFILES)
 	tools/ccanlint/ccanlint -x tests_pass_valgrind -x tests_compile_coverage -s ccan/antithread/$*
 
+summary-fastcheck-tal/%: tools/ccanlint/ccanlint $(OBJFILES)
+	tools/ccanlint/ccanlint -x tests_pass_valgrind -x tests_compile_coverage -s ccan/tal/$*
+
 ccan/%/info: ccan/%/_info
 	$(CC) $(CCAN_CFLAGS) -o $@ -x c $<
 
diff --git a/Makefile-ccan b/Makefile-ccan
index 6641e53e..e16c87a8 100644
--- a/Makefile-ccan
+++ b/Makefile-ccan
@@ -72,6 +72,7 @@ MODS_NORMAL_WITH_SRC := antithread \
 	str_talloc \
 	take \
 	tal \
+	tal/str \
 	talloc \
 	talloc_link \
 	tally \
diff --git a/ccan/tal/str/LICENSE b/ccan/tal/str/LICENSE
new file mode 120000
index 00000000..2b1feca5
--- /dev/null
+++ b/ccan/tal/str/LICENSE
@@ -0,0 +1 @@
+../../../licenses/BSD-MIT
\ No newline at end of file
diff --git a/ccan/tal/str/_info b/ccan/tal/str/_info
new file mode 100644
index 00000000..63081a1b
--- /dev/null
+++ b/ccan/tal/str/_info
@@ -0,0 +1,52 @@
+#include
+#include
+#include "config.h"
+
+/**
+ * tal/str - string helper routines which use tal
+ *
+ * This is a grab bag of functions for string operations, designed to enhance
+ * the standard string.h; these are separated from the non-tal-needing
+ * string utilities in "str.h".
+ *
+ * Example:
+ *	#include
+ *	#include
+ *	#include
+ *
+ *	// Dumb demo program to double-linespace a file.
+ *	int main(int argc, char *argv[])
+ *	{
+ *		char *textfile;
+ *		char **lines;
+ *
+ *		// Grab lines in file.
+ *		textfile = grab_file(NULL, argv[1], NULL);
+ *		if (!textfile)
+ *			err(1, "Failed reading %s", argv[1]);
+ *		lines = strsplit(textfile, textfile, "\n", STR_EMPTY_OK);
+ *
+ *		// Join them back together with two linefeeds.
+ *		printf("%s", strjoin(textfile, lines, "\n\n", STR_TRAIL));
+ *
+ *		// Free everything, just because we can.
+ *		tal_free(textfile);
+ *		return 0;
+ *	}
+ *
+ * License: BSD-MIT
+ * Author: Rusty Russell
+ */
+int main(int argc, char *argv[])
+{
+	if (argc != 2)
+		return 1;
+
+	if (strcmp(argv[1], "depends") == 0) {
+		printf("ccan/str\n");
+		printf("ccan/tal\n");
+		return 0;
+	}
+
+	return 1;
+}
diff --git a/ccan/tal/str/str.c b/ccan/tal/str/str.c
new file mode 100644
index 00000000..0c07002b
--- /dev/null
+++ b/ccan/tal/str/str.c
@@ -0,0 +1,142 @@
+/* Licensed under BSD-MIT - see LICENSE file for details */
+#include
+#include
+#include
+#include
+#include
+#include "str.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Split @string at any character in @delims into a NULL-terminated,
+ * tal-allocated array of copies; returns NULL if allocation fails. */
+char **strsplit(const void *ctx, const char *string, const char *delims,
+		enum strsplit flags)
+{
+	char **lines;
+	size_t max = 64, num = 0;
+
+	/* One extra slot for the terminating NULL. */
+	lines = tal_arr(ctx, char *, max + 1);
+	if (!lines)
+		return NULL;
+
+	/* Leading delimiters would otherwise yield an empty first entry. */
+	if (flags == STR_NO_EMPTY)
+		string += strspn(string, delims);
+
+	while (*string != '\0') {
+		size_t len = strcspn(string, delims), dlen;
+
+		/* Each copy is a tal child of the array itself. */
+		lines[num] = tal_arr(lines, char, len + 1);
+		if (!lines[num])
+			goto fail;
+		memcpy(lines[num], string, len);
+		lines[num][len] = '\0';
+		string += len;
+		/* STR_EMPTY_OK skips one delimiter, else the whole run. */
+		dlen = strspn(string, delims);
+		if (flags == STR_EMPTY_OK && dlen)
+			dlen = 1;
+		string += dlen;
+		/* Grow geometrically, keeping room for the terminator. */
+		if (++num == max && !tal_resize(&lines, (max *= 2) + 1))
+			goto fail;
+	}
+	lines[num] = NULL;
+	return lines;
+
+fail:
+	tal_free(lines);
+	return NULL;
+}
+
+/* Concatenate @strings, each followed by @delim (the last one only if
+ * @flags is STR_TRAIL); returns NULL if allocation fails. */
+char *strjoin(const void *ctx, char *strings[], const char *delim,
+	      enum strjoin flags)
+{
+	unsigned int i;
+	char *ret = tal_strdup(ctx, "");
+	size_t totlen = 0, dlen = strlen(delim);
+
+	if (!ret)
+		return NULL;
+
+	for (i = 0; strings[i]; i++) {
+		size_t len = strlen(strings[i]);
+
+		/* Suppress the delimiter after the final string. */
+		if (flags == STR_NO_TRAIL && !strings[i+1])
+			dlen = 0;
+		if (!tal_resize(&ret, totlen + len + dlen + 1)) {
+			tal_free(ret);
+			return NULL;
+		}
+		memcpy(ret + totlen, strings[i], len);
+		totlen += len;
+		memcpy(ret + totlen, delim, dlen);
+		totlen += dlen;
+	}
+	ret[totlen] = '\0';
+	return ret;
+}
+
+/* Match @string against extended regex @regex, storing a tal copy of
+ * each parenthesized submatch via the char ** varargs (see str.h). */
+bool strreg(const void *ctx, const char *string, const char *regex, ...)
+{
+	size_t nmatch;
+	regmatch_t *matches;
+	regex_t r;
+	bool ret = false;
+
+	if (regcomp(&r, regex, REG_EXTENDED) != 0)
+		return false;
+
+	/* Ask the compiled regex how many groups it has: counting '('
+	 * characters would also count escaped or bracketed ones, and we
+	 * would then va_arg() past the arguments the caller supplied. */
+	nmatch = 1 + r.re_nsub;
+	matches = tal_arr(ctx, regmatch_t, nmatch);
+	if (!matches)
+		goto out;
+
+	if (regexec(&r, string, nmatch, matches, 0) == 0) {
+		unsigned int i;
+		va_list ap;
+
+		ret = true;
+		va_start(ap, regex);
+		for (i = 1; i < nmatch; i++) {
+			char **arg;
+			arg = va_arg(ap, char **);
+			if (arg) {
+				/* eg. ([a-z])? can give "no match". */
+				if (matches[i].rm_so == -1)
+					*arg = NULL;
+				else {
+					*arg = tal_strndup(ctx,
+						      string + matches[i].rm_so,
+						      matches[i].rm_eo
+						      - matches[i].rm_so);
+					if (!*arg) {
+						ret = false;
+						break;
+					}
+				}
+			}
+		}
+		va_end(ap);
+	}
+	tal_free(matches);
+out:
+	/* Always release the compiled regex, even on early failure. */
+	regfree(&r);
+	return ret;
+}
diff --git a/ccan/tal/str/str.h b/ccan/tal/str/str.h
new file mode 100644
index 00000000..30086fde
--- /dev/null
+++ b/ccan/tal/str/str.h
@@ -0,0 +1,119 @@
+/* Licensed under BSD-MIT - see LICENSE file for details */
+#ifndef CCAN_STR_TAL_H
+#define CCAN_STR_TAL_H
+#include
+#include
+#include
+#include
+
+enum strsplit {
+	STR_EMPTY_OK,
+	STR_NO_EMPTY
+};
+
+/**
+ * strsplit - Split string into an array of substrings
+ * @ctx: the parent to tal from (often NULL)
+ * @string: the string to split
+ * @delims: delimiters where lines should be split.
+ * @flags: whether to include empty substrings.
+ *
+ * This function splits a single string into multiple strings. The
+ * original string is untouched: an array is allocated (using tal)
+ * pointing to copies of each substring. Multiple delimiters result
+ * in empty substrings. By definition, no delimiters will appear in
+ * the substrings.
+ *
+ * The final char * in the array will be NULL.
+ *
+ * Example:
+ *	#include
+ *	...
+ *	static unsigned int count_long_lines(const char *string)
+ *	{
+ *		char **lines;
+ *		unsigned int i, long_lines = 0;
+ *
+ *		// Can only fail on out-of-memory.
+ *		lines = strsplit(NULL, string, "\n", STR_NO_EMPTY);
+ *		for (i = 0; lines[i] != NULL; i++)
+ *			if (strlen(lines[i]) > 80)
+ *				long_lines++;
+ *		tal_free(lines);
+ *		return long_lines;
+ *	}
+ */
+char **strsplit(const void *ctx, const char *string, const char *delims,
+		enum strsplit flags);
+
+enum strjoin {
+	STR_TRAIL,
+	STR_NO_TRAIL
+};
+
+/**
+ * strjoin - Join an array of substrings into one long string
+ * @ctx: the context to tal from (often NULL)
+ * @strings: the NULL-terminated array of strings to join
+ * @delim: the delimiter to insert between the strings
+ * @flags: whether to add a delimiter to the end
+ *
+ * This function joins an array of strings into a single string. The
+ * return value is allocated using tal. Each string in @strings is
+ * followed by a copy of @delim (the last one only with STR_TRAIL).
+ *
+ * Example:
+ *	// Append the string "-- EOL" to each line.
+ *	static char *append_to_all_lines(const char *string)
+ *	{
+ *		char **lines, *ret;
+ *
+ *		lines = strsplit(NULL, string, "\n", STR_EMPTY_OK);
+ *		ret = strjoin(NULL, lines, "-- EOL\n", STR_TRAIL);
+ *		tal_free(lines);
+ *		return ret;
+ *	}
+ */
+char *strjoin(const void *ctx, char *strings[], const char *delim,
+	      enum strjoin flags);
+
+/**
+ * strreg - match and extract from a string via (extended) regular expressions.
+ * @ctx: the context to tal from (often NULL)
+ * @string: the string to try to match.
+ * @regex: the regular expression to match.
+ * ...: pointers to strings to allocate for subexpressions.
+ *
+ * Returns true if we matched, in which case any parenthesized
+ * expressions in @regex are allocated and placed in the char **
+ * arguments following @regex. NULL arguments mean the match is not
+ * saved. The order of the strings is the order
+ * of opening parentheses in the expression: in the case of repeated
+ * expressions (eg "([a-z])*") the last one is saved, in the case of
+ * non-existent matches (eg "([a-z]*)?") the pointer is set to NULL.
+ *
+ * Allocation failures or malformed regular expressions return false.
+ *
+ * See Also:
+ *	regcomp(3), regex(3).
+ *
+ * Example:
+ *	// Given 'My name is Rusty' outputs 'Hello Rusty!'
+ *	// Given 'my first name is Rusty Russell' outputs 'Hello Rusty Russell!'
+ *	// Given 'My name isnt Rusty Russell' outputs 'Hello there!'
+ *	int main(int argc, char *argv[])
+ *	{
+ *		char *person, *input;
+ *
+ *		// Join args and trim trailing space.
+ *		input = strjoin(NULL, argv+1, " ", STR_NO_TRAIL);
+ *		if (strreg(NULL, input, "[Mm]y (first )?name is ([A-Za-z ]+)",
+ *			   NULL, &person))
+ *			printf("Hello %s!\n", person);
+ *		else
+ *			printf("Hello there!\n");
+ *		return 0;
+ *	}
+ */
+bool strreg(const void *ctx, const char *string, const char *regex, ...);
+#endif /* CCAN_STR_TAL_H */
diff --git a/ccan/tal/str/test/run-strreg.c b/ccan/tal/str/test/run-strreg.c
new file mode 100644
index 00000000..a2a7dd95
--- /dev/null
+++ b/ccan/tal/str/test/run-strreg.c
@@ -0,0 +1,95 @@
+#include
+#include
+#include
+
+static unsigned int tal_total_blocks(tal_t *ctx)
+{
+	unsigned int num = 1;
+	tal_t *i;
+
+	for (i = tal_first(ctx); i; i = tal_next(ctx, i))
+		num++;
+	return num;
+}
+
+static bool find_parent(tal_t *child, tal_t *parent)
+{
+	tal_t *i;
+
+	for (i = child; i; i = tal_parent(i))
+		if (i == parent)
+			return true;
+
+	return false;
+}
+
+int main(int argc, char *argv[])
+{
+	void *ctx = tal_strdup(NULL, "toplevel");
+	unsigned int top_blocks = tal_total_blocks(ctx);
+	char *a, *b;
+	/* If it accesses this, it will crash. */
+	char **invalid = (char **)1L;
+
+	plan_tests(25);
+	/* Simple matching. */
+	ok1(strreg(ctx, "hello world!", "hello") == true);
+	ok1(strreg(ctx, "hello world!", "hi") == false);
+
+	/* No parentheses means we don't use any extra args. */
+	ok1(strreg(ctx, "hello world!", "hello", invalid) == true);
+	ok1(strreg(ctx, "hello world!", "hi", invalid) == false);
+
+	ok1(strreg(ctx, "hello world!", "[a-z]+", invalid) == true);
+	ok1(strreg(ctx, "hello world!", "([a-z]+)", &a, invalid) == true);
+	/* Found string */
+	ok1(streq(a, "hello"));
+	/* Allocated off ctx */
+	ok1(find_parent(a, ctx));
+	tal_free(a);
+
+	ok1(strreg(ctx, "hello world!", "([a-z]*) ([a-z]+)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "hello"));
+	ok1(streq(b, "world"));
+	ok1(find_parent(a, ctx));
+	ok1(find_parent(b, ctx));
+	tal_free(a);
+	tal_free(b);
+
+	/* * after parentheses returns last match. */
+	ok1(strreg(ctx, "hello world!", "([a-z])* ([a-z]+)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "o"));
+	ok1(streq(b, "world"));
+	tal_free(a);
+	tal_free(b);
+
+	/* Nested parentheses are ordered by open brace. */
+	ok1(strreg(ctx, "hello world!", "(([a-z]*) world)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "hello world"));
+	ok1(streq(b, "hello"));
+	tal_free(a);
+	tal_free(b);
+
+	/* Repeating the same nested match gives the same result. */
+	ok1(strreg(ctx, "hello world!", "(([a-z]*) world)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "hello world"));
+	ok1(streq(b, "hello"));
+	tal_free(a);
+	tal_free(b);
+
+	/* NULL means we're not interested. */
+	ok1(strreg(ctx, "hello world!", "((hello|goodbye) world)",
+		   &a, NULL, invalid) == true);
+	ok1(streq(a, "hello world"));
+	tal_free(a);
+
+	/* No leaks! */
+	ok1(tal_total_blocks(ctx) == top_blocks);
+	tal_free(ctx);
+
+	return exit_status();
+}
diff --git a/ccan/tal/str/test/run.c b/ccan/tal/str/test/run.c
new file mode 100644
index 00000000..755bceaf
--- /dev/null
+++ b/ccan/tal/str/test/run.c
@@ -0,0 +1,65 @@
+#include
+#include
+#include
+#include
+#include
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+static const char *substrings[]
+= { "far", "bar", "baz", "b", "ba", "z", "ar", NULL };
+
+int main(int argc, char *argv[])
+{
+	char **split, *str;
+	void *ctx;
+
+	plan_tests(24);
+	split = strsplit(NULL, "hello  world", " ", STR_EMPTY_OK);
+	ok1(!strcmp(split[0], "hello"));
+	ok1(!strcmp(split[1], ""));
+	ok1(!strcmp(split[2], "world"));
+	ok1(split[3] == NULL);
+	tal_free(split);
+
+	split = strsplit(NULL, "hello world", " ", STR_NO_EMPTY);
+	ok1(!strcmp(split[0], "hello"));
+	ok1(!strcmp(split[1], "world"));
+	ok1(split[2] == NULL);
+	tal_free(split);
+
+	split = strsplit(NULL, " hello world", " ", STR_NO_EMPTY);
+	ok1(!strcmp(split[0], "hello"));
+	ok1(!strcmp(split[1], "world"));
+	ok1(split[2] == NULL);
+	tal_free(split);
+
+	split = strsplit(NULL, "hello  world", "o ", STR_EMPTY_OK);
+	ok1(!strcmp(split[0], "hell"));
+	ok1(!strcmp(split[1], ""));
+	ok1(!strcmp(split[2], ""));
+	ok1(!strcmp(split[3], "w"));
+	ok1(!strcmp(split[4], "rld"));
+	ok1(split[5] == NULL);
+
+	ctx = split;
+	split = strsplit(ctx, "hello world", "o ", STR_EMPTY_OK);
+	ok1(tal_parent(split) == ctx);
+	tal_free(ctx);
+
+	str = strjoin(NULL, (char **)substrings, ", ", STR_TRAIL);
+	ok1(!strcmp(str, "far, bar, baz, b, ba, z, ar, "));
+	ctx = str;
+	str = strjoin(ctx, (char **)substrings, "", STR_TRAIL);
+	ok1(!strcmp(str, "farbarbazbbazar"));
+	ok1(tal_parent(str) == ctx);
+	str = strjoin(ctx, (char **)substrings, ", ", STR_NO_TRAIL);
+	ok1(tal_parent(str) == ctx);
+	ok1(!strcmp(str, "far, bar, baz, b, ba, z, ar"));
+	str = strjoin(ctx, (char **)substrings, "", STR_NO_TRAIL);
+	ok1(!strcmp(str, "farbarbazbbazar"));
+	ok1(tal_parent(str) == ctx);
+	tal_free(ctx);
+
+	return exit_status();
+}
-- 
2.39.2