tal/str: new module.
author Rusty Russell <rusty@rustcorp.com.au>
Thu, 22 Nov 2012 01:12:22 +0000 (11:42 +1030)
committer Rusty Russell <rusty@rustcorp.com.au>
Thu, 22 Nov 2012 01:12:22 +0000 (11:42 +1030)
Tal-enhanced string routines, copied from str_talloc (which I also wrote).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Makefile
Makefile-ccan
ccan/tal/str/LICENSE [new symlink]
ccan/tal/str/_info [new file with mode: 0644]
ccan/tal/str/str.c [new file with mode: 0644]
ccan/tal/str/str.h [new file with mode: 0644]
ccan/tal/str/test/run-strreg.c [new file with mode: 0644]
ccan/tal/str/test/run.c [new file with mode: 0644]

index 1e08982704956b7ba0465c4dd6062f7071534048..b885875a4d6b3fcfa73c1c0b839c0f0db9ceee05 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -59,6 +59,9 @@ summary-fastcheck-%: tools/ccanlint/ccanlint $(OBJFILES)
 summary-fastcheck-antithread/%: tools/ccanlint/ccanlint $(OBJFILES)
        tools/ccanlint/ccanlint -x tests_pass_valgrind -x tests_compile_coverage -s ccan/antithread/$*
 
+# Modules nested under ccan/tal/ (e.g. tal/str): same ccanlint invocation as
+# the antithread rule above, pointed at ccan/tal/$*.
+summary-fastcheck-tal/%: tools/ccanlint/ccanlint $(OBJFILES)
+       tools/ccanlint/ccanlint -x tests_pass_valgrind -x tests_compile_coverage -s ccan/tal/$*
+
 ccan/%/info: ccan/%/_info
        $(CC) $(CCAN_CFLAGS) -o $@ -x c $<
 
index 6641e53e6bf6c8228840a5a81a2014ca6cefb795..e16c87a8696a0fbaa0e2fe841e7636c1ba8256d8 100644 (file)
@@ -72,6 +72,7 @@ MODS_NORMAL_WITH_SRC := antithread \
        str_talloc \
        take \
        tal \
+       tal/str \
        talloc \
        talloc_link \
        tally \
diff --git a/ccan/tal/str/LICENSE b/ccan/tal/str/LICENSE
new file mode 120000 (symlink)
index 0000000..2b1feca
--- /dev/null
@@ -0,0 +1 @@
+../../../licenses/BSD-MIT
\ No newline at end of file
diff --git a/ccan/tal/str/_info b/ccan/tal/str/_info
new file mode 100644 (file)
index 0000000..63081a1
--- /dev/null
@@ -0,0 +1,52 @@
+#include <stdio.h>
+#include <string.h>
+#include "config.h"
+
+/**
+ * tal/str - string helper routines which use tal
+ *
+ * This is a grab bag of functions for string operations, designed to enhance
+ * the standard string.h; these are separated from the non-tal-needing
+ * string utilities in "str.h".
+ *
+ * Example:
+ *     #include <ccan/tal/str/str.h>
+ *     #include <ccan/grab_file/grab_file.h>
+ *     #include <err.h>
+ *
+ *     // Dumb demo program to double-linespace a file.
+ *     int main(int argc, char *argv[])
+ *     {
+ *             char *textfile;
+ *             char **lines;
+ *
+ *             // Grab lines in file.
+ *             textfile = grab_file(NULL, argv[1], NULL);
+ *             if (!textfile)
+ *                     err(1, "Failed reading %s", argv[1]);
+ *             lines = strsplit(textfile, textfile, "\n", STR_EMPTY_OK);
+ *
+ *             // Join them back together with two linefeeds.
+ *             printf("%s", strjoin(textfile, lines, "\n\n", STR_TRAIL));
+ *
+ *             // Free everything, just because we can.
+ *             tal_free(textfile);
+ *             return 0;
+ *     }
+ *
+ * License: BSD-MIT
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+int main(int argc, char *argv[])
+{
+       /* Exactly one argument is expected; anything else is an error. */
+       if (argc != 2)
+               return 1;
+
+       /* "depends": print this module's dependencies, one per line. */
+       if (strcmp(argv[1], "depends") == 0) {
+               printf("ccan/str\n");
+               printf("ccan/tal\n");
+               return 0;
+       }
+
+       /* Any other query is not handled. */
+       return 1;
+}
diff --git a/ccan/tal/str/str.c b/ccan/tal/str/str.c
new file mode 100644 (file)
index 0000000..0c07002
--- /dev/null
@@ -0,0 +1,106 @@
+/* Licensed under BSD-MIT - see LICENSE file for details */
+#include <unistd.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "str.h"
+#include <sys/types.h>
+#include <regex.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <ccan/tal/tal.h>
+#include <ccan/str/str.h>
+
+/*
+ * Split @string at any character in @delims into a NULL-terminated array of
+ * tal-allocated copies.  The array is allocated off @ctx and each substring
+ * is allocated off the array.
+ *
+ * With STR_NO_EMPTY, leading delimiters are skipped and each run of
+ * delimiters counts as a single separator; with STR_EMPTY_OK every
+ * delimiter separates, so adjacent delimiters yield empty substrings.
+ *
+ * Returns NULL if an allocation fails.
+ */
+char **strsplit(const void *ctx, const char *string, const char *delims,
+               enum strsplit flags)
+{
+       char **lines;
+       size_t max = 64, num = 0;
+
+       /* Always keep one slot beyond max for the terminating NULL. */
+       lines = tal_arr(ctx, char *, max + 1);
+       if (!lines)
+               return NULL;
+
+       if (flags == STR_NO_EMPTY)
+               string += strspn(string, delims);
+
+       while (*string != '\0') {
+               size_t len = strcspn(string, delims), dlen;
+
+               lines[num] = tal_arr(lines, char, len + 1);
+               if (!lines[num])
+                       goto fail;
+               memcpy(lines[num], string, len);
+               lines[num][len] = '\0';
+               string += len;
+               dlen = strspn(string, delims);
+               if (flags == STR_EMPTY_OK && dlen)
+                       dlen = 1;
+               string += dlen;
+               if (++num == max) {
+                       /*
+                        * "max*=2 + 1" parses as max *= 3; double the
+                        * capacity and add the NULL slot explicitly.
+                        */
+                       max *= 2;
+                       if (!tal_resize(&lines, max + 1))
+                               goto fail;
+               }
+       }
+       lines[num] = NULL;
+       return lines;
+
+fail:
+       /* Substrings were allocated off the array, so they go with it. */
+       tal_free(lines);
+       return NULL;
+}
+
+/*
+ * Concatenate the NULL-terminated array @strings into one tal-allocated
+ * string (allocated off @ctx), appending @delim after each element.  With
+ * STR_NO_TRAIL the delimiter after the final element is omitted.
+ *
+ * Returns NULL if an allocation fails.
+ */
+char *strjoin(const void *ctx, char *strings[], const char *delim,
+             enum strjoin flags)
+{
+       size_t i;
+       char *ret = tal_strdup(ctx, "");
+       size_t totlen = 0, dlen = strlen(delim);
+
+       if (!ret)
+               return NULL;
+
+       for (i = 0; strings[i]; i++) {
+               size_t len = strlen(strings[i]);
+
+               /* Last element: drop the delimiter if asked to. */
+               if (flags == STR_NO_TRAIL && !strings[i+1])
+                       dlen = 0;
+               /* Room for this element, its delimiter and the final NUL. */
+               if (!tal_resize(&ret, totlen + len + dlen + 1)) {
+                       tal_free(ret);
+                       return NULL;
+               }
+               memcpy(ret + totlen, strings[i], len);
+               totlen += len;
+               memcpy(ret + totlen, delim, dlen);
+               totlen += dlen;
+       }
+       ret[totlen] = '\0';
+       return ret;
+}
+
+/*
+ * Match @string against the extended regular expression @regex.  On a
+ * match, one char ** vararg is consumed for each "(" counted in @regex:
+ * non-NULL arguments receive a copy (allocated off @ctx) of the
+ * corresponding subexpression, or NULL if that subexpression did not
+ * participate in the match.
+ *
+ * Returns true on a match; false on no match, a regex that fails to
+ * compile, or an allocation failure.
+ */
+bool strreg(const void *ctx, const char *string, const char *regex, ...)
+{
+       /* Slot 0 is the whole match; one more per "(" in the pattern. */
+       size_t nmatch = 1 + strcount(regex, "(");
+       regmatch_t *matches = tal_arr(ctx, regmatch_t, nmatch);
+       regex_t r;
+       bool ret;
+
+       if (!matches)
+               return false;
+
+       if (regcomp(&r, regex, REG_EXTENDED) != 0) {
+               /* Don't leak the match array when the regex is malformed. */
+               tal_free(matches);
+               return false;
+       }
+
+       if (regexec(&r, string, nmatch, matches, 0) == 0) {
+               unsigned int i;
+               va_list ap;
+
+               ret = true;
+               va_start(ap, regex);
+               for (i = 1; i < nmatch; i++) {
+                       char **arg;
+                       arg = va_arg(ap, char **);
+                       if (arg) {
+                               /* eg. ([a-z])? can give "no match". */
+                               if (matches[i].rm_so == -1)
+                                       *arg = NULL;
+                               else {
+                                       *arg = tal_strndup(ctx,
+                                                     string + matches[i].rm_so,
+                                                     matches[i].rm_eo
+                                                     - matches[i].rm_so);
+                                       if (!*arg) {
+                                               ret = false;
+                                               break;
+                                       }
+                               }
+                       }
+               }
+               va_end(ap);
+       } else
+               ret = false;
+       tal_free(matches);
+       regfree(&r);
+       return ret;
+}
diff --git a/ccan/tal/str/str.h b/ccan/tal/str/str.h
new file mode 100644 (file)
index 0000000..30086fd
--- /dev/null
@@ -0,0 +1,119 @@
+/* Licensed under BSD-MIT - see LICENSE file for details */
+#ifndef CCAN_STR_TAL_H
+#define CCAN_STR_TAL_H
+#include <ccan/tal/tal.h>
+#include <ccan/tal/tal.h>
+#include <string.h>
+#include <stdbool.h>
+
+/* Flags for strsplit(): whether empty substrings are returned. */
+enum strsplit {
+       STR_EMPTY_OK,
+       STR_NO_EMPTY
+};
+
+/**
+ * strsplit - Split string into an array of substrings
+ * @ctx: the parent to tal from (often NULL)
+ * @string: the string to split
+ * @delims: delimiters where lines should be split.
+ * @flags: whether to include empty substrings.
+ *
+ * This function splits a single string into multiple strings.  The
+ * original string is untouched: an array is allocated (using tal)
+ * pointing to copies of each substring.  With STR_EMPTY_OK, multiple
+ * adjacent delimiters result in empty substrings; with STR_NO_EMPTY,
+ * leading delimiters are skipped and a run of delimiters is treated
+ * as a single separator.  By definition, no delimiters will appear in
+ * the substrings.
+ *
+ * The final char * in the array will be NULL.
+ *
+ * Example:
+ *     #include <ccan/tal/str/str.h>
+ *     ...
+ *     static unsigned int count_long_lines(const char *string)
+ *     {
+ *             char **lines;
+ *             unsigned int i, long_lines = 0;
+ *
+ *             // Can only fail on out-of-memory.
+ *             lines = strsplit(NULL, string, "\n", STR_NO_EMPTY);
+ *             for (i = 0; lines[i] != NULL; i++)
+ *                     if (strlen(lines[i]) > 80)
+ *                             long_lines++;
+ *             tal_free(lines);
+ *             return long_lines;
+ *     }
+ */
+char **strsplit(const void *ctx, const char *string, const char *delims,
+               enum strsplit flags);
+
+/* Flags for strjoin(): whether @delim also follows the final string. */
+enum strjoin {
+       STR_TRAIL,
+       STR_NO_TRAIL
+};
+
+/**
+ * strjoin - Join an array of substrings into one long string
+ * @ctx: the context to tal from (often NULL)
+ * @strings: the NULL-terminated array of strings to join
+ * @delim: the delimiter to insert between the strings
+ * @flags: whether to add a delimiter to the end
+ *
+ * This function joins an array of strings into a single string.  The
+ * return value is allocated using tal.  Each string in @strings is
+ * followed by a copy of @delim, except that with STR_NO_TRAIL the
+ * final string is not.
+ *
+ * Example:
+ *     // Append the string "-- EOL" to each line.
+ *     static char *append_to_all_lines(const char *string)
+ *     {
+ *             char **lines, *ret;
+ *
+ *             lines = strsplit(NULL, string, "\n", STR_EMPTY_OK);
+ *             ret = strjoin(NULL, lines, "-- EOL\n", STR_TRAIL);
+ *             tal_free(lines);
+ *             return ret;
+ *     }
+ */
+char *strjoin(const void *ctx, char *strings[], const char *delim,
+             enum strjoin flags);
+
+/**
+ * strreg - match and extract from a string via (extended) regular expressions.
+ * @ctx: the context to tal from (often NULL)
+ * @string: the string to try to match.
+ * @regex: the regular expression to match.
+ * ...: pointers to strings to allocate for subexpressions.
+ *
+ * Returns true if we matched, in which case any parenthesized
+ * expressions in @regex are allocated and placed in the char **
+ * arguments following @regex.  NULL arguments mean the match is not
+ * saved.  The order of the strings is the order of opening braces in
+ * the expression: in the case of repeated expressions (eg "([a-z])*")
+ * the last one is saved, in the case of non-existent matches
+ * (eg "([a-z]*)?") the pointer is set to NULL.
+ *
+ * On a match, one argument is consumed for each "(" counted in @regex,
+ * so supply a char ** (or NULL) for every one of them.
+ *
+ * Allocation failures or malformed regular expressions return false.
+ *
+ * See Also:
+ *     regcomp(3), regex(3).
+ *
+ * Example:
+ *     // Given 'My name is Rusty' outputs 'Hello Rusty!'
+ *     // Given 'my first name is Rusty Russell' outputs 'Hello Rusty Russell!'
+ *     // Given 'My name isnt Rusty Russell' outputs 'Hello there!'
+ *     int main(int argc, char *argv[])
+ *     {
+ *             char *person, *input;
+ *
+ *             // Join args and trim trailing space.
+ *             input = strjoin(NULL, argv+1, " ", STR_NO_TRAIL);
+ *             if (strreg(NULL, input, "[Mm]y (first )?name is ([A-Za-z ]+)",
+ *                        NULL, &person))
+ *                     printf("Hello %s!\n", person);
+ *             else
+ *                     printf("Hello there!\n");
+ *             return 0;
+ *     }
+ */
+bool strreg(const void *ctx, const char *string, const char *regex, ...);
+#endif /* CCAN_STR_TAL_H */
diff --git a/ccan/tal/str/test/run-strreg.c b/ccan/tal/str/test/run-strreg.c
new file mode 100644 (file)
index 0000000..a2a7dd9
--- /dev/null
@@ -0,0 +1,95 @@
+#include <ccan/tal/str/str.h>
+#include <ccan/tal/str/str.c>
+#include <ccan/tap/tap.h>
+
+/* Count @ctx itself plus every node yielded by tal_first()/tal_next(). */
+static unsigned int tal_total_blocks(tal_t *ctx)
+{
+       /* Start at 1 to account for @ctx. */
+       unsigned int num = 1;
+       tal_t *i;
+
+       for (i = tal_first(ctx); i; i = tal_next(ctx, i))
+               num++;
+       return num;
+}
+
+/*
+ * Return true if @parent is @child itself or is reached by following
+ * tal_parent() upward from @child; false once the chain ends.
+ */
+static bool find_parent(tal_t *child, tal_t *parent)
+{
+       tal_t *i;
+
+       for (i = child; i; i = tal_parent(i))
+               if (i == parent)
+                       return true;
+
+       return false;
+}
+
+/* TAP test driver for strreg(): 25 checks, ending with a leak check on ctx. */
+int main(int argc, char *argv[])
+{
+       /* Every strreg() call below allocates off this context. */
+       void *ctx = tal_strdup(NULL, "toplevel");
+       /* Baseline block count, compared again at the end. */
+       unsigned int top_blocks = tal_total_blocks(ctx);
+       char *a, *b;
+       /* If it accesses this, it will crash. */
+       char **invalid = (char **)1L;
+
+       plan_tests(25);
+       /* Simple matching. */
+       ok1(strreg(ctx, "hello world!", "hello") == true);
+       ok1(strreg(ctx, "hello world!", "hi") == false);
+
+       /* No parentheses means we don't use any extra args. */
+       ok1(strreg(ctx, "hello world!", "hello", invalid) == true);
+       ok1(strreg(ctx, "hello world!", "hi", invalid) == false);
+
+       ok1(strreg(ctx, "hello world!", "[a-z]+", invalid) == true);
+       ok1(strreg(ctx, "hello world!", "([a-z]+)", &a, invalid) == true);
+       /* Found string */
+       ok1(streq(a, "hello"));
+       /* Allocated off ctx */
+       ok1(find_parent(a, ctx));
+       tal_free(a);
+
+       ok1(strreg(ctx, "hello world!", "([a-z]*) ([a-z]+)",
+                  &a, &b, invalid) == true);
+       ok1(streq(a, "hello"));
+       ok1(streq(b, "world"));
+       ok1(find_parent(a, ctx));
+       ok1(find_parent(b, ctx));
+       tal_free(a);
+       tal_free(b);
+
+       /* * after parentheses returns last match. */
+       ok1(strreg(ctx, "hello world!", "([a-z])* ([a-z]+)",
+                  &a, &b, invalid) == true);
+       ok1(streq(a, "o"));
+       ok1(streq(b, "world"));
+       tal_free(a);
+       tal_free(b);
+
+       /* Nested parentheses are ordered by open brace. */
+       ok1(strreg(ctx, "hello world!", "(([a-z]*) world)",
+                  &a, &b, invalid) == true);
+       ok1(streq(a, "hello world"));
+       ok1(streq(b, "hello"));
+       tal_free(a);
+       tal_free(b);
+
+       /*
+        * Nested parentheses are ordered by open brace.
+        * (Same case as the one just above, repeated verbatim; its three
+        * checks are included in the plan of 25.)
+        */
+       ok1(strreg(ctx, "hello world!", "(([a-z]*) world)",
+                  &a, &b, invalid) == true);
+       ok1(streq(a, "hello world"));
+       ok1(streq(b, "hello"));
+       tal_free(a);
+       tal_free(b);
+
+       /* NULL means we're not interested. */
+       ok1(strreg(ctx, "hello world!", "((hello|goodbye) world)",
+                  &a, NULL, invalid) == true);
+       ok1(streq(a, "hello world"));
+       tal_free(a);
+
+       /* No leaks! */
+       ok1(tal_total_blocks(ctx) == top_blocks);
+       tal_free(ctx);
+
+       return exit_status();
+}
diff --git a/ccan/tal/str/test/run.c b/ccan/tal/str/test/run.c
new file mode 100644 (file)
index 0000000..755bcea
--- /dev/null
@@ -0,0 +1,65 @@
+#include <ccan/tal/str/str.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ccan/tal/str/str.c>
+#include <ccan/tap/tap.h>
+
+/* Element count of a true array (not referenced in the visible test body). */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+/* NULL-terminated input for the strjoin() checks. */
+static const char *substrings[]
+= { "far", "bar", "baz", "b", "ba", "z", "ar", NULL };
+
+/* TAP test driver for strsplit() and strjoin(): 24 checks. */
+int main(int argc, char *argv[])
+{
+       char **split, *str;
+       void *ctx;
+
+       plan_tests(24);
+       /* STR_EMPTY_OK: the double space yields an empty substring. */
+       split = strsplit(NULL, "hello  world", " ", STR_EMPTY_OK);
+       ok1(!strcmp(split[0], "hello"));
+       ok1(!strcmp(split[1], ""));
+       ok1(!strcmp(split[2], "world"));
+       ok1(split[3] == NULL);
+       tal_free(split);
+
+       /* STR_NO_EMPTY: the double space counts as one separator. */
+       split = strsplit(NULL, "hello  world", " ", STR_NO_EMPTY);
+       ok1(!strcmp(split[0], "hello"));
+       ok1(!strcmp(split[1], "world"));
+       ok1(split[2] == NULL);
+       tal_free(split);
+
+       /* STR_NO_EMPTY: leading delimiters are skipped too. */
+       split = strsplit(NULL, "  hello  world", " ", STR_NO_EMPTY);
+       ok1(!strcmp(split[0], "hello"));
+       ok1(!strcmp(split[1], "world"));
+       ok1(split[2] == NULL);
+       tal_free(split);
+
+       /* Several delimiter characters: both 'o' and ' ' split. */
+       split = strsplit(NULL, "hello  world", "o ", STR_EMPTY_OK);
+       ok1(!strcmp(split[0], "hell"));
+       ok1(!strcmp(split[1], ""));
+       ok1(!strcmp(split[2], ""));
+       ok1(!strcmp(split[3], "w"));
+       ok1(!strcmp(split[4], "rld"));
+       ok1(split[5] == NULL);
+
+       /* Reuse the previous result as the context for the next split. */
+       ctx = split;
+       split = strsplit(ctx, "hello  world", "o ", STR_EMPTY_OK);
+       ok1(tal_parent(split) == ctx);
+       tal_free(ctx);
+
+       /* strjoin(): with and without the trailing delimiter. */
+       str = strjoin(NULL, (char **)substrings, ", ", STR_TRAIL);
+       ok1(!strcmp(str, "far, bar, baz, b, ba, z, ar, "));
+       /* The first joined string becomes the context for the rest. */
+       ctx = str;
+       str = strjoin(ctx, (char **)substrings, "", STR_TRAIL);
+       ok1(!strcmp(str, "farbarbazbbazar"));
+       ok1(tal_parent(str) == ctx);
+       str = strjoin(ctx, (char **)substrings, ", ", STR_NO_TRAIL);
+       ok1(tal_parent(str) == ctx);
+       ok1(!strcmp(str, "far, bar, baz, b, ba, z, ar"));
+       str = strjoin(ctx, (char **)substrings, "", STR_NO_TRAIL);
+       ok1(!strcmp(str, "farbarbazbbazar"));
+       ok1(tal_parent(str) == ctx);
+       tal_free(ctx);
+
+       return exit_status();
+}