From: Kevin Locke Date: Sun, 27 Nov 2016 00:44:01 +0000 (-0700) Subject: edit_distance: Rename ED_STACK_ELEMS ED_STACK_DIST_VALS X-Git-Url: http://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=346058c002415e57c6ed36efcfacb6813150aaa7 edit_distance: Rename ED_STACK_ELEMS ED_STACK_DIST_VALS The previous name was misleading, since it does not define the number of elements (ed_elem) on the stack, but rather the number of distance values (ed_dist). Rename to make this more clear and add more documentation about what it does and how best to define it. Note: This is an API change for custom-compiled versions, but since the module has only been included for a couple days I don't think it's worth a back-compat #ifdef at this point. Signed-off-by: Kevin Locke --- diff --git a/ccan/edit_distance/edit_distance-params.h b/ccan/edit_distance/edit_distance-params.h index f4846188..5893da48 100644 --- a/ccan/edit_distance/edit_distance-params.h +++ b/ccan/edit_distance/edit_distance-params.h @@ -109,18 +109,39 @@ # define ED_HASH_ELEM(e) ((unsigned char)e) /** Maximum value that can be returned from #ED_HASH_ELEM */ # define ED_HASH_MAX UCHAR_MAX -/** Can an array of #ED_HASH_MAX ::ed_dist values be stored on the stack? */ +/** Can an array of #ED_HASH_MAX ::ed_dist values be stored on the stack? + * @see #ED_STACK_DIST_VALS + */ # define ED_HASH_ON_STACK #endif -#ifndef ED_STACK_ELEMS -/** Maximum number of elements that will be allocated on the stack. +#ifndef ED_STACK_DIST_VALS +/** Maximum number of ::ed_dist values that will be allocated on the stack. + * + * The edit distance algorithms which use a dynamic programming (all currently + * supported algorithms) can store intermediate values on the stack to avoid + * the overhead of calling @c malloc. This macro defines the maximum number + * of intermediate distance values which can be stored on the stack for a + * single function call. It should be large enough to cover common cases, + * where possible, and small enough to avoid overflowing the stack or frame + * size limits. The algorithms have the following requirements: * - * Note: The threshold does not need to be tight since with - * ED_STACK_ELEMS > ~100 algorithm cost will dominate @c malloc + * - ed_measure::EDIT_DISTANCE_LCS and ed_measure::EDIT_DISTANCE_LEV require + * @c min(slen,tlen) values when #ED_COST_IS_SYMMETRIC is defined, @c slen + * otherwise. + * - ed_measure::EDIT_DISTANCE_RDL requires @c 2*min(slen,tlen) values when + * #ED_COST_IS_SYMMETRIC is defined, @c 2*slen otherwise. + * - ed_measure::EDIT_DISTANCE_DL requires @c slen*tlen values (in addition to + * the #ED_HASH_MAX values stored on the stack if #ED_HASH_ON_STACK is + * defined). + * + * This value does not need to be a tight bound, since when @c slen*tlen is + * greater than around 10,000 the algorithm cost will dominate the @c malloc * cost anyway. + * + * @see #ED_HASH_ON_STACK */ -# define ED_STACK_ELEMS 512 +# define ED_STACK_DIST_VALS 512 #endif #endif diff --git a/ccan/edit_distance/edit_distance_dl.c b/ccan/edit_distance/edit_distance_dl.c index ed690547..695f50d3 100644 --- a/ccan/edit_distance/edit_distance_dl.c +++ b/ccan/edit_distance/edit_distance_dl.c @@ -15,11 +15,11 @@ ed_dist edit_distance_dl(const ed_elem *src, ed_size slen, { /* Optimization: Avoid malloc when distance matrix can fit on the stack. */ - ed_dist stackdist[ED_STACK_ELEMS]; + ed_dist stackdist[ED_STACK_DIST_VALS]; /* Lowrance-Wagner distance matrix, in row-major order. */ size_t matsize = ((size_t)slen + 2) * (tlen + 2); - ed_dist *distmem = matsize <= ED_STACK_ELEMS ? stackdist : + ed_dist *distmem = matsize <= ED_STACK_DIST_VALS ? stackdist : malloc(matsize * sizeof(ed_dist)); ed_dist *dist = distmem; diff --git a/ccan/edit_distance/edit_distance_lcs.c b/ccan/edit_distance/edit_distance_lcs.c index e612c73c..4bc867e6 100644 --- a/ccan/edit_distance/edit_distance_lcs.c +++ b/ccan/edit_distance/edit_distance_lcs.c @@ -16,10 +16,10 @@ ed_dist edit_distance_lcs(const ed_elem *src, ed_size slen, /* Optimization: Avoid malloc when row of distance matrix can fit on * the stack. */ - ed_dist stackdist[ED_STACK_ELEMS]; + ed_dist stackdist[ED_STACK_DIST_VALS]; /* One row of the Wagner-Fischer distance matrix. */ - ed_dist *dist = slen < ED_STACK_ELEMS ? stackdist : + ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist : malloc((slen + 1) * sizeof(ed_dist)); /* Initialize row with cost to delete src[0..i-1] */ diff --git a/ccan/edit_distance/edit_distance_lev.c b/ccan/edit_distance/edit_distance_lev.c index 7638c6dc..fefa9da8 100644 --- a/ccan/edit_distance/edit_distance_lev.c +++ b/ccan/edit_distance/edit_distance_lev.c @@ -16,10 +16,10 @@ ed_dist edit_distance_lev(const ed_elem *src, ed_size slen, /* Optimization: Avoid malloc when row of distance matrix can fit on * the stack. */ - ed_dist stackdist[ED_STACK_ELEMS]; + ed_dist stackdist[ED_STACK_DIST_VALS]; /* One row of the Wagner-Fischer distance matrix. */ - ed_dist *dist = slen < ED_STACK_ELEMS ? stackdist : + ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist : malloc((slen + 1) * sizeof(ed_dist)); /* Initialize row with cost to delete src[0..i-1] */ diff --git a/ccan/edit_distance/edit_distance_rdl.c b/ccan/edit_distance/edit_distance_rdl.c index 76ba3ce2..4f098cbe 100644 --- a/ccan/edit_distance/edit_distance_rdl.c +++ b/ccan/edit_distance/edit_distance_rdl.c @@ -16,11 +16,11 @@ ed_dist edit_distance_rdl(const ed_elem *src, ed_size slen, /* Optimization: Avoid malloc when required rows of distance matrix can * fit on the stack. */ - ed_dist stackdist[ED_STACK_ELEMS]; + ed_dist stackdist[ED_STACK_DIST_VALS]; /* Two rows of the Wagner-Fischer distance matrix. */ ed_dist *distmem, *dist, *prevdist; - if (slen < ED_STACK_ELEMS / 2) { + if (slen < ED_STACK_DIST_VALS / 2) { distmem = stackdist; dist = distmem; prevdist = distmem + slen + 1; diff --git a/ccan/edit_distance/test/run-weights.c b/ccan/edit_distance/test/run-weights.c index f6b470d4..5d40b7a7 100644 --- a/ccan/edit_distance/test/run-weights.c +++ b/ccan/edit_distance/test/run-weights.c @@ -224,14 +224,14 @@ static void test_dl(void) static void test_mem_use(void) { char tgt[] = "BC"; - char src[ED_STACK_ELEMS + 1]; - for (size_t i = 0; i < ED_STACK_ELEMS; ++i) { + char src[ED_STACK_DIST_VALS + 1]; + for (size_t i = 0; i < ED_STACK_DIST_VALS; ++i) { src[i] = (char)('A' + (i % 26)); } - src[ED_STACK_ELEMS] = '\0'; + src[ED_STACK_DIST_VALS] = '\0'; for (ed_size tlen = 1; tlen < 3; ++tlen) { - ed_size slen = ED_STACK_ELEMS; + ed_size slen = ED_STACK_DIST_VALS; /* Above threshold, causes allocation */ ok(edit_distance_lcs(src, slen, tgt, tlen) == slen - tlen, "edit_distance_lcs(\"%.3s..., %u, \"%.*s\", %u) == %u",