X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fedit_distance%2Fedit_distance-params.h;fp=ccan%2Fedit_distance%2Fedit_distance-params.h;h=f4846188656c257049730a02ec3eb2f15964f538;hp=0000000000000000000000000000000000000000;hb=e0663bef2ec73ed33b22e5a029284296495aab87;hpb=214086cfafb1f5bf7785a29f4497f3adf196ed8b diff --git a/ccan/edit_distance/edit_distance-params.h b/ccan/edit_distance/edit_distance-params.h new file mode 100644 index 00000000..f4846188 --- /dev/null +++ b/ccan/edit_distance/edit_distance-params.h @@ -0,0 +1,126 @@ +/** @file + * Defines the tunable parameters for the edit_distance functions which can + * be changed at module-compile-time. + * + * The macros in this header can be redefined, either by editing this file or + * providing external definitions. This makes it possible to calculate the + * weighted edit distance with arbitrary weighting. + * + * @copyright 2016 Kevin Locke + * MIT license - see LICENSE file for details + */ +#ifndef CCAN_EDIT_DISTANCE_PARAMS_H +#define CCAN_EDIT_DISTANCE_PARAMS_H + +#ifndef ed_elem +# include /* memchr */ +#endif + +#ifndef ED_HASH_ELEM +# include /* UCHAR_MAX */ +#endif + +#if !defined(ED_DEL_COST) && \ + !defined(ED_INS_COST) && \ + !defined(ED_SUB_COST) && \ + !defined(ED_TRA_COST) +/** Defined if costs are symmetric. + * (i.e. distance(a, b) == distance(b, a) forall a,b) */ +# define ED_COST_IS_SYMMETRIC +#endif + +#ifndef ED_DEL_COST +/** Cost to delete element @p e. */ +# define ED_DEL_COST(e) 1 +/** Defined if the cost to delete is the same for all elements. */ +# define ED_DEL_COST_CONST +#endif + +#ifndef ED_INS_COST +/** Cost to insert element @p e. */ +# define ED_INS_COST(e) 1 +/** Defined if the cost to insert is the same for all elements. */ +# define ED_INS_COST_CONST +#endif + +#ifndef ED_SUB_COST +/** Cost to substitute element @p f for element @p e (i.e. replace @p e with + * @p f ). + * + * All algorithms which use this macro expect that ED_SUB_COST(e, f) + + * ED_SUB_COST(f, g) >= ED_SUB_COST(e, g). They do not check for any + * substitution chains and will not find minimal distances when this assumption + * does not hold. + * + * Although it is possible to set ED_SUB_COST(e, f) >= ED_DEL_COST(e) + + * ED_INS_COST(f). If this is true for all @p e and @p f, consider + * using edit_distance_lcs() instead of edit_distance_lev() which calculates + * the same result more quickly by not considering substitutions which are + * never less costly than delete + insert. + */ +# define ED_SUB_COST(e, f) 1 +/** Defined if the cost to substitute is the same for all element pairs. */ +# define ED_SUB_COST_CONST +#endif + +#ifndef ED_TRA_COST +/** Cost to transpose elements @p ef to @p fe. + * + * @c edit_distance_dl requires 2 * ED_TRA_COST(e, f) >= ED_INS_COST(g) + + * ED_DEL_COST(h) for all @c e,f,g,h. See Trace Theorem 3 of + * Wagner-Lowrance. @cite Wagner75 + */ +# define ED_TRA_COST(e, f) 1 +/** Defined if the cost to transpose is the same for all element pairs. */ +# define ED_TRA_COST_CONST +#endif + +#if defined(DOXYGEN) +/** Optimized array search function for single-array-heavy workloads. + * + * The expression generated by this macro must evaluate to @c true in a boolean + * context when @p elem is contained in the first @p len elements of @p arr and + * @c false otherwise. @p len is guaranteed to be greater than 0. + * + * This macro is currently only used when #ED_INS_COST_CONST and + * #ED_SUB_COST_CONST are defined. + * + * @note + * Only define this macro when the search function is highly optimized and when + * single-element arrays are expected to be passed frequently, because it adds + * a conditional branch to each execution of the distance functions with + * non-empty arguments. + */ +# define ED_HAS_ELEM(arr, elem, len) (memchr(arr, elem, len) != NULL) +#endif + +#ifndef ED_ELEM_EQUAL +/** Are two elements equal? */ +# define ED_ELEM_EQUAL(elem1, elem2) (elem1 == elem2) +#endif + +#if !defined(ED_HASH_ELEM) && !defined(ed_elem) +/** Hash an element to a unique value in [0, ED_HASH_MAX]. + * + * Note: This module does not use a "real" dictionary. Hash collisions are + * not expected nor supported. If elements can not be uniquely mapped to a + * reasonable range, consider implementing a "real" dictionary. + */ +# define ED_HASH_ELEM(e) ((unsigned char)e) +/** Maximum value that can be returned from #ED_HASH_ELEM */ +# define ED_HASH_MAX UCHAR_MAX +/** Can an array of #ED_HASH_MAX ::ed_dist values be stored on the stack? */ +# define ED_HASH_ON_STACK +#endif + +#ifndef ED_STACK_ELEMS +/** Maximum number of elements that will be allocated on the stack. + * + * Note: The threshold does not need to be tight since with + * ED_STACK_ELEMS > ~100 algorithm cost will dominate @c malloc + * cost anyway. + */ +# define ED_STACK_ELEMS 512 +#endif + +#endif