2 * Main header file for edit_distance which defines the module API.
4 * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
5 * MIT license - see LICENSE file for details
7 #ifndef CCAN_EDIT_DISTANCE_H
8 #define CCAN_EDIT_DISTANCE_H
10 #include <stddef.h> /* size_t */
14 * ed_dist - Type in which the edit distance is expressed.
16 * The name @c ed_dist can be defined to another algebraic type (or this
17 * typedef can be edited) when compiling this module.
19 typedef unsigned int ed_dist;
24 * ed_elem - Element type of arrays for which the edit distance is calculated.
26 * Changing this type often requires changing #ED_HAS_ELEM, #ED_HASH_ELEM, and
27 * #ED_HASH_MAX. It requires changing #ED_ELEM_EQUAL for non-primitive types.
28 * These can be changed in edit_distance-params.h or defined externally.
35 * ed_size - Type in which the array size is expressed.
37 * The name @c ed_size can be defined to another algebraic type (or this
38 * typedef can be edited) when compiling this module. If @c ed_size is a
39 * signed type, the caller must ensure passed values are non-negative.
40 * @c ed_size must be large enough to hold @c max(slen,tlen)+2, it does not
41 * need to be large enough to hold @c slen*tlen.
43 * Note: @c ed_size is only likely to have a noticeable impact in
44 * edit_distance_dl() which maintains an array of @c #ED_HASH_MAX+1 ::ed_size
47 typedef unsigned int ed_size;
51 * ed_measure - Supported edit distance measures.
55 * Longest Common Subsequence (LCS) distance.
57 * The LCS distance is an edit distance measured by the number of
58 * insert and delete operations necessary to turn @p src into @p tgt.
60 * This implementation uses an iterative version of the Wagner-Fischer
61 * algorithm @cite Wagner74 which requires <code>O(slen * tlen)</code>
62 * time and <code>min(slen, tlen) + 1</code> space.
64 EDIT_DISTANCE_LCS = 1,
66 * Levenshtein distance.
68 * The Levenshtein distance is an edit distance measured by the number
69 * of insert, delete, and substitute operations necessary to turn
70 * @p src into @p tgt as described by Vladimir Levenshtein.
73 * This implementation uses a modified version of the Wagner-Fischer
74 * algorithm @cite Wagner74 which requires <code>O(slen * tlen)</code>
75 * time and only <code>min(slen, tlen) + 1</code> space.
79 * Restricted Damerau-Levenshtein distance (aka Optimal String
80 * Alignment distance).
82 * The Restricted Damerau-Levenshtein distance is an edit distance
83 * measured by the number of insert, delete, substitute, and transpose
84 * operations necessary to turn @p src into @p tgt with the restriction
85 * that no substring is edited more than once (equivalently, that no
86 * edits overlap). @cite Boytsov11
88 * This implementation uses a modified version of the Wagner-Fischer
89 * algorithm @cite Wagner74 which requires <code>O(slen * tlen)</code>
90 * time and only <code>2 * min(slen, tlen) + 2</code> space.
94 * Damerau-Levenshtein distance.
96 * The Damerau-Levenshtein distance is an edit distance measured by the
97 * number of insert, delete, substitute, and transpose operations
98 * necessary to turn @p src into @p tgt . @cite Damerau64
100 * This implementation uses the Lowrance-Wagner algorithm @cite Wagner75
101 * which takes <code>O(slen * tlen)</code> time and
102 * <code>O(slen * tlen)</code> space.
108 * edit_distance - Calculates the edit distance between two arrays.
110 * @param src Source array to calculate distance from.
111 * @param slen Number of elements in @p src to consider.
112 * @param tgt Target array to calculate distance to.
113 * @param tlen Number of elements in @p tgt to consider.
114 * @param measure Edit distance measure to calculate.
115 * @return Edit distance from @p src[0..slen-1] to @p tgt[0..tlen-1]. (i.e.
116 * The minimal sum of the weights of the operations necessary to convert
117 * @p src[0..slen-1] into @p tgt[0..tlen-1].)
121 * const char *source = "kitten";
122 * const char *target = "sitting";
123 * assert(edit_distance(source, strlen(source),
124 * target, strlen(target),
125 * EDIT_DISTANCE_DL) == 3);
126 * Example_End: @endcode
128 ed_dist edit_distance(const ed_elem *src, ed_size slen,
129 const ed_elem *tgt, ed_size tlen,
130 enum ed_measure measure);