]> git.ozlabs.org Git - ccan/blob - ccan/edit_distance/test/run-weights.c
f6b470d44f1f5842853d8d13d1d98836f3435335
[ccan] / ccan / edit_distance / test / run-weights.c
1 /** @file
2  * Runnable tests for the edit_distance module using custom costs/weights.
3  *
4  * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
5  *            MIT license - see LICENSE file for details
6  */
7
8 #include <string.h>
9
10 #include <ccan/build_assert/build_assert.h>
11 #include <ccan/tap/tap.h>
12
13 #define ED_DEL_COST(e) (e == 'a' ? 2 : 1)
14 #define ED_INS_COST(e) (e == 'b' ? 2 : 1)
15 #define ED_SUB_COST(e, f) (f == 'c' && e == 'd' ? 3 : 1)
16 #define ED_TRA_COST(e, f) (e == 'e' && f == 'f' ? 3 : 1)
17
18 #include <ccan/edit_distance/edit_distance.c>
19 #include <ccan/edit_distance/edit_distance_dl.c>
20 #include <ccan/edit_distance/edit_distance_lcs.c>
21 #include <ccan/edit_distance/edit_distance_lev.c>
22 #include <ccan/edit_distance/edit_distance_rdl.c>
23
24 #define edit_distance_lcs_static(arr1, arr2) \
25         edit_distance(arr1, sizeof arr1 - 1, arr2, sizeof arr2 - 1, \
26                         EDIT_DISTANCE_LCS)
27 #define edit_distance_lev_static(arr1, arr2) \
28         edit_distance(arr1, sizeof arr1 - 1, arr2, sizeof arr2 - 1, \
29                         EDIT_DISTANCE_LEV)
30 #define edit_distance_rdl_static(arr1, arr2) \
31         edit_distance(arr1, sizeof arr1 - 1, arr2, sizeof arr2 - 1, \
32                         EDIT_DISTANCE_RDL)
33 #define edit_distance_dl_static(arr1, arr2) \
34         edit_distance(arr1, sizeof arr1 - 1, arr2, sizeof arr2 - 1, \
35                         EDIT_DISTANCE_DL)
36
37 static void test_lcs(void)
38 {
39         /* Trivial cases */
40         ok1(edit_distance_lcs_static("", "") == 0);
41         ok1(edit_distance_lcs_static("a", "") == 2);
42         ok1(edit_distance_lcs_static("", "a") == 1);
43         ok1(edit_distance_lcs_static("a", "a") == 0);
44         ok1(edit_distance_lcs_static("a", "b") == 4);
45         ok1(edit_distance_lcs_static("b", "a") == 2);
46
47         /* Trivial search cases */
48         ok1(edit_distance_lcs_static("a", "bcdef") == 8);
49         ok1(edit_distance_lcs_static("a", "bcadef") == 6);
50         ok1(edit_distance_lcs_static("acdef", "b") == 8);
51         ok1(edit_distance_lcs_static("abcdef", "b") == 6);
52
53         /* Common prefix with single-char distance */
54         ok1(edit_distance_lcs_static("aa", "ab") == 4);
55         ok1(edit_distance_lcs_static("ab", "aa") == 2);
56
57         /* Common suffix with single-char distance */
58         ok1(edit_distance_lcs_static("aa", "ba") == 4);
59         ok1(edit_distance_lcs_static("ba", "aa") == 2);
60
61         /* Non-optimized cases (require Wagner-Fischer matrix) */
62         ok1(edit_distance_lcs_static("ab", "ba") == 3);
63         ok1(edit_distance_lcs_static("abc", "de") == 6);
64         ok1(edit_distance_lcs_static("abc", "def") == 7);
65         ok1(edit_distance_lcs_static("de", "bdef") == 3);
66
67         /* Transposition + Insert */
68         ok1(edit_distance_lcs_static("ca", "abc") == 4);
69
70         /* Insert + Delete + Sub */
71         ok1(edit_distance_lcs_static("abcde", "xcdef") == 5);
72
73         /* Distance depends on multiple deletions in final row. */
74         ok1(edit_distance_lcs_static("aabcc", "bccdd") == 6);
75
76         /* Distance depends on multiple insertions in final column. */
77         ok1(edit_distance_lcs_static("bccdd", "aabcc") == 4);
78 }
79
80 static void test_lev(void)
81 {
82         /* Trivial cases */
83         ok1(edit_distance_lev_static("", "") == 0);
84         ok1(edit_distance_lev_static("a", "") == 2);
85         ok1(edit_distance_lev_static("", "a") == 1);
86         ok1(edit_distance_lev_static("a", "a") == 0);
87         ok1(edit_distance_lev_static("a", "b") == 1);
88         ok1(edit_distance_lev_static("b", "a") == 1);
89
90         /* Trivial search cases */
91         ok1(edit_distance_lev_static("a", "bcdef") == 5);
92         ok1(edit_distance_lev_static("a", "bcadef") == 6);
93         ok1(edit_distance_lev_static("acdef", "b") == 5);
94         ok1(edit_distance_lev_static("abcdef", "b") == 6);
95
96         /* Common prefix with single-char distance */
97         ok1(edit_distance_lev_static("aa", "ab") == 1);
98         ok1(edit_distance_lev_static("ab", "aa") == 1);
99
100         /* Common suffix with single-char distance */
101         ok1(edit_distance_lev_static("aa", "ba") == 1);
102         ok1(edit_distance_lev_static("ba", "aa") == 1);
103
104         /* Non-optimized cases (require Wagner-Fischer matrix) */
105         ok1(edit_distance_lev_static("ab", "ba") == 2);
106         ok1(edit_distance_lev_static("abc", "de") == 3);
107         ok1(edit_distance_lev_static("abc", "def") == 3);
108         ok1(edit_distance_lev_static("de", "bdef") == 3);
109
110         /* Transposition + Insert */
111         ok1(edit_distance_lev_static("ca", "abc") == 3);
112
113         /* Insert + Delete + Sub */
114         ok1(edit_distance_lev_static("abcde", "xcdef") == 3);
115
116         /* Distance depends on multiple deletions in final row. */
117         ok1(edit_distance_lev_static("aabcc", "bccdd") == 5);
118
119         /* Distance depends on multiple insertions in final column. */
120         ok1(edit_distance_lev_static("bccdd", "aabcc") == 4);
121 }
122
123 static void test_rdl(void)
124 {
125         /* Trivial cases */
126         ok1(edit_distance_rdl_static("", "") == 0);
127         ok1(edit_distance_rdl_static("a", "") == 2);
128         ok1(edit_distance_rdl_static("", "a") == 1);
129         ok1(edit_distance_rdl_static("a", "a") == 0);
130         ok1(edit_distance_rdl_static("a", "b") == 1);
131         ok1(edit_distance_rdl_static("b", "a") == 1);
132
133         /* Trivial search cases */
134         ok1(edit_distance_rdl_static("a", "bcdef") == 5);
135         ok1(edit_distance_rdl_static("a", "bcadef") == 6);
136         ok1(edit_distance_rdl_static("acdef", "b") == 5);
137         ok1(edit_distance_rdl_static("abcdef", "b") == 6);
138
139         /* Common prefix with single-char distance */
140         ok1(edit_distance_rdl_static("aa", "ab") == 1);
141         ok1(edit_distance_rdl_static("ab", "aa") == 1);
142
143         /* Common suffix with single-char distance */
144         ok1(edit_distance_rdl_static("aa", "ba") == 1);
145         ok1(edit_distance_rdl_static("ba", "aa") == 1);
146
147         /* Non-optimized cases (require Wagner-Fischer matrix) */
148         ok1(edit_distance_rdl_static("ab", "ba") == 1);
149         ok1(edit_distance_rdl_static("abc", "de") == 3);
150         ok1(edit_distance_rdl_static("abc", "def") == 3);
151         ok1(edit_distance_rdl_static("de", "bdef") == 3);
152
153         /* Transposition + Insert */
154         ok1(edit_distance_rdl_static("ca", "abc") == 3);
155
156         /* Transpose Weight */
157         ok1(edit_distance_rdl_static("ef", "fe") == 2);
158         ok1(edit_distance_rdl_static("fe", "ef") == 1);
159
160         /* Insert + Delete + Sub */
161         ok1(edit_distance_rdl_static("abcde", "xcdef") == 3);
162
163         /* Distance depends on multiple deletions in final row. */
164         ok1(edit_distance_rdl_static("aabcc", "bccdd") == 5);
165
166         /* Distance depends on multiple insertions in final column. */
167         ok1(edit_distance_rdl_static("bccdd", "aabcc") == 4);
168 }
169
170 static void test_dl(void)
171 {
172         /* Trivial cases */
173         ok1(edit_distance_dl_static("", "") == 0);
174         ok1(edit_distance_dl_static("a", "") == 2);
175         ok1(edit_distance_dl_static("", "a") == 1);
176         ok1(edit_distance_dl_static("a", "a") == 0);
177         ok1(edit_distance_dl_static("a", "b") == 1);
178         ok1(edit_distance_dl_static("b", "a") == 1);
179
180         /* Trivial search cases */
181         ok1(edit_distance_dl_static("a", "bcdef") == 5);
182         ok1(edit_distance_dl_static("a", "bcadef") == 6);
183         ok1(edit_distance_dl_static("acdef", "b") == 5);
184         ok1(edit_distance_dl_static("abcdef", "b") == 6);
185
186         /* Common prefix with single-char distance */
187         ok1(edit_distance_dl_static("aa", "ab") == 1);
188         ok1(edit_distance_dl_static("ab", "aa") == 1);
189
190         /* Common suffix with single-char distance */
191         ok1(edit_distance_dl_static("aa", "ba") == 1);
192         ok1(edit_distance_dl_static("ba", "aa") == 1);
193
194         /* Non-optimized cases (require Wagner-Fischer matrix) */
195         ok1(edit_distance_dl_static("ab", "ba") == 1);
196         ok1(edit_distance_dl_static("abc", "de") == 3);
197         ok1(edit_distance_dl_static("abc", "def") == 3);
198         ok1(edit_distance_dl_static("de", "bdef") == 3);
199
200         /* Transposition + Insert */
201         ok1(edit_distance_dl_static("ca", "abc") == 3);
202
203         /* Transpose Weight */
204         ok1(edit_distance_dl_static("ef", "fe") == 2);
205         ok1(edit_distance_dl_static("fe", "ef") == 1);
206
207         /* Insert + Delete + Sub */
208         ok1(edit_distance_dl_static("abcde", "xcdef") == 3);
209
210         /* Distance depends on multiple deletions in final row. */
211         ok1(edit_distance_dl_static("aabcc", "bccdd") == 5);
212
213         /* Distance depends on multiple insertions in final column. */
214         ok1(edit_distance_dl_static("bccdd", "aabcc") == 4);
215 }
216
217 /* Test edit_distance calculation around the stack threshold to ensure memory
218  * is allocated and freed correctly and stack overflow does not occur.
219  *
220  * Note:  This test is done when ED_COST_IS_SYMMETRIC is not defined so that
221  * tgt can be small to make the test run quickly (with ED_COST_IS_SYMMETRIC the
222  * min length would need to be above the threshold).
223  */
224 static void test_mem_use(void)
225 {
226         char tgt[] = "BC";
227         char src[ED_STACK_ELEMS + 1];
228         for (size_t i = 0; i < ED_STACK_ELEMS; ++i) {
229                 src[i] = (char)('A' + (i % 26));
230         }
231         src[ED_STACK_ELEMS] = '\0';
232
233         for (ed_size tlen = 1; tlen < 3; ++tlen) {
234                 ed_size slen = ED_STACK_ELEMS;
235                 /* Above threshold, causes allocation */
236                 ok(edit_distance_lcs(src, slen, tgt, tlen) == slen - tlen,
237                    "edit_distance_lcs(\"%.3s..., %u, \"%.*s\", %u) == %u",
238                    src, slen, (int)tlen, tgt, tlen, slen - tlen);
239                 ok(edit_distance_lev(src, slen, tgt, tlen) == slen - tlen,
240                    "edit_distance_lev(\"%.3s..., %u, \"%.*s\", %u) == %u",
241                    src, slen, (int)tlen, tgt, tlen, slen - tlen);
242                 ok(edit_distance_rdl(src, slen, tgt, tlen) == slen - tlen,
243                    "edit_distance_rdl(\"%.3s..., %u, \"%.*s\", %u) == %u",
244                    src, slen, (int)tlen, tgt, tlen, slen - tlen);
245                 ok(edit_distance_dl(src, slen, tgt, tlen) == slen - tlen,
246                    "edit_distance_dl(\"%.3s..., %u, \"%.*s\", %u) == %u",
247                    src, slen, (int)tlen, tgt, tlen, slen - tlen);
248
249                 /* Below threshold, no allocation */
250                 --slen;
251                 ok(edit_distance_lcs(src, slen, tgt, tlen) == slen - tlen,
252                    "edit_distance_lcs(\"%.3s..., %u, \"%.*s\", %u) == %u",
253                    src, slen, (int)tlen, tgt, tlen, slen - tlen);
254                 ok(edit_distance_lev(src, slen, tgt, tlen) == slen - tlen,
255                    "edit_distance_lev(\"%.3s..., %u, \"%.*s\", %u) == %u",
256                    src, slen, (int)tlen, tgt, tlen, slen - tlen);
257                 ok(edit_distance_rdl(src, slen, tgt, tlen) == slen - tlen,
258                    "edit_distance_rdl(\"%.3s..., %u, \"%.*s\", %u) == %u",
259                    src, slen, (int)tlen, tgt, tlen, slen - tlen);
260                 ok(edit_distance_dl(src, slen, tgt, tlen) == slen - tlen,
261                    "edit_distance_dl(\"%.3s..., %u, \"%.*s\", %u) == %u",
262                    src, slen, (int)tlen, tgt, tlen, slen - tlen);
263         }
264 }
265
266 int main(void)
267 {
268         plan_tests(109);
269
270         test_lcs();
271         test_lev();
272         test_rdl();
273         test_dl();
274
275         test_mem_use();
276
277         /* Unsupported edit distance measure */
278         enum ed_measure badmeasure = (enum ed_measure)-1;
279         ok1(edit_distance("ab", 2, "ba", 2, badmeasure) == (ed_dist)-1);
280
281         return exit_status();
282 }