edit_distance: calculate edit distance between strings
[ccan] / ccan / edit_distance / test / api.c
1 /** @file
2  * Runnable public API tests of the edit_distance module.
3  *
4  * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
5  *            MIT license - see LICENSE file for details
6  */
7
8 #include <string.h>
9
10 #include <ccan/build_assert/build_assert.h>
11 #include <ccan/edit_distance/edit_distance.h>
12 #include <ccan/tap/tap.h>
13
14 #define edit_distance_lcs_static(arr1, arr2) \
15         edit_distance(arr1, sizeof arr1 - 1, arr2, sizeof arr2 - 1, \
16                         EDIT_DISTANCE_LCS)
17 #define edit_distance_lev_static(arr1, arr2) \
18         edit_distance(arr1, sizeof arr1 - 1, arr2, sizeof arr2 - 1, \
19                         EDIT_DISTANCE_LEV)
20 #define edit_distance_rdl_static(arr1, arr2) \
21         edit_distance(arr1, sizeof arr1 - 1, arr2, sizeof arr2 - 1, \
22                         EDIT_DISTANCE_RDL)
23 #define edit_distance_dl_static(arr1, arr2) \
24         edit_distance(arr1, sizeof arr1 - 1, arr2, sizeof arr2 - 1, \
25                         EDIT_DISTANCE_DL)
26
27 static void test_lcs(void)
28 {
29         /* Trivial cases */
30         ok1(edit_distance_lcs_static("", "") == 0);
31         ok1(edit_distance_lcs_static("a", "") == 1);
32         ok1(edit_distance_lcs_static("", "a") == 1);
33         ok1(edit_distance_lcs_static("a", "a") == 0);
34         ok1(edit_distance_lcs_static("a", "b") == 2);
35         ok1(edit_distance_lcs_static("b", "a") == 2);
36
37         /* Trivial search cases */
38         ok1(edit_distance_lcs_static("a", "bcdef") == 6);
39         ok1(edit_distance_lcs_static("a", "bcadef") == 5);
40         ok1(edit_distance_lcs_static("acdef", "b") == 6);
41         ok1(edit_distance_lcs_static("abcdef", "b") == 5);
42
43         /* Common prefix with single-char distance */
44         ok1(edit_distance_lcs_static("aa", "ab") == 2);
45         ok1(edit_distance_lcs_static("ab", "aa") == 2);
46
47         /* Common suffix with single-char distance */
48         ok1(edit_distance_lcs_static("aa", "ba") == 2);
49         ok1(edit_distance_lcs_static("ba", "aa") == 2);
50
51         /* Non-optimized cases (require Wagner-Fischer matrix) */
52         ok1(edit_distance_lcs_static("ab", "ba") == 2);
53         ok1(edit_distance_lcs_static("abc", "de") == 5);
54         ok1(edit_distance_lcs_static("abc", "def") == 6);
55
56         /* (Restricted) Transposition */
57         ok1(edit_distance_lcs_static("abcd", "ecbf") == 6);
58
59         /* (Unrestricted) Transposition */
60         ok1(edit_distance_lcs_static("ca", "abc") == 3);
61
62         /* Insert + Delete + Sub */
63         ok1(edit_distance_lcs_static("abcde", "xcdef") == 4);
64
65         /* Distance depends on multiple deletions in final row. */
66         ok1(edit_distance_lcs_static("aabcc", "bccdd") == 4);
67
68         /* Distance depends on multiple insertions in final column. */
69         ok1(edit_distance_lcs_static("bccdd", "aabcc") == 4);
70 }
71
72 static void test_lev(void)
73 {
74         /* Trivial cases */
75         ok1(edit_distance_lev_static("", "") == 0);
76         ok1(edit_distance_lev_static("a", "") == 1);
77         ok1(edit_distance_lev_static("", "a") == 1);
78         ok1(edit_distance_lev_static("a", "a") == 0);
79         ok1(edit_distance_lev_static("a", "b") == 1);
80         ok1(edit_distance_lev_static("b", "a") == 1);
81
82         /* Trivial search cases */
83         ok1(edit_distance_lev_static("a", "bcdef") == 5);
84         ok1(edit_distance_lev_static("a", "bcadef") == 5);
85         ok1(edit_distance_lev_static("acdef", "b") == 5);
86         ok1(edit_distance_lev_static("abcdef", "b") == 5);
87
88         /* Common prefix with single-char distance */
89         ok1(edit_distance_lev_static("aa", "ab") == 1);
90         ok1(edit_distance_lev_static("ab", "aa") == 1);
91
92         /* Common suffix with single-char distance */
93         ok1(edit_distance_lev_static("aa", "ba") == 1);
94         ok1(edit_distance_lev_static("ba", "aa") == 1);
95
96         /* Non-optimized cases (require Wagner-Fischer matrix) */
97         ok1(edit_distance_lev_static("ab", "ba") == 2);
98         ok1(edit_distance_lev_static("abc", "de") == 3);
99         ok1(edit_distance_lev_static("abc", "def") == 3);
100
101         /* (Restricted) Transposition */
102         ok1(edit_distance_lev_static("abcd", "ecbf") == 4);
103
104         /* (Unrestricted) Transposition */
105         ok1(edit_distance_lev_static("ca", "abc") == 3);
106
107         /* Insert + Delete + Sub */
108         ok1(edit_distance_lev_static("abcde", "xcdef") == 3);
109
110         /* Distance depends on multiple deletions in final row. */
111         ok1(edit_distance_lev_static("aabcc", "bccdd") == 4);
112
113         /* Distance depends on multiple insertions in final column. */
114         ok1(edit_distance_lev_static("bccdd", "aabcc") == 4);
115 }
116
117 static void test_rdl(void)
118 {
119         /* Trivial cases */
120         ok1(edit_distance_rdl_static("", "") == 0);
121         ok1(edit_distance_rdl_static("a", "") == 1);
122         ok1(edit_distance_rdl_static("", "a") == 1);
123         ok1(edit_distance_rdl_static("a", "a") == 0);
124         ok1(edit_distance_rdl_static("a", "b") == 1);
125         ok1(edit_distance_rdl_static("b", "a") == 1);
126
127         /* Trivial search cases */
128         ok1(edit_distance_rdl_static("a", "bcdef") == 5);
129         ok1(edit_distance_rdl_static("a", "bcadef") == 5);
130         ok1(edit_distance_rdl_static("acdef", "b") == 5);
131         ok1(edit_distance_rdl_static("abcdef", "b") == 5);
132
133         /* Common prefix with single-char distance */
134         ok1(edit_distance_rdl_static("aa", "ab") == 1);
135         ok1(edit_distance_rdl_static("ab", "aa") == 1);
136
137         /* Common suffix with single-char distance */
138         ok1(edit_distance_rdl_static("aa", "ba") == 1);
139         ok1(edit_distance_rdl_static("ba", "aa") == 1);
140
141         /* Non-optimized cases (require Wagner-Fischer matrix) */
142         ok1(edit_distance_rdl_static("ab", "ba") == 1);
143         ok1(edit_distance_rdl_static("abc", "de") == 3);
144         ok1(edit_distance_rdl_static("abc", "def") == 3);
145
146         /* (Restricted) Transposition */
147         ok1(edit_distance_rdl_static("abcd", "ecbf") == 3);
148
149         /* (Unrestricted) Transposition */
150         ok1(edit_distance_rdl_static("ca", "abc") == 3);
151
152         /* Insert + Delete + Sub */
153         ok1(edit_distance_rdl_static("abcde", "xcdef") == 3);
154
155         /* Distance depends on multiple deletions in final row. */
156         ok1(edit_distance_rdl_static("aabcc", "bccdd") == 4);
157
158         /* Distance depends on multiple insertions in final column. */
159         ok1(edit_distance_rdl_static("bccdd", "aabcc") == 4);
160 }
161
162 static void test_dl(void)
163 {
164         /* Trivial cases */
165         ok1(edit_distance_dl_static("", "") == 0);
166         ok1(edit_distance_dl_static("a", "") == 1);
167         ok1(edit_distance_dl_static("", "a") == 1);
168         ok1(edit_distance_dl_static("a", "a") == 0);
169         ok1(edit_distance_dl_static("a", "b") == 1);
170         ok1(edit_distance_dl_static("b", "a") == 1);
171
172         /* Trivial search cases */
173         ok1(edit_distance_dl_static("a", "bcdef") == 5);
174         ok1(edit_distance_dl_static("a", "bcadef") == 5);
175         ok1(edit_distance_dl_static("acdef", "b") == 5);
176         ok1(edit_distance_dl_static("abcdef", "b") == 5);
177
178         /* Common prefix with single-char distance */
179         ok1(edit_distance_dl_static("aa", "ab") == 1);
180         ok1(edit_distance_dl_static("ab", "aa") == 1);
181
182         /* Common suffix with single-char distance */
183         ok1(edit_distance_dl_static("aa", "ba") == 1);
184         ok1(edit_distance_dl_static("ba", "aa") == 1);
185
186         /* Non-optimized cases (require Wagner-Fischer matrix) */
187         ok1(edit_distance_dl_static("ab", "ba") == 1);
188         ok1(edit_distance_dl_static("abc", "de") == 3);
189         ok1(edit_distance_dl_static("abc", "def") == 3);
190
191         /* (Restricted) Transposition */
192         ok1(edit_distance_dl_static("abcd", "ecbf") == 3);
193
194         /* (Unrestricted) Transposition */
195         ok1(edit_distance_dl_static("ca", "abc") == 2);
196
197         /* Insert + Delete + Sub */
198         ok1(edit_distance_dl_static("abcde", "xcdef") == 3);
199
200         /* Distance depends on multiple deletions in final row. */
201         ok1(edit_distance_dl_static("aabcc", "bccdd") == 4);
202
203         /* Distance depends on multiple insertions in final column. */
204         ok1(edit_distance_dl_static("bccdd", "aabcc") == 4);
205 }
206
207 int main(void)
208 {
209         plan_tests(89);
210
211         test_lcs();
212         test_lev();
213         test_rdl();
214         test_dl();
215
216         /* Unsupported edit distance measure */
217         enum ed_measure badmeasure = (enum ed_measure)-1;
218         ok1(edit_distance("ab", 2, "ba", 2, badmeasure) == (ed_dist)-1);
219
220         return exit_status();
221 }