]> git.ozlabs.org Git - ccan/blob - ccan/edit_distance/edit_distance.c
edit_distance: calculate edit distance between strings
[ccan] / ccan / edit_distance / edit_distance.c
1 /** @file
2  * Defines shared edit distance functions.
3  *
4  * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
5  *            MIT license - see LICENSE file for details
6  */
7 #include "edit_distance-params.h"
8 #include "edit_distance-private.h"
9
10 ed_dist edit_distance(const ed_elem *src, ed_size slen,
11                       const ed_elem *tgt, ed_size tlen, enum ed_measure measure)
12 {
13         /* Remove common prefix. */
14         while (slen > 0 && tlen > 0 && ED_ELEM_EQUAL(src[0], tgt[0])) {
15                 ++src;
16                 ++tgt;
17                 --slen;
18                 --tlen;
19         }
20
21         /* Remove common suffix. */
22         while (slen > 0 && tlen > 0 &&
23                ED_ELEM_EQUAL(src[slen - 1], tgt[tlen - 1])) {
24                 --slen;
25                 --tlen;
26         }
27
28 #if defined(ED_COST_IS_SYMMETRIC)
29         /* Use smaller array for src. */
30         if (slen > tlen) {
31                 ED_SWAP(src, tgt, const ed_elem *);
32                 ED_SWAP(slen, tlen, ed_size);
33         }
34 #endif
35
36         /* Early return when all insertions. */
37         if (slen == 0) {
38 #ifdef ED_INS_COST_CONST
39                 return (ed_dist)tlen *ED_INS_COST();
40 #else
41                 ed_dist result = 0;
42                 for (ed_size i = 0; i < tlen; ++i) {
43                         result += ED_INS_COST(tgt[i]);
44                 }
45                 return result;
46 #endif
47         }
48 #ifndef ED_COST_IS_SYMMETRIC
49         /* Early return when all deletions. */
50         if (tlen == 0) {
51 # ifdef ED_DEL_COST_CONST
52                 return (ed_dist)slen *ED_DEL_COST();
53 # else
54                 ed_dist result = 0;
55                 for (ed_size i = 0; i < slen; ++i) {
56                         result += ED_DEL_COST(src[i]);
57                 }
58                 return result;
59 # endif
60         }
61 #endif
62
63 #if defined(ED_HAS_ELEM) && \
64                 defined(ED_INS_COST_CONST) && \
65                 defined(ED_SUB_COST_CONST)
66         /* Fast search for single-element source. */
67         if (slen == 1) {
68                 /* Always tlen - 1 inserts (of non-src elements). */
69                 ed_dist result = (ed_dist)(tlen - 1) * ED_INS_COST();
70                 if (!ED_HAS_ELEM(tgt, src[0], tlen)) {
71                         if (measure == EDIT_DISTANCE_LCS) {
72                                 /* If src is not present, delete + insert. */
73                                 result += ED_DEL_COST(src[0]) + ED_INS_COST();
74                         } else {
75                                 /* If src is not present, substitute it out. */
76                                 result += ED_SUB_COST(,);
77                         }
78                 }
79                 return result;
80         }
81 #endif
82
83 #if !defined(ED_COST_IS_SYMMETRIC) && \
84                 defined(ED_HAS_ELEM) && \
85                 defined(ED_DEL_COST_CONST) && \
86                 defined(ED_SUB_COST_CONST)
87         /* Fast search for single-element target. */
88         if (tlen == 1) {
89                 /* Always slen - 1 deletes (of non-tgt elements). */
90                 ed_dist result = (ed_dist)(slen - 1) * ED_DEL_COST();
91                 if (!ED_HAS_ELEM(src, tgt[0], slen)) {
92                         if (measure == EDIT_DISTANCE_LCS) {
93                                 /* If tgt is not present, delete + insert. */
94                                 result += ED_INS_COST(tgt[0]) + ED_DEL_COST();
95                         } else {
96                                 /* If tgt is not present, substitute it out. */
97                                 result += ED_SUB_COST(,);
98                         }
99                 }
100                 return result;
101         }
102 #endif
103
104         switch (measure) {
105         case EDIT_DISTANCE_LCS:
106                 return edit_distance_lcs(src, slen, tgt, tlen);
107         case EDIT_DISTANCE_LEV:
108                 return edit_distance_lev(src, slen, tgt, tlen);
109         case EDIT_DISTANCE_RDL:
110                 return edit_distance_rdl(src, slen, tgt, tlen);
111         case EDIT_DISTANCE_DL:
112                 return edit_distance_dl(src, slen, tgt, tlen);
113         }
114
115         return (ed_dist)-1;
116 }