]> git.ozlabs.org Git - ccan/blob - ccan/edit_distance/edit_distance_rdl.c
base64: fix for unsigned chars (e.g. ARM).
[ccan] / ccan / edit_distance / edit_distance_rdl.c
1 /** @file
2  * Defines Restricted Damerau-Levenshtein distance functions.
3  *
4  * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
5  *            MIT license - see LICENSE file for details
6  */
7 #include <stdlib.h>             /* free, malloc */
8
9 #include "edit_distance.h"
10 #include "edit_distance-params.h"
11 #include "edit_distance-private.h"
12
13 ed_dist edit_distance_rdl(const ed_elem *src, ed_size slen,
14                           const ed_elem *tgt, ed_size tlen)
15 {
16         ed_size i, j;
17
18         /* Optimization: Avoid malloc when required rows of distance matrix can
19          * fit on the stack.
20          */
21         ed_dist stackdist[ED_STACK_DIST_VALS];
22
23         /* Two rows of the Wagner-Fischer distance matrix. */
24         ed_dist *distmem, *dist, *prevdist;
25         if (slen < ED_STACK_DIST_VALS / 2) {
26                 distmem = stackdist;
27                 dist = distmem;
28                 prevdist = distmem + slen + 1;
29         } else {
30                 distmem = malloc((slen + 1) * sizeof(ed_dist) * 2);
31                 dist = distmem;
32                 prevdist = distmem + slen + 1;
33         }
34
35         /* Initialize row with cost to delete src[0..i-1] */
36         dist[0] = 0;
37         for (i = 1; i <= slen; ++i) {
38                 dist[i] = dist[i - 1] + ED_DEL_COST(src[i - 1]);
39         }
40
41         for (j = 1; j <= tlen; ++j) {
42                 /* Value for dist[j-2][i-1] (two rows up, one col left). */
43                 /* Note: dist[0] is not initialized when j == 1, var unused. */
44                 ed_dist diagdist1 = prevdist[0];
45                 /* Value for dist[j-2][i-2] (two rows up, two cols left).
46                  * Initialization value only used to placate GCC. */
47                 ed_dist diagdist2 = 0;
48
49                 ED_SWAP(dist, prevdist, ed_dist *);
50
51                 dist[0] = prevdist[0] + ED_INS_COST(tgt[j - 1]);
52
53                 /* Loop invariant: dist[i] is the edit distance between first j
54                  * elements of tgt and first i elements of src.
55                  */
56                 for (i = 1; i <= slen; ++i) {
57                         ed_dist nextdiagdist = dist[i];
58
59                         if (ED_ELEM_EQUAL(src[i - 1], tgt[j - 1])) {
60                                 /* Same as tgt upto j-2, src upto i-2. */
61                                 dist[i] = prevdist[i - 1];
62                         } else {
63                                 /* Insertion is tgt upto j-2, src upto i-1
64                                  * + insert tgt[j-1] */
65                                 ed_dist insdist =
66                                     prevdist[i] + ED_INS_COST(tgt[j - 1]);
67
68                                 /* Deletion is tgt upto j-1, src upto i-2
69                                  * + delete src[i-1] */
70                                 ed_dist deldist =
71                                     dist[i - 1] + ED_DEL_COST(src[i - 1]);
72
73                                 /* Substitution is tgt upto j-2, src upto i-2
74                                  * + substitute tgt[j-1] for src[i-1] */
75                                 ed_dist subdist = prevdist[i - 1] +
76                                     ED_SUB_COST(src[i - 1], tgt[j - 1]);
77
78                                 /* Use best distance available */
79                                 dist[i] = ED_MIN3(insdist, deldist, subdist);
80
81                                 if (j > 1 && i > 1 &&
82                                     ED_ELEM_EQUAL(src[i - 2], tgt[j - 1]) &&
83                                     ED_ELEM_EQUAL(src[i - 1], tgt[j - 2])) {
84                                         ed_dist tradist = diagdist2 +
85                                             ED_TRA_COST(src[j - 2], src[j - 1]);
86                                         dist[i] = ED_MIN2(dist[i], tradist);
87                                 }
88                         }
89
90                         diagdist2 = diagdist1;
91                         diagdist1 = nextdiagdist;
92                 }
93         }
94
95         ed_dist total = dist[slen];
96         if (distmem != stackdist) {
97                 free(distmem);
98         }
99         return total;
100 }