2 * Defines Restricted Damerau-Levenshtein distance functions.
4 * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
5 * MIT license - see LICENSE file for details
7 #include <stdlib.h> /* free, malloc */
9 #include "edit_distance.h"
10 #include "edit_distance-params.h"
11 #include "edit_distance-private.h"
13 ed_dist edit_distance_rdl(const ed_elem *src, ed_size slen,
14 const ed_elem *tgt, ed_size tlen)
16 /* Optimization: Avoid malloc when required rows of distance matrix can
19 ed_dist stackdist[ED_STACK_DIST_VALS];
21 /* Two rows of the Wagner-Fischer distance matrix. */
22 ed_dist *distmem, *dist, *prevdist;
23 if (slen < ED_STACK_DIST_VALS / 2) {
26 prevdist = distmem + slen + 1;
28 distmem = malloc((slen + 1) * sizeof(ed_dist) * 2);
30 prevdist = distmem + slen + 1;
33 /* Initialize row with cost to delete src[0..i-1] */
35 for (ed_size i = 1; i <= slen; ++i) {
36 dist[i] = dist[i - 1] + ED_DEL_COST(src[i - 1]);
39 for (ed_size j = 1; j <= tlen; ++j) {
40 /* Value for dist[j-2][i-1] (two rows up, one col left). */
41 /* Note: prevdist[0] is not initialized when j == 1, but the value is unused. */
42 ed_dist diagdist1 = prevdist[0];
43 /* Value for dist[j-2][i-2] (two rows up, two cols left).
44 * Initialization value only used to placate GCC. */
45 ed_dist diagdist2 = 0;
47 ED_SWAP(dist, prevdist, ed_dist *);
49 dist[0] = prevdist[0] + ED_INS_COST(tgt[j - 1]);
51 /* Loop invariant: dist[i] is the edit distance between first j
52 * elements of tgt and first i elements of src.
54 for (ed_size i = 1; i <= slen; ++i) {
55 ed_dist nextdiagdist = dist[i];
57 if (ED_ELEM_EQUAL(src[i - 1], tgt[j - 1])) {
58 /* Same as tgt up to j-2, src up to i-2. */
59 dist[i] = prevdist[i - 1];
61 /* Insertion is tgt up to j-2, src up to i-1
62 * + insert tgt[j-1] */
64 prevdist[i] + ED_INS_COST(tgt[j - 1]);
66 /* Deletion is tgt up to j-1, src up to i-2
67 * + delete src[i-1] */
69 dist[i - 1] + ED_DEL_COST(src[i - 1]);
71 /* Substitution is tgt up to j-2, src up to i-2
72 * + substitute tgt[j-1] for src[i-1] */
73 ed_dist subdist = prevdist[i - 1] +
74 ED_SUB_COST(src[i - 1], tgt[j - 1]);
76 /* Use best distance available */
77 dist[i] = ED_MIN3(insdist, deldist, subdist);
80 ED_ELEM_EQUAL(src[i - 2], tgt[j - 1]) &&
81 ED_ELEM_EQUAL(src[i - 1], tgt[j - 2])) {
82 ed_dist tradist = diagdist2 +
83 ED_TRA_COST(src[j - 2], src[j - 1]);
84 dist[i] = ED_MIN2(dist[i], tradist);
88 diagdist2 = diagdist1;
89 diagdist1 = nextdiagdist;
93 ed_dist total = dist[slen];
94 if (distmem != stackdist) {