2 * Defines Levenshtein distance functions.
4 * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
5 * MIT license - see LICENSE file for details
7 #include <stdlib.h> /* free, malloc */
9 #include "edit_distance.h"
10 #include "edit_distance-params.h"
11 #include "edit_distance-private.h"
13 ed_dist edit_distance_lev(const ed_elem *src, ed_size slen,
14 const ed_elem *tgt, ed_size tlen)
16 /* Optimization: Avoid malloc when row of distance matrix can fit on
19 ed_dist stackdist[ED_STACK_DIST_VALS];
21 /* One row of the Wagner-Fischer distance matrix. */
22 ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist :
23 malloc((slen + 1) * sizeof(ed_dist));
25 /* Initialize row with cost to delete src[0..i-1] */
27 for (ed_size i = 1; i <= slen; ++i) {
28 dist[i] = dist[i - 1] + ED_DEL_COST(src[i - 1]);
31 for (ed_size j = 1; j <= tlen; ++j) {
32 /* Value for dist[j-1][i-1] (one row up, one col left). */
33 ed_dist diagdist = dist[0];
34 dist[0] = dist[0] + ED_INS_COST(tgt[j - 1]);
36 /* Loop invariant: dist[i] is the edit distance between first j
37 * elements of tgt and first i elements of src.
39 for (ed_size i = 1; i <= slen; ++i) {
40 ed_dist nextdiagdist = dist[i];
42 if (ED_ELEM_EQUAL(src[i - 1], tgt[j - 1])) {
43 /* Same as tgt upto j-2, src upto i-2. */
46 /* Insertion is tgt upto j-2, src upto i-1
47 * + insert tgt[j-1] */
49 dist[i] + ED_INS_COST(tgt[j - 1]);
51 /* Deletion is tgt upto j-1, src upto i-2
52 * + delete src[i-1] */
54 dist[i - 1] + ED_DEL_COST(src[i - 1]);
56 /* Substitution is tgt upto j-2, src upto i-2
57 * + substitute tgt[j-1] for src[i-1] */
58 ed_dist subdist = diagdist +
59 ED_SUB_COST(src[i - 1], tgt[j - 1]);
61 /* Use best distance available */
62 dist[i] = ED_MIN3(insdist, deldist, subdist);
65 diagdist = nextdiagdist;
69 ed_dist total = dist[slen];
70 if (dist != stackdist) {