]> git.ozlabs.org Git - ccan/blob - ccan/edit_distance/edit_distance_lcs.c
base64: fix for unsigned chars (e.g. ARM).
[ccan] / ccan / edit_distance / edit_distance_lcs.c
1 /** @file
2  * Defines Longest Common Subsequence distance functions.
3  *
4  * @copyright 2016 Kevin Locke <kevin@kevinlocke.name>
5  *            MIT license - see LICENSE file for details
6  */
7 #include <stdlib.h>             /* free, malloc */
8
9 #include "edit_distance.h"
10 #include "edit_distance-params.h"
11 #include "edit_distance-private.h"
12
13 ed_dist edit_distance_lcs(const ed_elem *src, ed_size slen,
14                           const ed_elem *tgt, ed_size tlen)
15 {
16         ed_size i, j;
17
18         /* Optimization: Avoid malloc when row of distance matrix can fit on
19          * the stack.
20          */
21         ed_dist stackdist[ED_STACK_DIST_VALS];
22
23         /* One row of the Wagner-Fischer distance matrix. */
24         ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist :
25             malloc((slen + 1) * sizeof(ed_dist));
26
27         /* Initialize row with cost to delete src[0..i-1] */
28         dist[0] = 0;
29         for (i = 1; i <= slen; ++i) {
30                 dist[i] = dist[i - 1] + ED_DEL_COST(src[i - 1]);
31         }
32
33         for (j = 1; j <= tlen; ++j) {
34                 /* Value for dist[j-1][i-1] (one row up, one col left). */
35                 ed_dist diagdist = dist[0];
36                 dist[0] = dist[0] + ED_INS_COST(tgt[j - 1]);
37
38                 /* Loop invariant: dist[i] is the edit distance between first j
39                  * elements of tgt and first i elements of src.
40                  */
41                 for (i = 1; i <= slen; ++i) {
42                         ed_dist nextdiagdist = dist[i];
43
44                         if (ED_ELEM_EQUAL(src[i - 1], tgt[j - 1])) {
45                                 /* Same as tgt upto j-2, src upto i-2. */
46                                 dist[i] = diagdist;
47                         } else {
48                                 /* Insertion is tgt upto j-2, src upto i-1
49                                  * + insert tgt[j-1] */
50                                 ed_dist insdist =
51                                     dist[i] + ED_INS_COST(tgt[j - 1]);
52
53                                 /* Deletion is tgt upto j-1, src upto i-2
54                                  * + delete src[i-1] */
55                                 ed_dist deldist =
56                                     dist[i - 1] + ED_DEL_COST(src[i - 1]);
57
58                                 /* Use best distance available */
59                                 dist[i] = ED_MIN2(insdist, deldist);
60                         }
61
62                         diagdist = nextdiagdist;
63                 }
64         }
65
66         ed_dist total = dist[slen];
67         if (dist != stackdist) {
68                 free(dist);
69         }
70         return total;
71 }