3 Copyright (C) 2014 Andrew Jeffery <andrew@aj.id.au>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #include "ccan/darray/darray.h"
24 #include "ccan/stringmap/stringmap.h"
25 #include "ccan/tal/tal.h"
26 #include "ccan/tal/str/str.h"
30 typedef darray(struct strgrp_grp *) darray_grp;
31 typedef darray(struct strgrp_item *) darray_item;
33 typedef stringmap(struct strgrp_grp *) stringmap_grp;
36 struct strgrp_grp *grp;
40 typedef darray(struct grp_score *) darray_score;
47 struct grp_score *scores;
51 const struct strgrp *ctx;
62 struct strgrp_grp_iter {
63 const struct strgrp_grp *grp;
74 static inline int cmi(int i, int j) {
79 lcs(const char *const a, const char *const b) {
80 const int lb = strlen(b);
81 const int lbp1 = lb + 1;
82 int16_t *const lookup = calloc(ROWS * lbp1, sizeof(int16_t));
87 for (ia = (strlen(a) - 1); ia >= 0; ia--) {
88 const char iav = a[ia];
89 for (ib = lb - 1; ib >= 0; ib--) {
90 const char ibv = b[ib];
91 const int ial = (ia + 1) & 1; // ia last
92 const int iac = ia & 1; // ia current
93 const int ibl = ib + 1; // ib last
94 // don't need separate "ib current" as it's just ib
96 lookup[cmi(iac, ib)] = 1 + lookup[cmi(ial, ibl)];
98 const int16_t valb = lookup[cmi(ial, ib)];
99 const int16_t vabl = lookup[cmi(iac, ibl)];
100 lookup[cmi(iac, ib)] = (valb > vabl) ? valb : vabl;
104 int16_t result = lookup[0];
112 nlcs(const char *const a, const char *const b) {
113 const double lcss = lcs(a, b);
114 return 2 * lcss / (strlen(a) + strlen(b));
118 should_grp_score(const struct strgrp *const ctx,
119 const struct strgrp_grp *const grp, const char *const str) {
120 const size_t strl = strlen(str);
121 const size_t keyl = grp->key_len;
122 double sr = strl / keyl;
126 return ctx->threshold <= sr;
130 grp_score(const struct strgrp_grp *const grp, const char *const str) {
131 return nlcs(grp->key, str);
134 static struct strgrp_item *
135 new_item(tal_t *const tctx, const char *const str, void *const data) {
136 struct strgrp_item *i = talz(tctx, struct strgrp_item);
140 i->key = tal_strdup(i, str);
146 add_item(struct strgrp_grp *const ctx, const char *const str,
148 struct strgrp_item *i = new_item(ctx, str, data);
152 darray_push(ctx->items, i);
158 free_grp(struct strgrp_grp *grp) {
159 darray_free(grp->items);
162 static struct strgrp_grp *
163 new_grp(tal_t *const tctx, const char *const str, void *const data) {
164 struct strgrp_grp *b = talz(tctx, struct strgrp_grp);
168 b->key = tal_strdup(b, str);
169 b->key_len = strlen(str);
171 darray_init(b->items);
172 tal_add_destructor(b, free_grp);
173 if (!add_item(b, str, data)) {
179 static struct strgrp_grp *
180 add_grp(struct strgrp *const ctx, const char *const str,
182 struct strgrp_grp *b = new_grp(ctx, str, data);
186 darray_push(ctx->grps, b);
189 if (!tal_resize(&ctx->scores, ctx->n_grps)) {
193 ctx->scores = tal_arr(ctx, struct grp_score, ctx->n_grps);
202 strgrp_new(const double threshold) {
203 struct strgrp *ctx = talz(NULL, struct strgrp);
204 ctx->threshold = threshold;
205 stringmap_init(ctx->known, NULL);
206 darray_init(ctx->grps);
211 cache(struct strgrp *const ctx, struct strgrp_grp *const grp,
212 const char *const str) {
213 *(stringmap_enter(ctx->known, str)) = grp;
216 static struct strgrp_grp *
217 grp_for(struct strgrp *const ctx, const char *const str) {
222 struct strgrp_grp **const grp = stringmap_lookup(ctx->known, str);
228 // Keep ccanlint happy in reduced feature mode
230 #pragma omp parallel for schedule(dynamic)
232 for (i = 0; i < ctx->n_grps; i++) {
233 ctx->scores[i].grp = darray_item(ctx->grps, i);
234 const bool ss = should_grp_score(ctx, ctx->scores[i].grp, str);
235 ctx->scores[i].score = ss ? grp_score(ctx->scores[i].grp, str) : 0;
237 struct grp_score *max = NULL;
238 for (i = 0; i < ctx->n_grps; i++) {
239 if (!max || ctx->scores[i].score > max->score) {
240 max = &(ctx->scores[i]);
243 return (max && max->score >= ctx->threshold) ? max->grp : NULL;
246 const struct strgrp_grp *
247 strgrp_grp_for(struct strgrp *const ctx, const char *const str) {
248 return grp_for(ctx, str);
251 const struct strgrp_grp *
252 strgrp_add(struct strgrp *const ctx, const char *const str,
254 bool inserted = false;
255 struct strgrp_grp *pick = grp_for(ctx, str);
257 inserted = add_item(pick, str, data);
259 pick = add_grp(ctx, str, data);
260 inserted = (NULL != pick);
263 assert(NULL != pick);
264 cache(ctx, pick, str);
270 strgrp_iter_new(struct strgrp *const ctx) {
271 struct strgrp_iter *iter = talz(ctx, struct strgrp_iter);
280 const struct strgrp_grp *
281 strgrp_iter_next(struct strgrp_iter *const iter) {
282 return (iter->ctx->n_grps == iter->i) ?
283 NULL : darray_item(iter->ctx->grps, iter->i++);
287 strgrp_iter_free(struct strgrp_iter *const iter) {
291 struct strgrp_grp_iter *
292 strgrp_grp_iter_new(const struct strgrp_grp *const grp) {
293 struct strgrp_grp_iter *iter = talz(grp, struct strgrp_grp_iter);
302 const struct strgrp_item *
303 strgrp_grp_iter_next(struct strgrp_grp_iter *const iter) {
304 return (iter->grp->n_items == iter->i) ?
305 NULL : darray_item(iter->grp->items, iter->i++);
309 strgrp_grp_iter_free(struct strgrp_grp_iter *iter) {
314 strgrp_grp_key(const struct strgrp_grp *const grp) {
319 strgrp_item_key(const struct strgrp_item *const item) {
324 strgrp_item_value(const struct strgrp_item *const item) {
329 strgrp_free(struct strgrp *const ctx) {
330 darray_free(ctx->grps);
331 stringmap_free(ctx->known);
336 strgrp_free_cb(struct strgrp *const ctx, void (*cb)(void *data)) {
337 struct strgrp_grp **grp;
338 struct strgrp_item **item;
339 darray_foreach(grp, ctx->grps) {
340 darray_foreach(item, (*grp)->items) {
350 print_item(const struct strgrp_item *item) {
351 printf("\t%s\n", item->key);
355 print_grp(const struct strgrp_grp *const grp) {
356 struct strgrp_item **item;
357 printf("%s:\n", grp->key);
358 darray_foreach(item, grp->items) {
365 strgrp_print(const struct strgrp *const ctx) {
366 struct strgrp_grp **grp;
367 darray_foreach(grp, ctx->grps) {