From 7c7ad8cc84eb75d83222fb269d9c117c12c8e75c Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 20 Aug 2015 15:46:31 +0930 Subject: [PATCH] strgrp: new module --- Makefile-ccan | 1 + ccan/strgrp/LICENSE | 1 + ccan/strgrp/_info | 107 +++++ ccan/strgrp/strgrp.c | 366 ++++++++++++++++++ ccan/strgrp/strgrp.h | 186 +++++++++ ccan/strgrp/test/api_all_similar_insert_two.c | 7 + .../test/api_almost_different_25_insert_two.c | 7 + .../test/api_almost_different_50_insert_two.c | 7 + .../test/api_almost_different_75_insert_two.c | 7 + .../test/api_almost_similar_25_insert_two.c | 7 + .../test/api_almost_similar_50_insert_two.c | 7 + .../test/api_almost_similar_75_insert_two.c | 7 + ccan/strgrp/test/api_exact_match_insert_two.c | 7 + .../api_illegal_none_similar_insert_two.c | 7 + ccan/strgrp/test/api_insert_duplicate.c | 7 + ccan/strgrp/test/api_insert_one.c | 23 ++ ccan/strgrp/test/api_insert_two_distinct.c | 7 + ccan/strgrp/test/api_iterate_no_groups.c | 14 + ccan/strgrp/test/api_iterate_one_group.c | 16 + ccan/strgrp/test/api_iterate_two_groups.c | 18 + ccan/strgrp/test/api_new_free.c | 11 + ccan/strgrp/test/api_test_free_cb.c | 16 + ccan/strgrp/test/api_test_print.c | 15 + ccan/strgrp/test/helpers.c | 67 ++++ ccan/strgrp/test/helpers.h | 29 ++ 25 files changed, 947 insertions(+) create mode 120000 ccan/strgrp/LICENSE create mode 100644 ccan/strgrp/_info create mode 100644 ccan/strgrp/strgrp.c create mode 100644 ccan/strgrp/strgrp.h create mode 100644 ccan/strgrp/test/api_all_similar_insert_two.c create mode 100644 ccan/strgrp/test/api_almost_different_25_insert_two.c create mode 100644 ccan/strgrp/test/api_almost_different_50_insert_two.c create mode 100644 ccan/strgrp/test/api_almost_different_75_insert_two.c create mode 100644 ccan/strgrp/test/api_almost_similar_25_insert_two.c create mode 100644 ccan/strgrp/test/api_almost_similar_50_insert_two.c create mode 100644 ccan/strgrp/test/api_almost_similar_75_insert_two.c create mode 100644 ccan/strgrp/test/api_exact_match_insert_two.c create mode 100644 ccan/strgrp/test/api_illegal_none_similar_insert_two.c create mode 100644 ccan/strgrp/test/api_insert_duplicate.c create mode 100644 ccan/strgrp/test/api_insert_one.c create mode 100644 ccan/strgrp/test/api_insert_two_distinct.c create mode 100644 ccan/strgrp/test/api_iterate_no_groups.c create mode 100644 ccan/strgrp/test/api_iterate_one_group.c create mode 100644 ccan/strgrp/test/api_iterate_two_groups.c create mode 100644 ccan/strgrp/test/api_new_free.c create mode 100644 ccan/strgrp/test/api_test_free_cb.c create mode 100644 ccan/strgrp/test/api_test_print.c create mode 100644 ccan/strgrp/test/helpers.c create mode 100644 ccan/strgrp/test/helpers.h diff --git a/Makefile-ccan b/Makefile-ccan index 8d13a4f5..fb7e4c9b 100644 --- a/Makefile-ccan +++ b/Makefile-ccan @@ -102,6 +102,7 @@ MODS_WITH_SRC := aga \ str/hex \ stringbuilder \ stringmap \ + strgrp \ strmap \ strset \ take \ diff --git a/ccan/strgrp/LICENSE b/ccan/strgrp/LICENSE new file mode 120000 index 00000000..74550445 --- /dev/null +++ b/ccan/strgrp/LICENSE @@ -0,0 +1 @@ +../../licenses/LGPL-3 \ No newline at end of file diff --git a/ccan/strgrp/_info b/ccan/strgrp/_info new file mode 100644 index 00000000..fcf2fb26 --- /dev/null +++ b/ccan/strgrp/_info @@ -0,0 +1,107 @@ +#include "config.h" +#include +#include + +/** + * strgrp - group/cluster similar strings. + * + * strgrp uses the Longest Common Subsequence (LCS) algorithm[1] to cluster + * similar strings. It is governed by a threshold which is compared against + * the computed normalised LCS length for all known groups. + * + * As a coarse and not entirely accurate summary, strgrp takes the following + * steps: + * + * 1. For all known strings, calculate the normalised LCS value against the + * input string + * 2. Find the maximum normalised LCS value and associated group + * 3. If the calculated normalised LCS value exceeds the configured threshold, + * add the input string to the group, otherwise create a new group + * + * The clustering operation is expensive; LCS on its own is computationally + * O(mn) on its two input strings and optimally requires O(min(m,n)) memory. In + * general each input string should be compared against all known strings, + * giving O(n^2) behaviour of the clustering algorithm on top of the O(mn) LCS + * similarity measurement. + * + * strgrp tries to battle this complexity on several fronts: + * + * 1. Coarse reduction of the required comparisons + * 1a. Each group has a 'key', which is the string that triggered the creation + * of the group + * 1b. Input strings are only compared against group keys rather than all known + * strings, reducing the complexity to the current number of groups rather + * than all known strings. Note due the pathological case where the number + * of groups is equal to the number of known strings the algorithm still + * has O(n^2) computational complexity + * + * 2. Elimination of LCS computations that will never breach the configured + * threshold + * 2a. This can be measured from the length of the compared strings + * 2b. This avoids invoking the O(mn) behaviour of LCS + * + * 3. Caching of input strings and their associated group + * 3a. By incurring the cost of a map's string hash function we may eliminate + * all calls to the LCS function for exact matches, potentially reducing + * the insertion to a constant-time operation. + * + * 4. Whilst the data dependencies of LCS prevent internally parallel + * implementations, LCS as a function can be applied in parallel. The code + * uses OpenMP to automatically and concurrently distribute scoring of the + * input string against group keys across threads. + * + * [1] https://en.wikipedia.org/wiki/Longest_common_subsequence_problem + * + * License: LGPL + * Author: Andrew Jeffery + * + * Example: + * #define BUF_SIZE 512 + * FILE *const f = fdopen(0, "r"); + * char *buf = malloc(BUF_SIZE); + * struct strgrp *ctx = strgrp_new(0.85); + * while(fgets(buf, BUF_SIZE, f)) { + * buf[strcspn(buf, "\r\n")] = '\0'; + * if (!strgrp_add(ctx, buf, NULL)) { + * printf("Failed to classify %s\n", buf); + * } + * } + * + * // Re-implement something similar to strgrp_print() via API as an + * // example + * struct strgrp_iter *iter = strgrp_iter_new(ctx); + * const struct strgrp_grp *grp; + * while(NULL != (grp = strgrp_iter_next(iter))) { + * printf("%s\n", strgrp_grp_key(grp)); + * struct strgrp_grp_iter *grp_iter = strgrp_grp_iter_new(grp); + * const struct strgrp_item *item; + * while(NULL != (item = strgrp_grp_iter_next(grp_iter))) { + * printf("\t%s\n", strgrp_item_key(item)); + * } + * strgrp_grp_iter_free(grp_iter); + * } + * strgrp_iter_free(iter); + * strgrp_free(ctx); + * free(buf); + * fclose(f); + */ +int main(int argc, char *argv[]) { + if (argc != 2) { + return 1; + } + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/darray\n"); + printf("ccan/stringmap\n"); + printf("ccan/tal\n"); + printf("ccan/tal/str\n"); + return 0; + } + + if (strcmp(argv[1], "testdepends") == 0) { + printf("ccan/str\n"); + return 0; + } + + return 1; +} diff --git a/ccan/strgrp/strgrp.c b/ccan/strgrp/strgrp.c new file mode 100644 index 00000000..1f3d2822 --- /dev/null +++ b/ccan/strgrp/strgrp.c @@ -0,0 +1,366 @@ +/* + Group similar strings + Copyright (C) 2014 Andrew Jeffery + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . +*/ +#include +#include +#include +#include +#include +#include "ccan/darray/darray.h" +#include "ccan/stringmap/stringmap.h" +#include "ccan/tal/tal.h" +#include "ccan/tal/str/str.h" +#include "strgrp.h" + +typedef darray(struct strgrp_grp *) darray_grp; +typedef darray(struct strgrp_item *) darray_item; + +typedef stringmap(struct strgrp_grp *) stringmap_grp; + +struct grp_score { + struct strgrp_grp *grp; + double score; +}; + +typedef darray(struct grp_score *) darray_score; + +struct strgrp { + double threshold; + stringmap_grp known; + unsigned int n_grps; + darray_grp grps; + struct grp_score *scores; +}; + +struct strgrp_iter { + const struct strgrp *ctx; + int i; +}; + +struct strgrp_grp { + const char *key; + size_t key_len; + darray_item items; + int32_t n_items; +}; + +struct strgrp_grp_iter { + const struct strgrp_grp *grp; + int i; +}; + +struct strgrp_item { + const char *key; + void *value; +}; + +#define ROWS 2 + +static inline int cmi(int i, int j) { + return ROWS * j + i; +} + +static inline int16_t +lcs(const char *const a, const char *const b) { + const int lb = strlen(b); + const int lbp1 = lb + 1; + int16_t *const lookup = calloc(ROWS * lbp1, sizeof(int16_t)); + if (!lookup) { + return -1; + } + int ia, ib; + for (ia = (strlen(a) - 1); ia >= 0; ia--) { + const char iav = a[ia]; + for (ib = lb - 1; ib >= 0; ib--) { + const char ibv = b[ib]; + const int ial = (ia + 1) & 1; // ia last + const int iac = ia & 1; // ia current + const int ibl = ib + 1; // ib last + // don't need separate "ib current" as it's just ib + if (iav == ibv) { + lookup[cmi(iac, ib)] = 1 + lookup[cmi(ial, ibl)]; + } else { + const int16_t valb = lookup[cmi(ial, ib)]; + const int16_t vabl = lookup[cmi(iac, ibl)]; + lookup[cmi(iac, ib)] = (valb > vabl) ? valb : vabl; + } + } + } + int16_t result = lookup[0]; + free(lookup); + return result; +} + +#undef ROWS + +static inline double +nlcs(const char *const a, const char *const b) { + const double lcss = lcs(a, b); + return 2 * lcss / (strlen(a) + strlen(b)); +} + +static bool +should_grp_score(const struct strgrp *const ctx, + const struct strgrp_grp *const grp, const char *const str) { + const size_t strl = strlen(str); + const size_t keyl = grp->key_len; + double sr = strl / keyl; + if (1 < sr) { + sr = 1 / sr; + } + return ctx->threshold <= sr; +} + +static inline double +grp_score(const struct strgrp_grp *const grp, const char *const str) { + return nlcs(grp->key, str); +} + +static struct strgrp_item * +new_item(tal_t *const tctx, const char *const str, void *const data) { + struct strgrp_item *i = talz(tctx, struct strgrp_item); + if (!i) { + return NULL; + } + i->key = tal_strdup(i, str); + i->value = data; + return i; +} + +static bool +add_item(struct strgrp_grp *const ctx, const char *const str, + void *const data) { + struct strgrp_item *i = new_item(ctx, str, data); + if (!i) { + return false; + } + darray_push(ctx->items, i); + ctx->n_items++; + return true; +} + +static void +free_grp(struct strgrp_grp *grp) { + darray_free(grp->items); +} + +static struct strgrp_grp * +new_grp(tal_t *const tctx, const char *const str, void *const data) { + struct strgrp_grp *b = talz(tctx, struct strgrp_grp); + if (!b) { + return NULL; + } + b->key = tal_strdup(b, str); + b->key_len = strlen(str); + b->n_items = 0; + darray_init(b->items); + tal_add_destructor(b, free_grp); + if (!add_item(b, str, data)) { + return tal_free(b); + } + return b; +} + +static struct strgrp_grp * +add_grp(struct strgrp *const ctx, const char *const str, + void *const data) { + struct strgrp_grp *b = new_grp(ctx, str, data); + if (!b) { + return NULL; + } + darray_push(ctx->grps, b); + ctx->n_grps++; + if (ctx->scores) { + if (!tal_resize(&ctx->scores, ctx->n_grps)) { + return NULL; + } + } else { + ctx->scores = tal_arr(ctx, struct grp_score, ctx->n_grps); + if (!ctx->scores) { + return NULL; + } + } + return b; +} + +struct strgrp * +strgrp_new(const double threshold) { + struct strgrp *ctx = talz(NULL, struct strgrp); + ctx->threshold = threshold; + stringmap_init(ctx->known, NULL); + darray_init(ctx->grps); + return ctx; +} + +static inline void +cache(struct strgrp *const ctx, struct strgrp_grp *const grp, + const char *const str) { + *(stringmap_enter(ctx->known, str)) = grp; +} + +static struct strgrp_grp * +grp_for(struct strgrp *const ctx, const char *const str) { + if (!ctx->n_grps) { + return NULL; + } + { + struct strgrp_grp **const grp = stringmap_lookup(ctx->known, str); + if (grp) { + return *grp; + } + } + int i; + #pragma omp parallel for schedule(dynamic) + for (i = 0; i < ctx->n_grps; i++) { + ctx->scores[i].grp = darray_item(ctx->grps, i); + const bool ss = should_grp_score(ctx, ctx->scores[i].grp, str); + ctx->scores[i].score = ss ? grp_score(ctx->scores[i].grp, str) : 0; + } + struct grp_score *max = NULL; + for (i = 0; i < ctx->n_grps; i++) { + if (!max || ctx->scores[i].score > max->score) { + max = &(ctx->scores[i]); + } + } + return (max && max->score >= ctx->threshold) ? max->grp : NULL; +} + +const struct strgrp_grp * +strgrp_grp_for(struct strgrp *const ctx, const char *const str) { + return grp_for(ctx, str); +} + +const struct strgrp_grp * +strgrp_add(struct strgrp *const ctx, const char *const str, + void *const data) { + bool inserted = false; + struct strgrp_grp *pick = grp_for(ctx, str); + if (pick) { + inserted = add_item(pick, str, data); + } else { + pick = add_grp(ctx, str, data); + inserted = (NULL != pick); + } + if (inserted) { + assert(NULL != pick); + cache(ctx, pick, str); + } + return pick; +} + +struct strgrp_iter * +strgrp_iter_new(struct strgrp *const ctx) { + struct strgrp_iter *iter = talz(ctx, struct strgrp_iter); + if (!iter) { + return NULL; + } + iter->ctx = ctx; + iter->i = 0; + return iter; +} + +const struct strgrp_grp * +strgrp_iter_next(struct strgrp_iter *const iter) { + return (iter->ctx->n_grps == iter->i) ? + NULL : darray_item(iter->ctx->grps, iter->i++); +} + +void +strgrp_iter_free(struct strgrp_iter *const iter) { + tal_free(iter); +} + +struct strgrp_grp_iter * +strgrp_grp_iter_new(const struct strgrp_grp *const grp) { + struct strgrp_grp_iter *iter = talz(grp, struct strgrp_grp_iter); + if (!iter) { + return NULL; + } + iter->grp = grp; + iter->i = 0; + return iter; +} + +const struct strgrp_item * +strgrp_grp_iter_next(struct strgrp_grp_iter *const iter) { + return (iter->grp->n_items == iter->i) ? + NULL : darray_item(iter->grp->items, iter->i++); +} + +void +strgrp_grp_iter_free(struct strgrp_grp_iter *iter) { + tal_free(iter); +} + +const char * +strgrp_grp_key(const struct strgrp_grp *const grp) { + return grp->key; +} + +const char * +strgrp_item_key(const struct strgrp_item *const item) { + return item->key; +} + +void * +strgrp_item_value(const struct strgrp_item *const item) { + return item->value; +} + +void +strgrp_free(struct strgrp *const ctx) { + darray_free(ctx->grps); + stringmap_free(ctx->known); + tal_free(ctx); +} + +void +strgrp_free_cb(struct strgrp *const ctx, void (*cb)(void *data)) { + struct strgrp_grp **grp; + struct strgrp_item **item; + darray_foreach(grp, ctx->grps) { + darray_foreach(item, (*grp)->items) { + cb((*item)->value); + } + } + strgrp_free(ctx); +} + +#include + +static void +print_item(const struct strgrp_item *item) { + printf("\t%s\n", item->key); +} + +static void +print_grp(const struct strgrp_grp *const grp) { + struct strgrp_item **item; + printf("%s:\n", grp->key); + darray_foreach(item, grp->items) { + print_item(*item); + } + printf("\n"); +} + +void +strgrp_print(const struct strgrp *const ctx) { + struct strgrp_grp **grp; + darray_foreach(grp, ctx->grps) { + print_grp(*grp); + } +} diff --git a/ccan/strgrp/strgrp.h b/ccan/strgrp/strgrp.h new file mode 100644 index 00000000..4f0d57b7 --- /dev/null +++ b/ccan/strgrp/strgrp.h @@ -0,0 +1,186 @@ +/* GNU LGPL - see LICENSE file for details */ + +#ifndef STRGRP_H +#define STRGRP_H +#include + +struct strgrp; +struct strgrp_iter; +struct strgrp_grp; +struct strgrp_grp_iter; +struct strgrp_item; + +/** + * Constructs a new strgrp instance. + * @threshold: A value in [0.0, 1.0] describing the desired similarity of + * strings in a cluster + * + * @return A heap-allocated strgrp instance, or NULL if initialisation fails. + * Ownership of the pointer resides with the caller, which must be freed with + * strgrp_free. + */ +struct strgrp * +strgrp_new(double threshold); + +/** + * Find a group which best matches the provided string key. + * @ctx: The strgrp instance to search + * @str: The string key to cluster + * + * The returned group is the group providing the maximum score that is equal to + * or above the configured threshold. + * + * @return A matched group, or NULL if no reasonable group is found. Ownership + * of the returned pointer resides with the strgrp instance and it becomes + * invalid if the strgrp instance is freed. + */ +const struct strgrp_grp * +strgrp_grp_for(struct strgrp *ctx, const char *str); + +/** + * Add a string key and arbitrary data value (together, an item) to the + * appropriate group. + * @ctx: The strgrp instance to add the string and data + * @str: The string key used to select a group. The caller retains ownership of + * the pointer and may free or change the memory prior to freeing the + * strgrp instance. + * @data: The data to attach to the group's new entry. The caller retains + * ownership of the pointer, but for correctness its lifetime should be at + * least equal to the lifetime of the strgrp instance. + * + * Returns the group to which the item was added. Ownership of the returned + * pointer resides with the strgrp instance and it becomes invalid if the + * strgrp instance is freed. + */ +const struct strgrp_grp * +strgrp_add(struct strgrp *ctx, const char *str, void *data); + +/** + * Create an iterator over the current groups. + * @ctx: The strgrp instance to iterate over + * + * @return An iterator structure, or NULL if a failure occurred. Ownership of + * the returned pointer is with the strgrp instance. The caller should pass the + * pointer strgrp_iter_free when the iterator is exhausted. It is invalid to + * call strgrp_iter_next or strgrp_iter_free on the returned pointer after the + * strgrp instance has been freed. + */ +struct strgrp_iter * +strgrp_iter_new(struct strgrp *ctx); + +/** + * Extract the next group from a group iterator + * @iter: The iterator in question + * + * Returns the next group in the iterator or NULL if no further groups exist. + * Ownership of the returned pointer resides with the strgrp instance and + * becomes invalid if the strgrp instance is freed. + */ +const struct strgrp_grp * +strgrp_iter_next(struct strgrp_iter *iter); + +/** + * Clean up a group iterator instance + * @iter: The iterator to free + */ +void +strgrp_iter_free(struct strgrp_iter *iter); + +/** + * Extract the key for a group. + * @grp: A strgrp_grp pointer + * + * A group's key is the input string that caused the creation of the group. + * + * Returns the group key. Ownership of the pointer resides with the grp + * parameter and by extension the strgrp instance. The caller must duplicate + * the string if the content is required beyond the lifetime of the strgrp + * instance. + */ +const char * +strgrp_grp_key(const struct strgrp_grp *grp); + +/** + * Create an iterator over items in the provided group + * @grp: The group whose items to iterate over + * + * @return An iterator structure, or NULL if a failure occurred. Ownership of + * the returned pointer is with the strgrp instance. The caller should pass the + * pointer strgrp_grp_iter_free when the iterator is exhausted. It is invalid + * to call strgrp_grp_iter_next or strgrp_grp_iter_free on the returned pointer + * after the strgrp instance has been freed. + */ +struct strgrp_grp_iter * +strgrp_grp_iter_new(const struct strgrp_grp *grp); + +/** + * Extract the next item from a item iterator + * @iter: The iterator in question + * + * Returns the next group in the iterator or NULL if no further items exist. + * Ownership of the returned pointer resides with the strgrp instance and + * becomes invalid if the strgrp instance is freed. + */ +const struct strgrp_item * +strgrp_grp_iter_next(struct strgrp_grp_iter *iter); + +/** + * Clean up an item iterator instance + * @iter: The iterator to free + */ +void +strgrp_grp_iter_free(struct strgrp_grp_iter *iter); + +/** + * Extract the key for an item + * + * @item: The item in question + * + * The key is the string input string which generated the item in the cluster. + * + * Returns the item key. Ownership of the pointer resides with the item + * parameter and by extension the strgrp instance. The caller must duplicate + * the string if the content is required beyond the lifetime of the strgrp + * instance. + */ +const char * +strgrp_item_key(const struct strgrp_item *item); + +/** + * Extract the value for an item + * @item: The item in question + * + * The value is the arbitrary pointer associated with the input string + * + * Returns the item value. The ownership of the pointer does not reside with + * the strgrp instance, but for correctness should exceed the lifetime of the + * strgrp instance. + */ +void * +strgrp_item_value(const struct strgrp_item *item); + +/** + * Destroy the strgrp instance + * + * @ctx: The strgrp instance in question + */ +void +strgrp_free(struct strgrp *ctx); + +/** + * Destroy the strgrp instance, but not before applying cb() to each item's value element + * @ctx: The strgrp instance in question + * @cb: The callback to execute against each item's value. This might be used + * to free the value data. + */ +void +strgrp_free_cb(struct strgrp *ctx, void (*cb)(void *data)); + + +/** + * Dump the groupings to stdout. + * @ctx: The strgrp instance in question + */ +void +strgrp_print(const struct strgrp *ctx); +#endif diff --git a/ccan/strgrp/test/api_all_similar_insert_two.c b/ccan/strgrp/test/api_all_similar_insert_two.c new file mode 100644 index 00000000..7d68920a --- /dev/null +++ b/ccan/strgrp/test/api_all_similar_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 0); + return one_group_from_two(ctx, "a", NULL, "b", NULL); +} diff --git a/ccan/strgrp/test/api_almost_different_25_insert_two.c b/ccan/strgrp/test/api_almost_different_25_insert_two.c new file mode 100644 index 00000000..3da484bf --- /dev/null +++ b/ccan/strgrp/test/api_almost_different_25_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 0.25); + return one_group_from_two(ctx, "abcde", NULL, "zyxab", NULL); +} diff --git a/ccan/strgrp/test/api_almost_different_50_insert_two.c b/ccan/strgrp/test/api_almost_different_50_insert_two.c new file mode 100644 index 00000000..790930b1 --- /dev/null +++ b/ccan/strgrp/test/api_almost_different_50_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 0.50); + return one_group_from_two(ctx, "abcde", NULL, "zyabc", NULL); +} diff --git a/ccan/strgrp/test/api_almost_different_75_insert_two.c b/ccan/strgrp/test/api_almost_different_75_insert_two.c new file mode 100644 index 00000000..dcf01a07 --- /dev/null +++ b/ccan/strgrp/test/api_almost_different_75_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 0.50); + return one_group_from_two(ctx, "abcde", NULL, "zabcd", NULL); +} diff --git a/ccan/strgrp/test/api_almost_similar_25_insert_two.c b/ccan/strgrp/test/api_almost_similar_25_insert_two.c new file mode 100644 index 00000000..35687f5a --- /dev/null +++ b/ccan/strgrp/test/api_almost_similar_25_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 0.25); + return two_groups_from_two(ctx, "abcde", NULL, "zyxwa", NULL); +} diff --git a/ccan/strgrp/test/api_almost_similar_50_insert_two.c b/ccan/strgrp/test/api_almost_similar_50_insert_two.c new file mode 100644 index 00000000..62198e7b --- /dev/null +++ b/ccan/strgrp/test/api_almost_similar_50_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 0.50); + return two_groups_from_two(ctx, "abcde", NULL, "zyxab", NULL); +} diff --git a/ccan/strgrp/test/api_almost_similar_75_insert_two.c b/ccan/strgrp/test/api_almost_similar_75_insert_two.c new file mode 100644 index 00000000..6c8553f1 --- /dev/null +++ b/ccan/strgrp/test/api_almost_similar_75_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 0.75); + return two_groups_from_two(ctx, "abcde", NULL, "zyabc", NULL); +} diff --git a/ccan/strgrp/test/api_exact_match_insert_two.c b/ccan/strgrp/test/api_exact_match_insert_two.c new file mode 100644 index 00000000..c44409a4 --- /dev/null +++ b/ccan/strgrp/test/api_exact_match_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 1.0); + return one_group_from_two(ctx, "a", NULL, "a", NULL); +} diff --git a/ccan/strgrp/test/api_illegal_none_similar_insert_two.c b/ccan/strgrp/test/api_illegal_none_similar_insert_two.c new file mode 100644 index 00000000..bc330c5f --- /dev/null +++ b/ccan/strgrp/test/api_illegal_none_similar_insert_two.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, 1.1); + return one_group_from_two(ctx, "a", NULL, "a", NULL); +} diff --git a/ccan/strgrp/test/api_insert_duplicate.c b/ccan/strgrp/test/api_insert_duplicate.c new file mode 100644 index 00000000..7bf6fb4f --- /dev/null +++ b/ccan/strgrp/test/api_insert_duplicate.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, DEFAULT_SIMILARITY); + return one_group_from_two(ctx, "a", (void *)"1", "a", (void *)"2"); +} diff --git a/ccan/strgrp/test/api_insert_one.c b/ccan/strgrp/test/api_insert_one.c new file mode 100644 index 00000000..68032626 --- /dev/null +++ b/ccan/strgrp/test/api_insert_one.c @@ -0,0 +1,23 @@ +#include "../test/helpers.h" + +int main(void) { + const char k[] = "a"; + struct strgrp *ctx; + const struct strgrp_grp *grp; + struct strgrp_grp_iter *iter; + const struct strgrp_item *item; + + plan_tests(5); + create(ctx, DEFAULT_SIMILARITY); + grp = strgrp_add(ctx, k, NULL); + ok1(streq(k, strgrp_grp_key(grp))); + iter = strgrp_grp_iter_new(grp); + item = strgrp_grp_iter_next(iter); + ok1(item); + ok1(streq(k, strgrp_item_key(item))); + ok1(!strgrp_item_value(item)); + ok1(!strgrp_grp_iter_next(iter)); + strgrp_grp_iter_free(iter); + strgrp_free(ctx); + return exit_status(); +} diff --git a/ccan/strgrp/test/api_insert_two_distinct.c b/ccan/strgrp/test/api_insert_two_distinct.c new file mode 100644 index 00000000..3ee8e3e5 --- /dev/null +++ b/ccan/strgrp/test/api_insert_two_distinct.c @@ -0,0 +1,7 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + create(ctx, DEFAULT_SIMILARITY); + return two_groups_from_two(ctx, "a", NULL, "b", NULL); +} diff --git a/ccan/strgrp/test/api_iterate_no_groups.c b/ccan/strgrp/test/api_iterate_no_groups.c new file mode 100644 index 00000000..a7f692b4 --- /dev/null +++ b/ccan/strgrp/test/api_iterate_no_groups.c @@ -0,0 +1,14 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + struct strgrp_iter *iter; + + plan_tests(1); + create(ctx, DEFAULT_SIMILARITY); + iter = strgrp_iter_new(ctx); + ok1(!strgrp_iter_next(iter)); + strgrp_iter_free(iter); + strgrp_free(ctx); + return exit_status(); +} diff --git a/ccan/strgrp/test/api_iterate_one_group.c b/ccan/strgrp/test/api_iterate_one_group.c new file mode 100644 index 00000000..b363c2c1 --- /dev/null +++ b/ccan/strgrp/test/api_iterate_one_group.c @@ -0,0 +1,16 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + struct strgrp_iter *iter; + + plan_tests(2); + create(ctx, DEFAULT_SIMILARITY); + strgrp_add(ctx, "a", NULL); + iter = strgrp_iter_new(ctx); + ok1(strgrp_iter_next(iter)); + ok1(!strgrp_iter_next(iter)); + strgrp_iter_free(iter); + strgrp_free(ctx); + return exit_status(); +} diff --git a/ccan/strgrp/test/api_iterate_two_groups.c b/ccan/strgrp/test/api_iterate_two_groups.c new file mode 100644 index 00000000..ebf81a53 --- /dev/null +++ b/ccan/strgrp/test/api_iterate_two_groups.c @@ -0,0 +1,18 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + struct strgrp_iter *iter; + + plan_tests(3); + create(ctx, DEFAULT_SIMILARITY); + strgrp_add(ctx, "a", NULL); + strgrp_add(ctx, "b", NULL); + iter = strgrp_iter_new(ctx); + ok1(strgrp_iter_next(iter)); + ok1(strgrp_iter_next(iter)); + ok1(!strgrp_iter_next(iter)); + strgrp_iter_free(iter); + strgrp_free(ctx); + return exit_status(); +} diff --git a/ccan/strgrp/test/api_new_free.c b/ccan/strgrp/test/api_new_free.c new file mode 100644 index 00000000..da3575dd --- /dev/null +++ b/ccan/strgrp/test/api_new_free.c @@ -0,0 +1,11 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + + plan_tests(1); + create(ctx, DEFAULT_SIMILARITY); + strgrp_free(ctx); + pass("Successfully initialised strgrp instance"); + return exit_status(); +} diff --git a/ccan/strgrp/test/api_test_free_cb.c b/ccan/strgrp/test/api_test_free_cb.c new file mode 100644 index 00000000..bf85b36e --- /dev/null +++ b/ccan/strgrp/test/api_test_free_cb.c @@ -0,0 +1,16 @@ +#include "../test/helpers.h" + +static void +data_cb(void *data) { + ok1(streq("1", (char *)data)); +} + +int main(void) { + struct strgrp *ctx; + + plan_tests(1); + create(ctx, DEFAULT_SIMILARITY); + strgrp_add(ctx, "a", (void *)"1"); + strgrp_free_cb(ctx, &data_cb); + return exit_status(); +} diff --git a/ccan/strgrp/test/api_test_print.c b/ccan/strgrp/test/api_test_print.c new file mode 100644 index 00000000..8635882d --- /dev/null +++ b/ccan/strgrp/test/api_test_print.c @@ -0,0 +1,15 @@ +#include "../test/helpers.h" + +int main(void) { + struct strgrp *ctx; + + plan_tests(1); + create(ctx, DEFAULT_SIMILARITY); + strgrp_add(ctx, "a", (void *)"1"); + strgrp_add(ctx, "a", (void *)"2"); + strgrp_add(ctx, "b", (void *)"3"); + strgrp_print(ctx); + strgrp_free(ctx); + pass("No errors"); + return exit_status(); +} diff --git a/ccan/strgrp/test/helpers.c b/ccan/strgrp/test/helpers.c new file mode 100644 index 00000000..24b5dfd0 --- /dev/null +++ b/ccan/strgrp/test/helpers.c @@ -0,0 +1,67 @@ +//#include "../test/helpers.h" +#include "helpers.h" + +int +one_group_from_two(struct strgrp *ctx, + const char *const k1, void *v1, + const char *const k2, void *v2) { + const struct strgrp_grp *grp1, *grp2; + struct strgrp_grp_iter *iter; + const struct strgrp_item *item; + + plan_tests(10); + grp1 = strgrp_add(ctx, k1, v1); + ok1(grp1); + grp2 = strgrp_add(ctx, k2, v2); + ok1(grp2); + ok1(grp1 == grp2); + iter = strgrp_grp_iter_new(grp1); + item = strgrp_grp_iter_next(iter); + ok1(item); + ok1(streq(k1, strgrp_item_key(item))); + ok1(v1 == strgrp_item_value(item)); + item = strgrp_grp_iter_next(iter); + ok1(item); + ok1(streq(k2, strgrp_item_key(item))); + ok1(v2 == strgrp_item_value(item)); + ok1(!strgrp_grp_iter_next(iter)); + strgrp_grp_iter_free(iter); + strgrp_free(ctx); + return exit_status(); +} + +int +two_groups_from_two(struct strgrp *ctx, + const char *const k1, void *v1, + const char *const k2, void *v2) { + const struct strgrp_grp *grp1, *grp2; + struct strgrp_grp_iter *iter; + const struct strgrp_item *item; + + plan_tests(11); + grp1 = strgrp_add(ctx, k1, v1); + ok1(grp1); + grp2 = strgrp_add(ctx, k2, v2); + ok1(grp2); + ok1(grp1 != grp2); + { + iter = strgrp_grp_iter_new(grp1); + item = strgrp_grp_iter_next(iter); + ok1(item); + ok1(streq(k1, strgrp_item_key(item))); + ok1(v1 == strgrp_item_value(item)); + ok1(!strgrp_grp_iter_next(iter)); + strgrp_grp_iter_free(iter); + } + { + iter = strgrp_grp_iter_new(grp2); + item = strgrp_grp_iter_next(iter); + ok1(item); + ok1(streq(k2, strgrp_item_key(item))); + ok1(v2 == strgrp_item_value(item)); + ok1(!strgrp_grp_iter_next(iter)); + strgrp_grp_iter_free(iter); + } + strgrp_free(ctx); + return exit_status(); +} diff --git a/ccan/strgrp/test/helpers.h b/ccan/strgrp/test/helpers.h new file mode 100644 index 00000000..8a754b0c --- /dev/null +++ b/ccan/strgrp/test/helpers.h @@ -0,0 +1,29 @@ +#ifndef STRGRP_TEST_HELPERS +#define STRGRP_TEST_HELPERS +#include +#include "ccan/str/str.h" +#include "ccan/tap/tap.h" +#include "../strgrp.h" + +#define DEFAULT_SIMILARITY 0.85 + +#define create(dst, similarity) \ + do { \ + dst = strgrp_new(similarity); \ + if (!dst) { \ + fail("strgrp_new() returned NULL reference"); \ + return 1; \ + } \ + } while (0) + +int +one_group_from_two(struct strgrp *ctx, + const char *const k1, void *v1, + const char *const k2, void *v2); + +int +two_groups_from_two(struct strgrp *ctx, + const char *const k1, void *v1, + const char *const k2, void *v2); + +#endif /* STRGRP_TEST_HELPERS */ -- 2.39.2