X-Git-Url: http://git.ozlabs.org/?a=blobdiff_plain;f=ccan%2Fstrgrp%2Fstrgrp.c;h=bab8d334a99eb6b0216c22cb806ea61f5b77b624;hb=2d17aeeecc0ae1f16398960bd41a2e7047e57c60;hp=85d9765c66f69ca68961bab045a4f49eb48c28fb;hpb=10db5dc0f2038a317d40cbf216f26684163dcf84;p=ccan

diff --git a/ccan/strgrp/strgrp.c b/ccan/strgrp/strgrp.c
index 85d9765c..bab8d334 100644
--- a/ccan/strgrp/strgrp.c
+++ b/ccan/strgrp/strgrp.c
@@ -103,27 +103,33 @@ strcossim(const int16_t ref[CHAR_N_VALUES], const int16_t key[CHAR_N_VALUES]) {
 		sai2 += ref[i] * ref[i];
 		sbi2 += key[i] * key[i];
 	}
-	return saibi / (sqrt(sai2) * sqrt(sbi2));
+	return 1.0 - (2 * acos(saibi / sqrt(sai2 * sbi2)) / M_PI);
 }
 
 /* Low-cost filter functions */
 
+static inline double
+cossim_correction(const double s)
+{
+	return -((s - 0.5) * (s - 0.5)) + 0.33;
+}
+
 static inline bool
 should_grp_score_cos(const struct strgrp *const ctx,
 		struct strgrp_grp *const grp, const char *const str) {
-	return ctx->threshold <= strcossim(ctx->pop, grp->pop);
+	const double s1 = strcossim(ctx->pop, grp->pop);
+	const double s2 = s1 + cossim_correction(s1);
+	return ctx->threshold <= s2;
 }
 
 static inline bool
 should_grp_score_len(const struct strgrp *const ctx,
 		const struct strgrp_grp *const grp, const char *const str) {
-	const size_t strl = strlen(str);
-	const size_t keyl = grp->key_len;
-	double sr = strl / keyl;
-	if (1 < sr) {
-		sr = 1 / sr;
-	}
-	return ctx->threshold <= sr;
+	const double lstr = (double) strlen(str);
+	const double lkey = (double) grp->key_len;
+	const double lmin = (lstr > lkey) ? lkey : lstr;
+	const double s = sqrt((2 * lmin * lmin) / (1.0 * lstr * lstr + lkey * lkey));
+	return ctx->threshold <= s;
 }
 
 /* Scoring - Longest Common Subsequence[2]
@@ -172,7 +178,10 @@ lcs(const char *const a, const char *const b) {
 static inline double
 nlcs(const char *const a, const char *const b) {
 	const double lcss = lcs(a, b);
-	return 2 * lcss / (strlen(a) + strlen(b));
+	const double la = (double) strlen(a);
+	const double lb = (double) strlen(b);
+	const double s = sqrt((2 * lcss * lcss) / (la * la + lb * lb));
+	return s;
 }
 
 static inline double