sai2 += ref[i] * ref[i];
sbi2 += key[i] * key[i];
}
- return saibi / (sqrt(sai2) * sqrt(sbi2));
+ return 1.0 - (2 * acos(saibi / sqrt(sai2 * sbi2)) / M_PI);
}
/* Low-cost filter functions */
+static inline double
+cossim_correction(const double s) /* additive adjustment for a strcossim() score s */
+{
+ return -((s - 0.5) * (s - 0.5)) + 0.33; /* 0.33 - (s - 0.5)^2: largest (0.33) at s == 0.5, 0.08 at s == 0 and s == 1 */
+}
+
static inline bool
should_grp_score_cos(const struct strgrp *const ctx,
struct strgrp_grp *const grp, const char *const str) {
- return ctx->threshold <= strcossim(ctx->pop, grp->pop);
+ const double s1 = strcossim(ctx->pop, grp->pop); /* raw strcossim() score of ctx->pop against grp->pop */
+ const double s2 = s1 + cossim_correction(s1); /* the correction is positive for s1 in [0, 1], so it makes this filter more permissive there */
+ return ctx->threshold <= s2; /* pass when the adjusted score reaches ctx->threshold; str is not consulted */
}
static inline bool
should_grp_score_len(const struct strgrp *const ctx,
const struct strgrp_grp *const grp, const char *const str) {
- const size_t strl = strlen(str);
- const size_t keyl = grp->key_len;
- double sr = strl / keyl;
- if (1 < sr) {
- sr = 1 / sr;
- }
- return ctx->threshold <= sr;
+ const double lstr = (double) strlen(str); /* length of the candidate string, as a double so the ratio below is floating-point */
+ const double lkey = (double) grp->key_len; /* stored length of the group's key */
+ const double lmin = (lstr > lkey) ? lkey : lstr; /* the shorter of the two lengths */
+ const double s = sqrt((2 * lmin * lmin) / (1.0 * lstr * lstr + lkey * lkey)); /* sqrt(2*lmin^2 / (lstr^2 + lkey^2)); 1.0 for equal non-zero lengths, smaller as they diverge */
+ return ctx->threshold <= s; /* NOTE(review): if both lengths are 0, s comes from 0/0 -- confirm empty strings cannot reach this filter */
}
/* Scoring - Longest Common Subsequence[2]
static inline double
nlcs(const char *const a, const char *const b) {
const double lcss = lcs(a, b);
- return 2 * lcss / (strlen(a) + strlen(b));
+ const double la = (double) strlen(a); /* length of a as a double */
+ const double lb = (double) strlen(b); /* length of b as a double */
+ const double s = sqrt((2 * lcss * lcss) / (la * la + lb * lb)); /* sqrt(2*lcss^2 / (la^2 + lb^2)); NOTE(review): 0/0 when a and b are both empty -- confirm callers exclude that */
+ return s; /* lcs() result scaled by the two string lengths */
}
static inline double