+
+/* String vector cosine similarity[1]
+ *
+ * [1] http://blog.nishtahir.com/2015/09/19/fuzzy-string-matching-using-cosine-similarity/
+ */
+
+/*
+ * Build a per-byte-value histogram ("population count") of a string.
+ *
+ * str: NUL-terminated string; every byte before the terminator is counted.
+ * pop: output array of CHAR_N_VALUES counters. It is zeroed first, then
+ *      pop[b] is incremented once for each occurrence of byte value b.
+ *
+ * Counters saturate at INT16_MAX instead of wrapping, so a string holding
+ * more than INT16_MAX copies of one byte cannot yield a negative count.
+ */
+static inline void
+strpopcnt(const char *const str, int16_t pop[CHAR_N_VALUES]) {
+	const char *c;
+	int16_t *slot;
+	memset(pop, 0, CHAR_N_VALUES * sizeof(*pop));
+	for (c = str; *c; c++) {
+		/*
+		 * Precondition: the character is non-negative as a plain char.
+		 * NOTE(review): where char is signed this rejects bytes >= 0x80
+		 * in assert-enabled builds - confirm that is intended, given
+		 * the unsigned-char index used below.
+		 */
+		assert(*c >= 0);
+		slot = &pop[(unsigned char)*c];
+		if (*slot < INT16_MAX)
+			(*slot)++;
+	}
+}
+
+/*
+ * Angular similarity of two byte-population vectors.
+ *
+ * Computes the cosine of the angle between ref and key,
+ *   cos = (ref . key) / sqrt((ref . ref) * (key . key)),
+ * over CHAR_N_VALUES entries and rescales the angle linearly so that an
+ * angle of 0 gives 1.0 and an angle of pi/2 gives 0.0.
+ *
+ * ref, key: arrays of CHAR_N_VALUES counters.
+ *
+ * Returns 1.0 - 2 * acos(cos) / pi. If either vector is all zeros the
+ * cosine is undefined (0/0); 0.0 is returned in that case instead of NaN.
+ */
+static inline double
+strcossim(const int16_t ref[CHAR_N_VALUES], const int16_t key[CHAR_N_VALUES]) {
+	/* 64-bit sums: CHAR_N_VALUES products of 16-bit values can exceed 32 bits. */
+	int64_t saibi = 0;
+	int64_t sai2 = 0;
+	int64_t sbi2 = 0;
+	double cosine;
+	size_t i;
+	for (i = 0; i < CHAR_N_VALUES; i++) {
+		saibi += (int64_t)ref[i] * key[i];
+		sai2 += (int64_t)ref[i] * ref[i];
+		sbi2 += (int64_t)key[i] * key[i];
+	}
+	/* A zero-length vector has no direction; avoid dividing by zero. */
+	if (sai2 == 0 || sbi2 == 0)
+		return 0.0;
+	/* Form the product in double: as an integer it can overflow. */
+	cosine = (double)saibi / sqrt((double)sai2 * (double)sbi2);
+	/* acos() is only defined on [-1, 1]; guard against rounding drift. */
+	if (cosine > 1.0)
+		cosine = 1.0;
+	else if (cosine < -1.0)
+		cosine = -1.0;
+	return 1.0 - (2 * acos(cosine) / M_PI);
+}
+
+/* Low-cost filter functions */
+
+/*
+ * Correction term for a cosine similarity score.
+ *
+ * s: similarity score to correct.
+ *
+ * Returns 0.33 - (s - 0.5)^2: a downward-opening parabola that peaks at
+ * 0.33 for s == 0.5 and evaluates to 0.08 at s == 0.0 and s == 1.0.
+ */
+static inline double
+cossim_correction(const double s)
+{
+	const double offset = s - 0.5;
+
+	return 0.33 - offset * offset;
+}
+
+/*
+ * Cosine-similarity pre-filter for a group.
+ *
+ * ctx: context supplying the population vector (ctx->pop) and the
+ *      acceptance threshold (ctx->threshold).
+ * grp: group whose population vector (grp->pop) is compared against ctx->pop.
+ * str: not used by this filter.
+ *
+ * Returns true when strcossim(ctx->pop, grp->pop), increased by
+ * cossim_correction() of that same value, is at least ctx->threshold.
+ */
+static inline bool
+should_grp_score_cos(const struct strgrp *const ctx,
+		struct strgrp_grp *const grp, const char *const str) {
+	double score;
+
+	(void)str;
+	score = strcossim(ctx->pop, grp->pop);
+	score = score + cossim_correction(score);
+	return score >= ctx->threshold;
+}
+
+/*
+ * Length-based pre-filter for a group.
+ *
+ * ctx: context supplying the acceptance threshold (ctx->threshold).
+ * grp: group whose key length (grp->key_len) is compared with strlen(str).
+ * str: NUL-terminated candidate string.
+ *
+ * With a = strlen(str), b = grp->key_len and m = min(a, b), the score is
+ *   sqrt(2 * m^2 / (a^2 + b^2)),
+ * which is 1.0 when the two lengths are equal. Returns true when the score
+ * is at least ctx->threshold. When both lengths are zero the quotient is
+ * 0/0, which under IEEE-754 arithmetic is NaN and compares false.
+ */
+static inline bool
+should_grp_score_len(const struct strgrp *const ctx,
+		const struct strgrp_grp *const grp, const char *const str) {
+	const double str_len = (double) strlen(str);
+	const double key_len = (double) grp->key_len;
+	double shortest = str_len;
+	double ratio;
+
+	if (str_len > key_len)
+		shortest = key_len;
+	ratio = (2 * shortest * shortest)
+		/ (str_len * str_len + key_len * key_len);
+	return sqrt(ratio) >= ctx->threshold;
+}
+
+/* Scoring - Longest Common Subsequence[2]
+ *
+ * [2] https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
+ */