From e8f7a978bf4eb41c1877958e31a1a7213680335b Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Sat, 20 Feb 2016 21:22:41 +1030 Subject: [PATCH] strgrp: Add cosine fudge-curve to unify filter comparison spaces If we are to use should_grp_score_cos(x,y) as a filter then the following relationship must hold (from least to most expensive): should_grp_score_len(x,y) >= should_grp_score_cos(x,y) >= grp_score(x) should_grp_score_cos(x,y) wasn't holding up its part of the bargain, so real data was used to generate a fudge curve to bring should_grp_score_cos(x,y) results into the same space. Really this is a terrible hack and the problem needs more thought. Evaluation of should_grp_score_cos(x,y)'s performance benefit (given the relaxation of the filter under the fudge curve) is sorely needed. --- ccan/strgrp/_info | 4 ---- ccan/strgrp/strgrp.c | 10 +++++++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ccan/strgrp/_info b/ccan/strgrp/_info index 3c78b35c..2b88ea7b 100644 --- a/ccan/strgrp/_info +++ b/ccan/strgrp/_info @@ -74,10 +74,6 @@ * License: LGPL * Author: Andrew Jeffery * - * Ccanlint: - * tests_pass FAIL - * tests_pass_without_features FAIL - * * Example: * FILE *f; * char *buf; diff --git a/ccan/strgrp/strgrp.c b/ccan/strgrp/strgrp.c index 12119a1f..bab8d334 100644 --- a/ccan/strgrp/strgrp.c +++ b/ccan/strgrp/strgrp.c @@ -108,10 +108,18 @@ strcossim(const int16_t ref[CHAR_N_VALUES], const int16_t key[CHAR_N_VALUES]) { /* Low-cost filter functions */ +static inline double +cossim_correction(const double s) +{ + return -((s - 0.5) * (s - 0.5)) + 0.33; +} + static inline bool should_grp_score_cos(const struct strgrp *const ctx, struct strgrp_grp *const grp, const char *const str) { - return ctx->threshold <= strcossim(ctx->pop, grp->pop); + const double s1 = strcossim(ctx->pop, grp->pop); + const double s2 = s1 + cossim_correction(s1); + return ctx->threshold <= s2; } static inline bool -- 2.39.2