Commit 4a439212 authored by Andrew Jeffery's avatar Andrew Jeffery

strgrp: Cache cosine popcounts to reduce computation

Neatly, this reduces some of the OpenMP-related noise in the
implementation by removing the need for the include. The loop body of
grp_for() is also clearer as a result of the change; the changes to the
function interfaces motivated a re-think of the complexity here.
parent 3a663181
...@@ -30,13 +30,6 @@ ...@@ -30,13 +30,6 @@
#include "strgrp.h" #include "strgrp.h"
#include "config.h" #include "config.h"
#if HAVE_OPENMP
#include <omp.h>
#else
#define omp_get_max_threads() 1
#define omp_get_thread_num() 0
#endif
#define CHAR_N_VALUES (1 << CHAR_BIT) #define CHAR_N_VALUES (1 << CHAR_BIT)
typedef darray(struct strgrp_grp *) darray_grp; typedef darray(struct strgrp_grp *) darray_grp;
...@@ -49,11 +42,6 @@ struct grp_score { ...@@ -49,11 +42,6 @@ struct grp_score {
double score; double score;
}; };
struct cosvec {
const char *str;
int16_t pop[CHAR_N_VALUES];
};
typedef darray(struct grp_score *) darray_score; typedef darray(struct grp_score *) darray_score;
struct strgrp { struct strgrp {
...@@ -62,8 +50,7 @@ struct strgrp { ...@@ -62,8 +50,7 @@ struct strgrp {
unsigned int n_grps; unsigned int n_grps;
darray_grp grps; darray_grp grps;
struct grp_score *scores; struct grp_score *scores;
int32_t n_cosvecs; int16_t pop[CHAR_N_VALUES];
struct cosvec *cosvecs;
}; };
struct strgrp_iter { struct strgrp_iter {
...@@ -76,6 +63,7 @@ struct strgrp_grp { ...@@ -76,6 +63,7 @@ struct strgrp_grp {
size_t key_len; size_t key_len;
darray_item items; darray_item items;
int32_t n_items; int32_t n_items;
int16_t pop[CHAR_N_VALUES];
}; };
struct strgrp_grp_iter { struct strgrp_grp_iter {
...@@ -95,27 +83,25 @@ struct strgrp_item { ...@@ -95,27 +83,25 @@ struct strgrp_item {
*/ */
static inline void static inline void
strpopcnt(struct cosvec *ctx) { strpopcnt(const char *const str, int16_t pop[CHAR_N_VALUES]) {
const char *c; const char *c;
memset(ctx->pop, 0, CHAR_N_VALUES * sizeof(int16_t)); memset(pop, 0, CHAR_N_VALUES * sizeof(*pop));
for(c = ctx->str; *c; c++) { for(c = str; *c; c++) {
assert(*c >= 0); assert(*c >= 0);
ctx->pop[(unsigned char)*c]++; pop[(unsigned char)*c]++;
} }
} }
static inline double static inline double
strcossim(struct cosvec *avec, struct cosvec *bvec) { strcossim(const int16_t ref[CHAR_N_VALUES], const int16_t key[CHAR_N_VALUES]) {
strpopcnt(avec);
strpopcnt(bvec);
int32_t saibi = 0; int32_t saibi = 0;
int32_t sai2 = 0; int32_t sai2 = 0;
int32_t sbi2 = 0; int32_t sbi2 = 0;
size_t i; size_t i;
for (i = 0; i < CHAR_N_VALUES; i++) { for (i = 0; i < CHAR_N_VALUES; i++) {
saibi += avec->pop[i] * bvec->pop[i]; saibi += ref[i] * key[i];
sai2 += avec->pop[i] * avec->pop[i]; sai2 += ref[i] * ref[i];
sbi2 += bvec->pop[i] * bvec->pop[i]; sbi2 += key[i] * key[i];
} }
return saibi / (sqrt(sai2) * sqrt(sbi2)); return saibi / (sqrt(sai2) * sqrt(sbi2));
} }
...@@ -123,9 +109,9 @@ strcossim(struct cosvec *avec, struct cosvec *bvec) { ...@@ -123,9 +109,9 @@ strcossim(struct cosvec *avec, struct cosvec *bvec) {
/* Low-cost filter functions */ /* Low-cost filter functions */
static inline bool static inline bool
should_grp_score_cos(const struct strgrp *const ctx, struct cosvec *const avec, should_grp_score_cos(const struct strgrp *const ctx,
struct cosvec *const bvec) { struct strgrp_grp *const grp, const char *const str) {
return ctx->threshold <= strcossim(avec, bvec); return ctx->threshold <= strcossim(ctx->pop, grp->pop);
} }
static inline bool static inline bool
...@@ -248,6 +234,7 @@ add_grp(struct strgrp *const ctx, const char *const str, ...@@ -248,6 +234,7 @@ add_grp(struct strgrp *const ctx, const char *const str,
if (!b) { if (!b) {
return NULL; return NULL;
} }
memcpy(b->pop, ctx->pop, sizeof(ctx->pop));
darray_push(ctx->grps, b); darray_push(ctx->grps, b);
ctx->n_grps++; ctx->n_grps++;
if (ctx->scores) { if (ctx->scores) {
...@@ -269,8 +256,6 @@ strgrp_new(const double threshold) { ...@@ -269,8 +256,6 @@ strgrp_new(const double threshold) {
ctx->threshold = threshold; ctx->threshold = threshold;
stringmap_init(ctx->known, NULL); stringmap_init(ctx->known, NULL);
// n threads compare strings // n threads compare strings
ctx->n_cosvecs = 2 * omp_get_max_threads();
ctx->cosvecs = tal_arrz(ctx, struct cosvec, ctx->n_cosvecs);
darray_init(ctx->grps); darray_init(ctx->grps);
return ctx; return ctx;
} }
...@@ -281,18 +266,12 @@ cache(struct strgrp *const ctx, struct strgrp_grp *const grp, ...@@ -281,18 +266,12 @@ cache(struct strgrp *const ctx, struct strgrp_grp *const grp,
*(stringmap_enter(ctx->known, str)) = grp; *(stringmap_enter(ctx->known, str)) = grp;
} }
static inline struct cosvec *
tl_avec(struct strgrp *ctx) {
return &ctx->cosvecs[2 * omp_get_thread_num()];
}
static inline struct cosvec *
tl_bvec(struct strgrp *ctx) {
return &ctx->cosvecs[2 * omp_get_thread_num() + 1];
}
static struct strgrp_grp * static struct strgrp_grp *
grp_for(struct strgrp *const ctx, const char *const str) { grp_for(struct strgrp *const ctx, const char *const str) {
// Ensure ctx->pop is always populated. Returning null here indicates a new
// group should be created, at which point add_grp() copies ctx->pop into
// the new group's struct.
strpopcnt(str, ctx->pop);
if (!ctx->n_grps) { if (!ctx->n_grps) {
return NULL; return NULL;
} }
...@@ -308,17 +287,13 @@ grp_for(struct strgrp *const ctx, const char *const str) { ...@@ -308,17 +287,13 @@ grp_for(struct strgrp *const ctx, const char *const str) {
#pragma omp parallel for schedule(dynamic) #pragma omp parallel for schedule(dynamic)
#endif #endif
for (i = 0; i < ctx->n_grps; i++) { for (i = 0; i < ctx->n_grps; i++) {
ctx->scores[i].grp = darray_item(ctx->grps, i); struct strgrp_grp *grp = darray_item(ctx->grps, i);
if (should_grp_score_len(ctx, ctx->scores[i].grp, str)) { ctx->scores[i].grp = grp;
tl_avec(ctx)->str = ctx->scores[i].grp->key; ctx->scores[i].score = 0;
tl_bvec(ctx)->str = str; if (should_grp_score_len(ctx, grp, str)) {
if (should_grp_score_cos(ctx, tl_avec(ctx), tl_bvec(ctx))) { if (should_grp_score_cos(ctx, grp, str)) {
ctx->scores[i].score = grp_score(ctx->scores[i].grp, str); ctx->scores[i].score = grp_score(grp, str);
} else {
ctx->scores[i].score = 0;
} }
} else {
ctx->scores[i].score = 0;
} }
} }
struct grp_score *max = NULL; struct grp_score *max = NULL;
...@@ -339,6 +314,9 @@ const struct strgrp_grp * ...@@ -339,6 +314,9 @@ const struct strgrp_grp *
strgrp_add(struct strgrp *const ctx, const char *const str, strgrp_add(struct strgrp *const ctx, const char *const str,
void *const data) { void *const data) {
bool inserted = false; bool inserted = false;
// grp_for() populates the ctx->pop memory. add_grp() copies this memory
// into the strgrp_grp that it creates. It's assumed the ctx->pop memory
// has not been modified between the grp_for() and add_grp() calls.
struct strgrp_grp *pick = grp_for(ctx, str); struct strgrp_grp *pick = grp_for(ctx, str);
if (pick) { if (pick) {
inserted = add_item(pick, str, data); inserted = add_item(pick, str, data);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment