Commit 7c7ad8cc authored by Andrew Jeffery's avatar Andrew Jeffery Committed by Rusty Russell

strgrp: new module

parent 4ad51447
...@@ -102,6 +102,7 @@ MODS_WITH_SRC := aga \ ...@@ -102,6 +102,7 @@ MODS_WITH_SRC := aga \
str/hex \ str/hex \
stringbuilder \ stringbuilder \
stringmap \ stringmap \
strgrp \
strmap \ strmap \
strset \ strset \
take \ take \
......
../../licenses/LGPL-3
\ No newline at end of file
#include "config.h"
#include <stdio.h>
#include <string.h>
/**
* strgrp - group/cluster similar strings.
*
* strgrp uses the Longest Common Subsequence (LCS) algorithm[1] to cluster
* similar strings. It is governed by a threshold which is compared against
* the computed normalised LCS length for all known groups.
*
* As a coarse and not entirely accurate summary, strgrp takes the following
* steps:
*
* 1. For all known strings, calculate the normalised LCS value against the
* input string
* 2. Find the maximum normalised LCS value and associated group
* 3. If the calculated normalised LCS value exceeds the configured threshold,
* add the input string to the group, otherwise create a new group
*
* The clustering operation is expensive; LCS on its own is computationally
* O(mn) on its two input strings and optimally requires O(min(m,n)) memory. In
* general each input string should be compared against all known strings,
* giving O(n^2) behaviour of the clustering algorithm on top of the O(mn) LCS
* similarity measurement.
*
* strgrp tries to battle this complexity on several fronts:
*
* 1. Coarse reduction of the required comparisons
* 1a. Each group has a 'key', which is the string that triggered the creation
* of the group
* 1b. Input strings are only compared against group keys rather than all known
* strings, reducing the complexity to the current number of groups rather
* than all known strings. Note due the pathological case where the number
* of groups is equal to the number of known strings the algorithm still
* has O(n^2) computational complexity
*
* 2. Elimination of LCS computations that will never breach the configured
* threshold
* 2a. This can be measured from the length of the compared strings
* 2b. This avoids invoking the O(mn) behaviour of LCS
*
* 3. Caching of input strings and their associated group
* 3a. By incurring the cost of a map's string hash function we may eliminate
* all calls to the LCS function for exact matches, potentially reducing
* the insertion to a constant-time operation.
*
* 4. Whilst the data dependencies of LCS prevent internally parallel
* implementations, LCS as a function can be applied in parallel. The code
* uses OpenMP to automatically and concurrently distribute scoring of the
* input string against group keys across threads.
*
* [1] https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
*
* License: LGPL
* Author: Andrew Jeffery <andrew@aj.id.au>
*
* Example:
* #define BUF_SIZE 512
* FILE *const f = fdopen(0, "r");
* char *buf = malloc(BUF_SIZE);
* struct strgrp *ctx = strgrp_new(0.85);
* while(fgets(buf, BUF_SIZE, f)) {
* buf[strcspn(buf, "\r\n")] = '\0';
* if (!strgrp_add(ctx, buf, NULL)) {
* printf("Failed to classify %s\n", buf);
* }
* }
*
* // Re-implement something similar to strgrp_print() via API as an
* // example
* struct strgrp_iter *iter = strgrp_iter_new(ctx);
* const struct strgrp_grp *grp;
* while(NULL != (grp = strgrp_iter_next(iter))) {
* printf("%s\n", strgrp_grp_key(grp));
* struct strgrp_grp_iter *grp_iter = strgrp_grp_iter_new(grp);
* const struct strgrp_item *item;
* while(NULL != (item = strgrp_grp_iter_next(grp_iter))) {
* printf("\t%s\n", strgrp_item_key(item));
* }
* strgrp_grp_iter_free(grp_iter);
* }
* strgrp_iter_free(iter);
* strgrp_free(ctx);
* free(buf);
* fclose(f);
*/
int main(int argc, char *argv[]) {
if (argc != 2) {
return 1;
}
if (strcmp(argv[1], "depends") == 0) {
printf("ccan/darray\n");
printf("ccan/stringmap\n");
printf("ccan/tal\n");
printf("ccan/tal/str\n");
return 0;
}
if (strcmp(argv[1], "testdepends") == 0) {
printf("ccan/str\n");
return 0;
}
return 1;
}
/*
Group similar strings
Copyright (C) 2014 Andrew Jeffery <andrew@aj.id.au>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "ccan/darray/darray.h"
#include "ccan/stringmap/stringmap.h"
#include "ccan/tal/tal.h"
#include "ccan/tal/str/str.h"
#include "strgrp.h"
typedef darray(struct strgrp_grp *) darray_grp;
typedef darray(struct strgrp_item *) darray_item;
typedef stringmap(struct strgrp_grp *) stringmap_grp;
struct grp_score {
struct strgrp_grp *grp;
double score;
};
typedef darray(struct grp_score *) darray_score;
struct strgrp {
double threshold;
stringmap_grp known;
unsigned int n_grps;
darray_grp grps;
struct grp_score *scores;
};
struct strgrp_iter {
const struct strgrp *ctx;
int i;
};
struct strgrp_grp {
const char *key;
size_t key_len;
darray_item items;
int32_t n_items;
};
struct strgrp_grp_iter {
const struct strgrp_grp *grp;
int i;
};
struct strgrp_item {
const char *key;
void *value;
};
#define ROWS 2
static inline int cmi(int i, int j) {
return ROWS * j + i;
}
static inline int16_t
lcs(const char *const a, const char *const b) {
const int lb = strlen(b);
const int lbp1 = lb + 1;
int16_t *const lookup = calloc(ROWS * lbp1, sizeof(int16_t));
if (!lookup) {
return -1;
}
int ia, ib;
for (ia = (strlen(a) - 1); ia >= 0; ia--) {
const char iav = a[ia];
for (ib = lb - 1; ib >= 0; ib--) {
const char ibv = b[ib];
const int ial = (ia + 1) & 1; // ia last
const int iac = ia & 1; // ia current
const int ibl = ib + 1; // ib last
// don't need separate "ib current" as it's just ib
if (iav == ibv) {
lookup[cmi(iac, ib)] = 1 + lookup[cmi(ial, ibl)];
} else {
const int16_t valb = lookup[cmi(ial, ib)];
const int16_t vabl = lookup[cmi(iac, ibl)];
lookup[cmi(iac, ib)] = (valb > vabl) ? valb : vabl;
}
}
}
int16_t result = lookup[0];
free(lookup);
return result;
}
#undef ROWS
static inline double
nlcs(const char *const a, const char *const b) {
const double lcss = lcs(a, b);
return 2 * lcss / (strlen(a) + strlen(b));
}
static bool
should_grp_score(const struct strgrp *const ctx,
const struct strgrp_grp *const grp, const char *const str) {
const size_t strl = strlen(str);
const size_t keyl = grp->key_len;
double sr = strl / keyl;
if (1 < sr) {
sr = 1 / sr;
}
return ctx->threshold <= sr;
}
static inline double
grp_score(const struct strgrp_grp *const grp, const char *const str) {
return nlcs(grp->key, str);
}
static struct strgrp_item *
new_item(tal_t *const tctx, const char *const str, void *const data) {
struct strgrp_item *i = talz(tctx, struct strgrp_item);
if (!i) {
return NULL;
}
i->key = tal_strdup(i, str);
i->value = data;
return i;
}
static bool
add_item(struct strgrp_grp *const ctx, const char *const str,
void *const data) {
struct strgrp_item *i = new_item(ctx, str, data);
if (!i) {
return false;
}
darray_push(ctx->items, i);
ctx->n_items++;
return true;
}
static void
free_grp(struct strgrp_grp *grp) {
darray_free(grp->items);
}
static struct strgrp_grp *
new_grp(tal_t *const tctx, const char *const str, void *const data) {
struct strgrp_grp *b = talz(tctx, struct strgrp_grp);
if (!b) {
return NULL;
}
b->key = tal_strdup(b, str);
b->key_len = strlen(str);
b->n_items = 0;
darray_init(b->items);
tal_add_destructor(b, free_grp);
if (!add_item(b, str, data)) {
return tal_free(b);
}
return b;
}
static struct strgrp_grp *
add_grp(struct strgrp *const ctx, const char *const str,
void *const data) {
struct strgrp_grp *b = new_grp(ctx, str, data);
if (!b) {
return NULL;
}
darray_push(ctx->grps, b);
ctx->n_grps++;
if (ctx->scores) {
if (!tal_resize(&ctx->scores, ctx->n_grps)) {
return NULL;
}
} else {
ctx->scores = tal_arr(ctx, struct grp_score, ctx->n_grps);
if (!ctx->scores) {
return NULL;
}
}
return b;
}
struct strgrp *
strgrp_new(const double threshold) {
struct strgrp *ctx = talz(NULL, struct strgrp);
ctx->threshold = threshold;
stringmap_init(ctx->known, NULL);
darray_init(ctx->grps);
return ctx;
}
static inline void
cache(struct strgrp *const ctx, struct strgrp_grp *const grp,
const char *const str) {
*(stringmap_enter(ctx->known, str)) = grp;
}
static struct strgrp_grp *
grp_for(struct strgrp *const ctx, const char *const str) {
if (!ctx->n_grps) {
return NULL;
}
{
struct strgrp_grp **const grp = stringmap_lookup(ctx->known, str);
if (grp) {
return *grp;
}
}
int i;
#pragma omp parallel for schedule(dynamic)
for (i = 0; i < ctx->n_grps; i++) {
ctx->scores[i].grp = darray_item(ctx->grps, i);
const bool ss = should_grp_score(ctx, ctx->scores[i].grp, str);
ctx->scores[i].score = ss ? grp_score(ctx->scores[i].grp, str) : 0;
}
struct grp_score *max = NULL;
for (i = 0; i < ctx->n_grps; i++) {
if (!max || ctx->scores[i].score > max->score) {
max = &(ctx->scores[i]);
}
}
return (max && max->score >= ctx->threshold) ? max->grp : NULL;
}
const struct strgrp_grp *
strgrp_grp_for(struct strgrp *const ctx, const char *const str) {
return grp_for(ctx, str);
}
const struct strgrp_grp *
strgrp_add(struct strgrp *const ctx, const char *const str,
void *const data) {
bool inserted = false;
struct strgrp_grp *pick = grp_for(ctx, str);
if (pick) {
inserted = add_item(pick, str, data);
} else {
pick = add_grp(ctx, str, data);
inserted = (NULL != pick);
}
if (inserted) {
assert(NULL != pick);
cache(ctx, pick, str);
}
return pick;
}
struct strgrp_iter *
strgrp_iter_new(struct strgrp *const ctx) {
struct strgrp_iter *iter = talz(ctx, struct strgrp_iter);
if (!iter) {
return NULL;
}
iter->ctx = ctx;
iter->i = 0;
return iter;
}
const struct strgrp_grp *
strgrp_iter_next(struct strgrp_iter *const iter) {
return (iter->ctx->n_grps == iter->i) ?
NULL : darray_item(iter->ctx->grps, iter->i++);
}
void
strgrp_iter_free(struct strgrp_iter *const iter) {
tal_free(iter);
}
struct strgrp_grp_iter *
strgrp_grp_iter_new(const struct strgrp_grp *const grp) {
struct strgrp_grp_iter *iter = talz(grp, struct strgrp_grp_iter);
if (!iter) {
return NULL;
}
iter->grp = grp;
iter->i = 0;
return iter;
}
const struct strgrp_item *
strgrp_grp_iter_next(struct strgrp_grp_iter *const iter) {
return (iter->grp->n_items == iter->i) ?
NULL : darray_item(iter->grp->items, iter->i++);
}
void
strgrp_grp_iter_free(struct strgrp_grp_iter *iter) {
tal_free(iter);
}
const char *
strgrp_grp_key(const struct strgrp_grp *const grp) {
return grp->key;
}
const char *
strgrp_item_key(const struct strgrp_item *const item) {
return item->key;
}
void *
strgrp_item_value(const struct strgrp_item *const item) {
return item->value;
}
void
strgrp_free(struct strgrp *const ctx) {
darray_free(ctx->grps);
stringmap_free(ctx->known);
tal_free(ctx);
}
void
strgrp_free_cb(struct strgrp *const ctx, void (*cb)(void *data)) {
struct strgrp_grp **grp;
struct strgrp_item **item;
darray_foreach(grp, ctx->grps) {
darray_foreach(item, (*grp)->items) {
cb((*item)->value);
}
}
strgrp_free(ctx);
}
#include <stdio.h>
static void
print_item(const struct strgrp_item *item) {
printf("\t%s\n", item->key);
}
static void
print_grp(const struct strgrp_grp *const grp) {
struct strgrp_item **item;
printf("%s:\n", grp->key);
darray_foreach(item, grp->items) {
print_item(*item);
}
printf("\n");
}
void
strgrp_print(const struct strgrp *const ctx) {
struct strgrp_grp **grp;
darray_foreach(grp, ctx->grps) {
print_grp(*grp);
}
}
/* GNU LGPL - see LICENSE file for details */
#ifndef STRGRP_H
#define STRGRP_H
#include <stdbool.h>
struct strgrp;
struct strgrp_iter;
struct strgrp_grp;
struct strgrp_grp_iter;
struct strgrp_item;
/**
* Constructs a new strgrp instance.
* @threshold: A value in [0.0, 1.0] describing the desired similarity of
* strings in a cluster
*
* @return A heap-allocated strgrp instance, or NULL if initialisation fails.
* Ownership of the pointer resides with the caller, which must be freed with
* strgrp_free.
*/
struct strgrp *
strgrp_new(double threshold);
/**
* Find a group which best matches the provided string key.
* @ctx: The strgrp instance to search
* @str: The string key to cluster
*
* The returned group is the group providing the maximum score that is equal to
* or above the configured threshold.
*
* @return A matched group, or NULL if no reasonable group is found. Ownership
* of the returned pointer resides with the strgrp instance and it becomes
* invalid if the strgrp instance is freed.
*/
const struct strgrp_grp *
strgrp_grp_for(struct strgrp *ctx, const char *str);
/**
* Add a string key and arbitrary data value (together, an item) to the
* appropriate group.
* @ctx: The strgrp instance to add the string and data
* @str: The string key used to select a group. The caller retains ownership of
* the pointer and may free or change the memory prior to freeing the
* strgrp instance.
* @data: The data to attach to the group's new entry. The caller retains
* ownership of the pointer, but for correctness its lifetime should be at
* least equal to the lifetime of the strgrp instance.
*
* Returns the group to which the item was added. Ownership of the returned
* pointer resides with the strgrp instance and it becomes invalid if the
* strgrp instance is freed.
*/
const struct strgrp_grp *
strgrp_add(struct strgrp *ctx, const char *str, void *data);
/**
* Create an iterator over the current groups.
* @ctx: The strgrp instance to iterate over
*
* @return An iterator structure, or NULL if a failure occurred. Ownership of
* the returned pointer is with the strgrp instance. The caller should pass the
* pointer strgrp_iter_free when the iterator is exhausted. It is invalid to
* call strgrp_iter_next or strgrp_iter_free on the returned pointer after the
* strgrp instance has been freed.
*/
struct strgrp_iter *
strgrp_iter_new(struct strgrp *ctx);
/**
* Extract the next group from a group iterator
* @iter: The iterator in question
*
* Returns the next group in the iterator or NULL if no further groups exist.
* Ownership of the returned pointer resides with the strgrp instance and
* becomes invalid if the strgrp instance is freed.
*/
const struct strgrp_grp *
strgrp_iter_next(struct strgrp_iter *iter);
/**
* Clean up a group iterator instance
* @iter: The iterator to free
*/
void
strgrp_iter_free(struct strgrp_iter *iter);
/**
* Extract the key for a group.
* @grp: A strgrp_grp pointer
*
* A group's key is the input string that caused the creation of the group.
*
* Returns the group key. Ownership of the pointer resides with the grp
* parameter and by extension the strgrp instance. The caller must duplicate
* the string if the content is required beyond the lifetime of the strgrp
* instance.
*/
const char *
strgrp_grp_key(const struct strgrp_grp *grp);
/**
* Create an iterator over items in the provided group
* @grp: The group whose items to iterate over
*
* @return An iterator structure, or NULL if a failure occurred. Ownership of
* the returned pointer is with the strgrp instance. The caller should pass the
* pointer strgrp_grp_iter_free when the iterator is exhausted. It is invalid
* to call strgrp_grp_iter_next or strgrp_grp_iter_free on the returned pointer
* after the strgrp instance has been freed.
*/
struct strgrp_grp_iter *
strgrp_grp_iter_new(const struct strgrp_grp *grp);
/**
* Extract the next item from a item iterator
* @iter: The iterator in question
*
* Returns the next group in the iterator or NULL if no further items exist.
* Ownership of the returned pointer resides with the strgrp instance and
* becomes invalid if the strgrp instance is freed.
*/
const struct strgrp_item *
strgrp_grp_iter_next(struct strgrp_grp_iter *iter);
/**
* Clean up an item iterator instance
* @iter: The iterator to free
*/
void
strgrp_grp_iter_free(struct strgrp_grp_iter *iter);
/**
* Extract the key for an item
*
* @item: The item in question
*
* The key is the string input string which generated the item in the cluster.
*
* Returns the item key. Ownership of the pointer resides with the item
* parameter and by extension the strgrp instance. The caller must duplicate
* the string if the content is required beyond the lifetime of the strgrp
* instance.
*/
const char *
strgrp_item_key(const struct strgrp_item *item);
/**
* Extract the value for an item
* @item: The item in question
*
* The value is the arbitrary pointer associated with the input string
*
* Returns the item value. The ownership of the pointer does not reside with
* the strgrp instance, but for correctness should exceed the lifetime of the
* strgrp instance.
*/
void *
strgrp_item_value(const struct strgrp_item *item);
/**
* Destroy the strgrp instance
*
* @ctx: The strgrp instance in question
*/
void
strgrp_free(struct strgrp *ctx);
/**
* Destroy the strgrp instance, but not before applying cb() to each item's value element
* @ctx: The strgrp instance in question
* @cb: The callback to execute against each item's value. This might be used
* to free the value data.
*/
void
strgrp_free_cb(struct strgrp *ctx, void (*cb)(void *data));
/**
* Dump the groupings to stdout.
* @ctx: The strgrp instance in question
*/
void
strgrp_print(const struct strgrp *ctx);
#endif
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 0);
return one_group_from_two(ctx, "a", NULL, "b", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 0.25);
return one_group_from_two(ctx, "abcde", NULL, "zyxab", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 0.50);
return one_group_from_two(ctx, "abcde", NULL, "zyabc", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 0.50);
return one_group_from_two(ctx, "abcde", NULL, "zabcd", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 0.25);
return two_groups_from_two(ctx, "abcde", NULL, "zyxwa", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 0.50);
return two_groups_from_two(ctx, "abcde", NULL, "zyxab", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 0.75);
return two_groups_from_two(ctx, "abcde", NULL, "zyabc", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 1.0);
return one_group_from_two(ctx, "a", NULL, "a", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, 1.1);
return one_group_from_two(ctx, "a", NULL, "a", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, DEFAULT_SIMILARITY);
return one_group_from_two(ctx, "a", (void *)"1", "a", (void *)"2");
}
#include "../test/helpers.h"
int main(void) {
const char k[] = "a";
struct strgrp *ctx;
const struct strgrp_grp *grp;
struct strgrp_grp_iter *iter;
const struct strgrp_item *item;
plan_tests(5);
create(ctx, DEFAULT_SIMILARITY);
grp = strgrp_add(ctx, k, NULL);
ok1(streq(k, strgrp_grp_key(grp)));
iter = strgrp_grp_iter_new(grp);
item = strgrp_grp_iter_next(iter);
ok1(item);
ok1(streq(k, strgrp_item_key(item)));
ok1(!strgrp_item_value(item));
ok1(!strgrp_grp_iter_next(iter));
strgrp_grp_iter_free(iter);
strgrp_free(ctx);
return exit_status();
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
create(ctx, DEFAULT_SIMILARITY);
return two_groups_from_two(ctx, "a", NULL, "b", NULL);
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
struct strgrp_iter *iter;
plan_tests(1);
create(ctx, DEFAULT_SIMILARITY);
iter = strgrp_iter_new(ctx);
ok1(!strgrp_iter_next(iter));
strgrp_iter_free(iter);
strgrp_free(ctx);
return exit_status();
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
struct strgrp_iter *iter;
plan_tests(2);
create(ctx, DEFAULT_SIMILARITY);
strgrp_add(ctx, "a", NULL);
iter = strgrp_iter_new(ctx);
ok1(strgrp_iter_next(iter));
ok1(!strgrp_iter_next(iter));
strgrp_iter_free(iter);
strgrp_free(ctx);
return exit_status();
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
struct strgrp_iter *iter;
plan_tests(3);
create(ctx, DEFAULT_SIMILARITY);
strgrp_add(ctx, "a", NULL);
strgrp_add(ctx, "b", NULL);
iter = strgrp_iter_new(ctx);
ok1(strgrp_iter_next(iter));
ok1(strgrp_iter_next(iter));
ok1(!strgrp_iter_next(iter));
strgrp_iter_free(iter);
strgrp_free(ctx);
return exit_status();
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
plan_tests(1);
create(ctx, DEFAULT_SIMILARITY);
strgrp_free(ctx);
pass("Successfully initialised strgrp instance");
return exit_status();
}
#include "../test/helpers.h"
static void
data_cb(void *data) {
ok1(streq("1", (char *)data));
}
int main(void) {
struct strgrp *ctx;
plan_tests(1);
create(ctx, DEFAULT_SIMILARITY);
strgrp_add(ctx, "a", (void *)"1");
strgrp_free_cb(ctx, &data_cb);
return exit_status();
}
#include "../test/helpers.h"
int main(void) {
struct strgrp *ctx;
plan_tests(1);
create(ctx, DEFAULT_SIMILARITY);
strgrp_add(ctx, "a", (void *)"1");
strgrp_add(ctx, "a", (void *)"2");
strgrp_add(ctx, "b", (void *)"3");
strgrp_print(ctx);
strgrp_free(ctx);
pass("No errors");
return exit_status();
}
//#include "../test/helpers.h"
#include "helpers.h"
int
one_group_from_two(struct strgrp *ctx,
const char *const k1, void *v1,
const char *const k2, void *v2) {
const struct strgrp_grp *grp1, *grp2;
struct strgrp_grp_iter *iter;
const struct strgrp_item *item;
plan_tests(10);
grp1 = strgrp_add(ctx, k1, v1);
ok1(grp1);
grp2 = strgrp_add(ctx, k2, v2);
ok1(grp2);
ok1(grp1 == grp2);
iter = strgrp_grp_iter_new(grp1);
item = strgrp_grp_iter_next(iter);
ok1(item);
ok1(streq(k1, strgrp_item_key(item)));
ok1(v1 == strgrp_item_value(item));
item = strgrp_grp_iter_next(iter);
ok1(item);
ok1(streq(k2, strgrp_item_key(item)));
ok1(v2 == strgrp_item_value(item));
ok1(!strgrp_grp_iter_next(iter));
strgrp_grp_iter_free(iter);
strgrp_free(ctx);
return exit_status();
}
int
two_groups_from_two(struct strgrp *ctx,
const char *const k1, void *v1,
const char *const k2, void *v2) {
const struct strgrp_grp *grp1, *grp2;
struct strgrp_grp_iter *iter;
const struct strgrp_item *item;
plan_tests(11);
grp1 = strgrp_add(ctx, k1, v1);
ok1(grp1);
grp2 = strgrp_add(ctx, k2, v2);
ok1(grp2);
ok1(grp1 != grp2);
{
iter = strgrp_grp_iter_new(grp1);
item = strgrp_grp_iter_next(iter);
ok1(item);
ok1(streq(k1, strgrp_item_key(item)));
ok1(v1 == strgrp_item_value(item));
ok1(!strgrp_grp_iter_next(iter));
strgrp_grp_iter_free(iter);
}
{
iter = strgrp_grp_iter_new(grp2);
item = strgrp_grp_iter_next(iter);
ok1(item);
ok1(streq(k2, strgrp_item_key(item)));
ok1(v2 == strgrp_item_value(item));
ok1(!strgrp_grp_iter_next(iter));
strgrp_grp_iter_free(iter);
}
strgrp_free(ctx);
return exit_status();
}
#ifndef STRGRP_TEST_HELPERS
#define STRGRP_TEST_HELPERS
#include <stdbool.h>
#include "ccan/str/str.h"
#include "ccan/tap/tap.h"
#include "../strgrp.h"
#define DEFAULT_SIMILARITY 0.85
#define create(dst, similarity) \
do { \
dst = strgrp_new(similarity); \
if (!dst) { \
fail("strgrp_new() returned NULL reference"); \
return 1; \
} \
} while (0)
int
one_group_from_two(struct strgrp *ctx,
const char *const k1, void *v1,
const char *const k2, void *v2);
int
two_groups_from_two(struct strgrp *ctx,
const char *const k1, void *v1,
const char *const k2, void *v2);
#endif /* STRGRP_TEST_HELPERS */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment