[U-Boot] [Patch] Fix hash table deletion to prevent lost entries
Peter Barada
peter.barada at logicpd.com
Wed Jan 19 17:32:08 CET 2011
>> The hash delete code is in error; instead of just removing the deleted
>> key, it should instead allocate a new hashtable, hash all the keys into
>> the new table except for the deleted key and then reclaim the old table
>> (and deleted key).
> Can you please come up with a patch?
Hashtable delete is broken since freeing entry could break collision
chain for other entries. Instead free key/data pair and mark entry as
deleted (via
negative used value) and then skip deleted entries while probing.
Break up hsearch_r into separate henter_r/hfind_r functions for
readability.
Recycle first deleted entry in probe chain when adding entry to table.
Factor primary/secondary hash calculations into functions and use
in henter_r/hfind_r/hdelete_r.
---
include/search.h | 16 ++-
lib/hashtable.c | 382
++++++++++++++++++++++++++++++++---------------------
2 files changed, 241 insertions(+), 157 deletions(-)
diff --git a/include/search.h b/include/search.h
index a7c1293..4466731 100644
--- a/include/search.h
+++ b/include/search.h
@@ -66,12 +66,16 @@ extern int hcreate_r(size_t __nel, struct
hsearch_data *__htab);
extern void hdestroy_r(struct hsearch_data *__htab);
/*
- * Search for entry matching ITEM.key in internal hash table. If
- * ACTION is `FIND' return found entry or signal error by returning
- * NULL. If ACTION is `ENTER' replace existing data (if any) with
- * ITEM.data.
- * */
-extern int hsearch_r(ENTRY __item, ACTION __action, ENTRY ** __retval,
+ * Search for entry matching ITEM.key in internal hash table. Return
+ * found entry or signal error by returning NULL.
+ */
+extern int hfind_r(ENTRY __item, ENTRY ** __retval,
+ struct hsearch_data *__htab);
+/*
+ * Add entry matching ITEM.key in internal hash table. Replace
+ * existing data (if any) with ITEM.data.
+ */
+extern int henter_r(ENTRY __item, ENTRY ** __retval,
struct hsearch_data *__htab);
/*
diff --git a/lib/hashtable.c b/lib/hashtable.c
index 9f069c0..7b5153a 100644
--- a/lib/hashtable.c
+++ b/lib/hashtable.c
@@ -65,7 +65,7 @@
* which describes the current status.
*/
typedef struct _ENTRY {
- unsigned int used;
+ int used;
ENTRY entry;
} _ENTRY;
@@ -152,7 +152,7 @@ void hdestroy_r(struct hsearch_data *htab)
/* free used memory */
for (i = 1; i <= htab->size; ++i) {
- if (htab->table[i].used) {
+ if (htab->table[i].used > 0) {
ENTRY *ep = &htab->table[i].entry;
free(ep->key);
@@ -165,77 +165,20 @@ void hdestroy_r(struct hsearch_data *htab)
htab->table = NULL;
}
-/*
- * hsearch()
- */
-
-/*
- * This is the search function. It uses double hashing with open
addressing.
- * The argument item.key has to be a pointer to an zero terminated, most
- * probably strings of chars. The function for generating a number of the
- * strings is simple but fast. It can be replaced by a more complex
function
- * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
- *
- * We use an trick to speed up the lookup. The table is created by hcreate
- * with one more element available. This enables us to use the index zero
- * special. This index will never be used because we store the first hash
- * index in the field used where zero means not used. Every other value
- * means used. The used field can be used as a first fast comparison for
- * equality of the stored and the parameter value. This helps to prevent
- * unnecessary expensive calls of strcmp.
- *
- * This implementation differs from the standard library version of
- * this function in a number of ways:
- *
- * - While the standard version does not make any assumptions about
- * the type of the stored data objects at all, this implementation
- * works with NUL terminated strings only.
- * - Instead of storing just pointers to the original objects, we
- * create local copies so the caller does not need to care about the
- * data any more.
- * - The standard implementation does not provide a way to update an
- * existing entry. This version will create a new entry or update an
- * existing one when both "action == ENTER" and "item.data != NULL".
- * - Instead of returning 1 on success, we return the index into the
- * internal hash table, which is also guaranteed to be positive.
- * This allows us direct access to the found hash table slot for
- * example for functions like hdelete().
- */
-
-int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
- struct hsearch_data *htab)
-{
- unsigned int idx;
- size_t key_len = strlen(match);
-
- for (idx = last_idx + 1; idx < htab->size; ++idx) {
- if (!htab->table[idx].used)
- continue;
- if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
- *retval = &htab->table[idx].entry;
- return idx;
- }
- }
-
- __set_errno(ESRCH);
- *retval = NULL;
- return 0;
-}
-int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
- struct hsearch_data *htab)
+/* Primary hash function */
+int hash_primary(ENTRY *item, struct hsearch_data *htab)
{
+ int len = strlen(item->key);
unsigned int hval;
unsigned int count;
- unsigned int len = strlen(item.key);
- unsigned int idx;
- /* Compute an value for the given string. Perhaps use a better
method. */
+ /* Compute a value for the given string. Perhaps use a better
method. */
hval = len;
count = len;
while (count-- > 0) {
hval <<= 4;
- hval += item.key[count];
+ hval += item->key[count];
}
/*
@@ -246,20 +189,95 @@ int hsearch_r(ENTRY item, ACTION action, ENTRY **
retval,
if (hval == 0)
++hval;
+ return hval;
+}
+
+/* Secondary hash function */
+int hash_secondary(unsigned int hval, struct hsearch_data *htab)
+{
+ unsigned int hval2;
+
+ /*
+ * Second hash function:
+ * as suggested in [Knuth]
+ */
+ hval2 = 1 + hval % (htab->size - 2);
+ return hval2;
+}
+
+/*
+ * Find an entry in the hashtable. Skip negative used counts; these mark
+ * deleted entries.
+ */
+int hfind_r(ENTRY item, ENTRY ** retval, struct hsearch_data *htab)
+{
+ unsigned int hval;
+ unsigned int idx;
+
/* The first index tried. */
- idx = hval;
+ idx = hval = hash_primary(&item, htab);
+
+ debug("hash(%s) = %d/%d; used %d\n", item.key, idx, htab->size,
htab->table[idx].used);
+
+ do {
+ unsigned int hval2;
+
+ if (htab->table[idx].used == hval
+ && strcmp(item.key, htab->table[idx].entry.key) == 0) {
+ /* return found entry */
+ *retval = &htab->table[idx].entry;
+ return idx;
+ }
+
+ if (!htab->table[idx].used) {
+ /* end of chain, return empty */
+ __set_errno(ESRCH);
+ *retval = NULL;
+ return 0;
+ }
+
+ hval2 = hash_secondary(hval, htab);
- if (htab->table[idx].used) {
/*
- * Further action might be required according to the
- * action value.
+ * Because SIZE is prime this guarantees to
+ * step through all available indices.
*/
- unsigned hval2;
+ if (idx <= hval2)
+ idx = htab->size + idx - hval2;
+ else
+ idx -= hval2;
+
+ /* If we visited all entries leave the loop */
+ } while (idx != hval);
+ __set_errno(ESRCH);
+ *retval = NULL;
+ return 0;
+}
+
+/*
+ * Add/update an entry in the table; search the whole chain
+ * looking for the entry. Remember first index of deleted entry and
+ * use if found to hold new entry otherwise use empty entry at end of
+ * probe chain
+ */
+int henter_r(ENTRY item, ENTRY ** retval, struct hsearch_data *htab)
+{
+ unsigned int hval;
+ unsigned int idx;
+ int deleted_entry = 0;
+
+ /* The first index tried. */
+ idx = hval = hash_primary(&item, htab);
+
+ debug("hash(%s) = %d/%d; used %d\n", item.key, idx, htab->size,
htab->table[idx].used);
+
+ do {
+ unsigned int hval2;
if (htab->table[idx].used == hval
- && strcmp(item.key, htab->table[idx].entry.key) == 0) {
+ && strcmp(item.key, htab->table[idx].entry.key) == 0) {
/* Overwrite existing value? */
- if ((action == ENTER) && (item.data != NULL)) {
+ if (item.data != NULL) {
free(htab->table[idx].entry.data);
htab->table[idx].entry.data =
strdup(item.data);
@@ -274,82 +292,122 @@ int hsearch_r(ENTRY item, ACTION action, ENTRY **
retval,
return idx;
}
- /*
- * Second hash function:
- * as suggested in [Knuth]
- */
- hval2 = 1 + hval % (htab->size - 2);
-
- do {
+ if (htab->table[idx].used < 0) {
+ /* Remember deleted entry to use for inserution */
+ if (!deleted_entry)
+ deleted_entry = idx + 1;
+ } else if (!htab->table[idx].used) {
/*
- * Because SIZE is prime this guarantees to
- * step through all available indices.
+ * If table is full and another entry should be
+ * entered return with error.
*/
- if (idx <= hval2)
- idx = htab->size + idx - hval2;
- else
- idx -= hval2;
+ if (htab->filled == htab->size) {
+ __set_errno(ENOMEM);
+ *retval = NULL;
+ return 0;
+ }
/*
- * If we visited all entries leave the loop
- * unsuccessfully.
+ * Create new entry; recyle deleted entry if found
+ * create copies of item.key and item.data
*/
- if (idx == hval)
- break;
-
- /* If entry is found use it. */
- if ((htab->table[idx].used == hval)
- && strcmp(item.key, htab->table[idx].entry.key) == 0) {
- /* Overwrite existing value? */
- if ((action == ENTER) && (item.data != NULL)) {
- free(htab->table[idx].entry.data);
- htab->table[idx].entry.data =
- strdup(item.data);
- if (!htab->table[idx].entry.data) {
- __set_errno(ENOMEM);
- *retval = NULL;
- return 0;
- }
- }
- /* return found entry */
- *retval = &htab->table[idx].entry;
- return idx;
+ if (deleted_entry)
+ idx = deleted_entry - 1;
+
+ htab->table[idx].used = hval;
+ htab->table[idx].entry.key = strdup(item.key);
+ htab->table[idx].entry.data = strdup(item.data);
+ if (!htab->table[idx].entry.key ||
+ !htab->table[idx].entry.data) {
+ __set_errno(ENOMEM);
+ *retval = NULL;
+ return 0;
}
- }
- while (htab->table[idx].used);
- }
- /* An empty bucket has been found. */
- if (action == ENTER) {
- /*
- * If table is full and another entry should be
- * entered return with error.
- */
- if (htab->filled == htab->size) {
- __set_errno(ENOMEM);
- *retval = NULL;
- return 0;
+ ++htab->filled;
+
+ /* return new entry */
+ *retval = &htab->table[idx].entry;
+ return 1;
}
+ hval2 = hash_secondary(hval, htab);
+
/*
- * Create new entry;
- * create copies of item.key and item.data
+ * Because SIZE is prime this guarantees to
+ * step through all available indices.
*/
- htab->table[idx].used = hval;
- htab->table[idx].entry.key = strdup(item.key);
- htab->table[idx].entry.data = strdup(item.data);
- if (!htab->table[idx].entry.key ||
- !htab->table[idx].entry.data) {
- __set_errno(ENOMEM);
- *retval = NULL;
- return 0;
- }
+ if (idx <= hval2)
+ idx = htab->size + idx - hval2;
+ else
+ idx -= hval2;
+
+ /* If we visited all entries leave the loop */
+ } while (idx != hval);
+ __set_errno(ESRCH);
+ *retval = NULL;
+ return 0;
+}
+
+
+/*
+ * hsearch()
+ */
+
+int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
+ struct hsearch_data *htab)
+{
+ if (action == FIND)
+ return hfind_r(item, retval, htab);
+ else
+ return henter_r(item, retval, htab);
+}
+
+/*
+ * This is the search function. It uses double hashing with open
addressing.
+ * The argument item.key has to be a pointer to an zero terminated, most
+ * probably strings of chars. The function for generating a number of the
+ * strings is simple but fast. It can be replaced by a more complex
function
+ * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
+ *
+ * We use an trick to speed up the lookup. The table is created by hcreate
+ * with one more element available. This enables us to use the index zero
+ * special. This index will never be used because we store the first hash
+ * index in the field used where zero means not used. Every other
non-negative
+ * value means used and a negative value means a deleted entry. The
used field
+ * can be used as a first fast comparison for equality of the stored
and the
+ * parameter value. This helps to prevent unnecessary expensive calls
of strcmp.
+ *
+ * This implementation differs from the standard library version of
+ * this function in a number of ways:
+ *
+ * - While the standard version does not make any assumptions about
+ * the type of the stored data objects at all, this implementation
+ * works with NUL terminated strings only.
+ * - Instead of storing just pointers to the original objects, we
+ * create local copies so the caller does not need to care about the
+ * data any more.
+ * - The standard implementation does not provide a way to update an
+ * existing entry. This version will create a new entry or update an
+ * existing one when both "action == ENTER" and "item.data != NULL".
+ * - Instead of returning 1 on success, we return the index into the
+ * internal hash table, which is also guaranteed to be positive.
+ * This allows us direct access to the found hash table slot.
+ */
- ++htab->filled;
+int hmatch_r(const char *match, int last_idx, ENTRY ** retval,
+ struct hsearch_data *htab)
+{
+ unsigned int idx;
+ size_t key_len = strlen(match);
- /* return new entry */
- *retval = &htab->table[idx].entry;
- return 1;
+ for (idx = last_idx + 1; idx < htab->size; ++idx) {
+ if (htab->table[idx].used <= 0)
+ continue;
+ if (!strncmp(match, htab->table[idx].entry.key, key_len)) {
+ *retval = &htab->table[idx].entry;
+ return idx;
+ }
}
__set_errno(ESRCH);
@@ -357,7 +415,6 @@ int hsearch_r(ENTRY item, ACTION action, ENTRY **
retval,
return 0;
}
-
/*
* hdelete()
*/
@@ -365,33 +422,56 @@ int hsearch_r(ENTRY item, ACTION action, ENTRY **
retval,
/*
* The standard implementation of hsearch(3) does not provide any way
* to delete any entries from the hash table. We extend the code to
- * do that.
+ * do that. This is done by storing a negative value in used.
*/
int hdelete_r(const char *key, struct hsearch_data *htab)
{
- ENTRY e, *ep;
- int idx;
+ ENTRY e;
+ unsigned int hval;
+ unsigned int idx;
debug("hdelete: DELETE key \"%s\"\n", key);
e.key = (char *)key;
- if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
- __set_errno(ESRCH);
- return 0; /* not found */
- }
+ idx = hval = hash_primary(&e, htab);
+ do {
+ unsigned int hval2;
- /* free used ENTRY */
- debug("hdelete: DELETING key \"%s\"\n", key);
+ if (htab->table[idx].used == hval
+ && strcmp(e.key, htab->table[idx].entry.key) == 0) {
+ /* Delete key/data pair */
+ free(htab->table[idx].entry.key);
+ free(htab->table[idx].entry.data);
+ /* Mark entry as deleted */
+ htab->table[idx].used = -1;
+ --htab->filled;
+ return 1;
+ }
- free(ep->key);
- free(ep->data);
- htab->table[idx].used = 0;
+ if (!htab->table[idx].used) {
+ /* end of chain, return empty */
+ __set_errno(ESRCH);
+ return 0;
+ }
- --htab->filled;
+ hval2 = hash_secondary(hval, htab);
- return 1;
+ /*
+ * Because SIZE is prime this guarantees to
+ * step through all available indices.
+ */
+ if (idx <= hval2)
+ idx = htab->size + idx - hval2;
+ else
+ idx -= hval2;
+
+ /* If we visited all entries leave the loop */
+ } while (idx != hval);
+
+ __set_errno(ESRCH);
+ return 0;
}
/*
@@ -467,7 +547,7 @@ ssize_t hexport_r(struct hsearch_data *htab, const
char sep,
*/
for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
- if (htab->table[i].used) {
+ if (htab->table[i].used > 0) {
ENTRY *ep = &htab->table[i].entry;
list[n++] = ep;
@@ -703,7 +783,7 @@ int himport_r(struct hsearch_data *htab,
e.key = name;
e.data = value;
- hsearch_r(e, ENTER, &rv, htab);
+ henter_r(e, &rv, htab);
if (rv == NULL) {
printf("himport_r: can't insert \"%s=%s\" into hash table\n",
name, value);
--
1.7.0.4
--
Peter Barada
peter.barada at logicpd.com
More information about the U-Boot
mailing list