[Mesa-dev] [PATCH 3/3] util: Change util/set to use quadratic probing
Thomas Helland
thomashelland90 at gmail.com
Sat Apr 11 16:25:17 PDT 2015
The same rationale applies here as for the hash table.
Power of two size should give better performance,
and using the algorithm hash = sh + i/2 + i*i/2
should result in only distinct hash values when hitting collisions.
Difference at 95.0% confidence
-7.9505 +/- 2.44011
-5.04357% +/- 1.54794%
V3: Feedback from Eric Anholt
- Don't change load factor and starting size.
V2: Feedback from Connor Abbott
- Don't set initial hash address before potential rehash
- Remove hash_sizes table
- Correct the quadratic hashing algorithm
- Use correct comment style
Feedback from Jason Ekstrand
- Use unreachable() to detect if we fail to insert
Signed-off-by: Thomas Helland <thomashelland90 at gmail.com>
---
src/util/set.c | 118 ++++++++++++++++++++-------------------------------------
src/util/set.h | 3 +-
2 files changed, 43 insertions(+), 78 deletions(-)
diff --git a/src/util/set.c b/src/util/set.c
index f01f869..7ff9520 100644
--- a/src/util/set.c
+++ b/src/util/set.c
@@ -32,6 +32,17 @@
* Keith Packard <keithp at keithp.com>
*/
+/**
+ * Implements an open-addressing, quadratic probing hash-set.
+ *
+ * We choose set sizes that's a power of two.
+ * This is computationally less expensive than primes.
+ * As a bonus the size and free space can be calculated instead of looked up.
+ * FNV-1a has good avalanche properties, so collision is not an issue.
+ * These sets are sized to have an extra 10% free to avoid
+ * exponential performance degradation as the set fills.
+ */
+
#include <stdlib.h>
#include <assert.h>
@@ -39,51 +50,9 @@
#include "ralloc.h"
#include "set.h"
-/*
- * From Knuth -- a good choice for hash/rehash values is p, p-2 where
- * p and p-2 are both prime. These tables are sized to have an extra 10%
- * free to avoid exponential performance degradation as the hash table fills
- */
-
uint32_t deleted_key_value;
const void *deleted_key = &deleted_key_value;
-static const struct {
- uint32_t max_entries, size, rehash;
-} hash_sizes[] = {
- { 2, 5, 3 },
- { 4, 7, 5 },
- { 8, 13, 11 },
- { 16, 19, 17 },
- { 32, 43, 41 },
- { 64, 73, 71 },
- { 128, 151, 149 },
- { 256, 283, 281 },
- { 512, 571, 569 },
- { 1024, 1153, 1151 },
- { 2048, 2269, 2267 },
- { 4096, 4519, 4517 },
- { 8192, 9013, 9011 },
- { 16384, 18043, 18041 },
- { 32768, 36109, 36107 },
- { 65536, 72091, 72089 },
- { 131072, 144409, 144407 },
- { 262144, 288361, 288359 },
- { 524288, 576883, 576881 },
- { 1048576, 1153459, 1153457 },
- { 2097152, 2307163, 2307161 },
- { 4194304, 4613893, 4613891 },
- { 8388608, 9227641, 9227639 },
- { 16777216, 18455029, 18455027 },
- { 33554432, 36911011, 36911009 },
- { 67108864, 73819861, 73819859 },
- { 134217728, 147639589, 147639587 },
- { 268435456, 295279081, 295279079 },
- { 536870912, 590559793, 590559791 },
- { 1073741824, 1181116273, 1181116271 },
- { 2147483648ul, 2362232233ul, 2362232231ul }
-};
-
static int
entry_is_free(struct set_entry *entry)
{
@@ -114,10 +83,9 @@ _mesa_set_create(void *mem_ctx,
if (ht == NULL)
return NULL;
- ht->size_index = 0;
- ht->size = hash_sizes[ht->size_index].size;
- ht->rehash = hash_sizes[ht->size_index].rehash;
- ht->max_entries = hash_sizes[ht->size_index].max_entries;
+ ht->size_iteration = 2;
+ ht->size = 1 << ht->size_iteration;
+ ht->max_entries = ht->size * 0.9;
ht->key_hash_function = key_hash_function;
ht->key_equals_function = key_equals_function;
ht->table = rzalloc_array(ht, struct set_entry, ht->size);
@@ -163,12 +131,11 @@ _mesa_set_destroy(struct set *ht, void (*delete_function)(struct set_entry *entr
static struct set_entry *
set_search(const struct set *ht, uint32_t hash, const void *key)
{
- uint32_t hash_address;
+ uint32_t start_hash_address = hash & (ht->size - 1);
+ uint32_t hash_address = start_hash_address;
+ uint32_t quad_hash = 1;
- hash_address = hash % ht->size;
do {
- uint32_t double_hash;
-
struct set_entry *entry = ht->table + hash_address;
if (entry_is_free(entry)) {
@@ -179,10 +146,10 @@ set_search(const struct set *ht, uint32_t hash, const void *key)
}
}
- double_hash = 1 + hash % ht->rehash;
-
- hash_address = (hash_address + double_hash) % ht->size;
- } while (hash_address != hash % ht->size);
+ hash_address = (start_hash_address +
+ (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size - 1);
+ quad_hash++;
+ } while (hash_address != start_hash_address);
return NULL;
}
@@ -207,35 +174,30 @@ static struct set_entry *
set_add(struct set *ht, uint32_t hash, const void *key);
static void
-set_rehash(struct set *ht, unsigned new_size_index)
+set_rehash(struct set *ht, uint32_t new_size_iteration)
{
struct set old_ht;
struct set_entry *table, *entry;
- if (new_size_index >= ARRAY_SIZE(hash_sizes))
+ if (new_size_iteration >= 31)
return;
table = rzalloc_array(ht, struct set_entry,
- hash_sizes[new_size_index].size);
+ 1 << new_size_iteration);
if (table == NULL)
return;
old_ht = *ht;
ht->table = table;
- ht->size_index = new_size_index;
- ht->size = hash_sizes[ht->size_index].size;
- ht->rehash = hash_sizes[ht->size_index].rehash;
- ht->max_entries = hash_sizes[ht->size_index].max_entries;
+ ht->size_iteration = new_size_iteration;
+ ht->size = 1 << new_size_iteration;
+ ht->max_entries = ht->size * 0.7;
ht->entries = 0;
ht->deleted_entries = 0;
- for (entry = old_ht.table;
- entry != old_ht.table + old_ht.size;
- entry++) {
- if (entry_is_present(entry)) {
- set_add(ht, entry->hash, entry->key);
- }
+ set_foreach(&old_ht, entry) {
+ set_add(ht, entry->hash, entry->key);
}
ralloc_free(old_ht.table);
@@ -250,19 +212,21 @@ set_rehash(struct set *ht, unsigned new_size_index)
static struct set_entry *
set_add(struct set *ht, uint32_t hash, const void *key)
{
- uint32_t hash_address;
+ uint32_t start_hash_address, hash_address;
+ uint32_t quad_hash = 1;
struct set_entry *available_entry = NULL;
if (ht->entries >= ht->max_entries) {
- set_rehash(ht, ht->size_index + 1);
+ set_rehash(ht, ht->size_iteration + 1);
} else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
- set_rehash(ht, ht->size_index);
+ set_rehash(ht, ht->size_iteration);
}
- hash_address = hash % ht->size;
+ start_hash_address = hash & (ht->size - 1);
+ hash_address = start_hash_address;
+
do {
struct set_entry *entry = ht->table + hash_address;
- uint32_t double_hash;
if (!entry_is_present(entry)) {
/* Stash the first available entry we find */
@@ -288,10 +252,11 @@ set_add(struct set *ht, uint32_t hash, const void *key)
return entry;
}
- double_hash = 1 + hash % ht->rehash;
+ hash_address = (start_hash_address +
+ (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size - 1);
+ quad_hash++;
+ } while (hash_address != start_hash_address);
- hash_address = (hash_address + double_hash) % ht->size;
- } while (hash_address != hash % ht->size);
if (available_entry) {
if (entry_is_deleted(available_entry))
@@ -305,6 +270,7 @@ set_add(struct set *ht, uint32_t hash, const void *key)
/* We could hit here if a required resize failed. An unchecked-malloc
* application could ignore this result.
*/
+ unreachable("Failed to insert entry in hash set");
return NULL;
}
@@ -368,7 +334,7 @@ _mesa_set_random_entry(struct set *ht,
int (*predicate)(struct set_entry *entry))
{
struct set_entry *entry;
- uint32_t i = rand() % ht->size;
+ uint32_t i = rand() & (ht->size - 1);
if (ht->entries == 0)
return NULL;
diff --git a/src/util/set.h b/src/util/set.h
index 9acd2c2..fa17d3a 100644
--- a/src/util/set.h
+++ b/src/util/set.h
@@ -46,9 +46,8 @@ struct set {
uint32_t (*key_hash_function)(const void *key);
bool (*key_equals_function)(const void *a, const void *b);
uint32_t size;
- uint32_t rehash;
uint32_t max_entries;
- uint32_t size_index;
+ uint32_t size_iteration;
uint32_t entries;
uint32_t deleted_entries;
};
--
2.3.4
More information about the mesa-dev
mailing list