[Mesa-dev] [PATCH 3/3] util: Change util/set to use quadratic probing
Thomas Helland
thomashelland90 at gmail.com
Mon Apr 13 15:22:40 PDT 2015
On 14 Apr 2015 00:05, "Jason Ekstrand" <jason at jlekstrand.net> wrote:
>
> On Sat, Apr 11, 2015 at 4:25 PM, Thomas Helland
> <thomashelland90 at gmail.com> wrote:
> > The same rationale applies here as for the hash table.
> > Power of two size should give better performance,
> > and using the algorithm hash = sh + i/2 + i*i/2
> > should result in only distinct hash values when hitting collisions.
> >
> > Difference at 95.0% confidence
> > -7.9505 +/- 2.44011
> > -5.04357% +/- 1.54794%
> >
> > V3: Feedback from Eric Anholt
> > - Don't change load factor and starting size.
> >
> > V2: Feedback from Connor Abbott
> > - Don't set initial hash address before potential rehash
> > - Remove hash_sizes table
> > - Correct the quadratic hashing algorithm
> > - Use correct comment style
> >
> > Feedback from Jason Ekstrand
> > - Use unreachable() to detect if we fail to insert
> >
> > Signed-off-by: Thomas Helland <thomashelland90 at gmail.com>
> > ---
> > src/util/set.c | 118
++++++++++++++++++++-------------------------------------
> > src/util/set.h | 3 +-
> > 2 files changed, 43 insertions(+), 78 deletions(-)
> >
> > diff --git a/src/util/set.c b/src/util/set.c
> > index f01f869..7ff9520 100644
> > --- a/src/util/set.c
> > +++ b/src/util/set.c
> > @@ -32,6 +32,17 @@
> > * Keith Packard <keithp at keithp.com>
> > */
> >
> > +/**
> > + * Implements an open-addressing, quadratic probing hash-set.
> > + *
> > + * We choose set sizes that's a power of two.
> > + * This is computationally less expensive than primes.
> > + * As a bonus the size and free space can be calculated instead of
looked up.
> > + * FNV-1a has good avalanche properties, so collision is not an issue.
> > + * These sets are sized to have an extra 10% free to avoid
> > + * exponential performance degradation as the set fills.
> > + */
> > +
> > #include <stdlib.h>
> > #include <assert.h>
> >
> > @@ -39,51 +50,9 @@
> > #include "ralloc.h"
> > #include "set.h"
> >
> > -/*
> > - * From Knuth -- a good choice for hash/rehash values is p, p-2 where
> > - * p and p-2 are both prime. These tables are sized to have an extra
10%
> > - * free to avoid exponential performance degradation as the hash table
fills
> > - */
> > -
> > uint32_t deleted_key_value;
> > const void *deleted_key = &deleted_key_value;
> >
> > -static const struct {
> > - uint32_t max_entries, size, rehash;
> > -} hash_sizes[] = {
> > - { 2, 5, 3 },
> > - { 4, 7, 5 },
> > - { 8, 13, 11 },
> > - { 16, 19, 17 },
> > - { 32, 43, 41 },
> > - { 64, 73, 71 },
> > - { 128, 151, 149 },
> > - { 256, 283, 281 },
> > - { 512, 571, 569 },
> > - { 1024, 1153, 1151 },
> > - { 2048, 2269, 2267 },
> > - { 4096, 4519, 4517 },
> > - { 8192, 9013, 9011 },
> > - { 16384, 18043, 18041 },
> > - { 32768, 36109, 36107 },
> > - { 65536, 72091, 72089 },
> > - { 131072, 144409, 144407 },
> > - { 262144, 288361, 288359 },
> > - { 524288, 576883, 576881 },
> > - { 1048576, 1153459, 1153457 },
> > - { 2097152, 2307163, 2307161 },
> > - { 4194304, 4613893, 4613891 },
> > - { 8388608, 9227641, 9227639 },
> > - { 16777216, 18455029, 18455027 },
> > - { 33554432, 36911011, 36911009 },
> > - { 67108864, 73819861, 73819859 },
> > - { 134217728, 147639589, 147639587 },
> > - { 268435456, 295279081, 295279079 },
> > - { 536870912, 590559793, 590559791 },
> > - { 1073741824, 1181116273, 1181116271 },
> > - { 2147483648ul, 2362232233ul, 2362232231ul }
> > -};
> > -
> > static int
> > entry_is_free(struct set_entry *entry)
> > {
> > @@ -114,10 +83,9 @@ _mesa_set_create(void *mem_ctx,
> > if (ht == NULL)
> > return NULL;
> >
> > - ht->size_index = 0;
> > - ht->size = hash_sizes[ht->size_index].size;
> > - ht->rehash = hash_sizes[ht->size_index].rehash;
> > - ht->max_entries = hash_sizes[ht->size_index].max_entries;
> > + ht->size_iteration = 2;
> > + ht->size = 1 << ht->size_iteration;
> > + ht->max_entries = ht->size * 0.9;
> > ht->key_hash_function = key_hash_function;
> > ht->key_equals_function = key_equals_function;
> > ht->table = rzalloc_array(ht, struct set_entry, ht->size);
> > @@ -163,12 +131,11 @@ _mesa_set_destroy(struct set *ht, void
(*delete_function)(struct set_entry *entr
> > static struct set_entry *
> > set_search(const struct set *ht, uint32_t hash, const void *key)
> > {
> > - uint32_t hash_address;
> > + uint32_t start_hash_address = hash & (ht->size - 1);
> > + uint32_t hash_address = start_hash_address;
> > + uint32_t quad_hash = 1;
> >
> > - hash_address = hash % ht->size;
> > do {
> > - uint32_t double_hash;
> > -
> > struct set_entry *entry = ht->table + hash_address;
> >
> > if (entry_is_free(entry)) {
> > @@ -179,10 +146,10 @@ set_search(const struct set *ht, uint32_t hash,
const void *key)
> > }
> > }
> >
> > - double_hash = 1 + hash % ht->rehash;
> > -
> > - hash_address = (hash_address + double_hash) % ht->size;
> > - } while (hash_address != hash % ht->size);
> > + hash_address = (start_hash_address +
> > + (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size
- 1);
> > + quad_hash++;
> > + } while (hash_address != start_hash_address);
> >
> > return NULL;
> > }
> > @@ -207,35 +174,30 @@ static struct set_entry *
> > set_add(struct set *ht, uint32_t hash, const void *key);
> >
> > static void
> > -set_rehash(struct set *ht, unsigned new_size_index)
> > +set_rehash(struct set *ht, uint32_t new_size_iteration)
> > {
> > struct set old_ht;
> > struct set_entry *table, *entry;
> >
> > - if (new_size_index >= ARRAY_SIZE(hash_sizes))
> > + if (new_size_iteration >= 31)
> > return;
> >
> > table = rzalloc_array(ht, struct set_entry,
> > - hash_sizes[new_size_index].size);
> > + 1 << new_size_iteration);
> > if (table == NULL)
> > return;
> >
> > old_ht = *ht;
> >
> > ht->table = table;
> > - ht->size_index = new_size_index;
> > - ht->size = hash_sizes[ht->size_index].size;
> > - ht->rehash = hash_sizes[ht->size_index].rehash;
> > - ht->max_entries = hash_sizes[ht->size_index].max_entries;
> > + ht->size_iteration = new_size_iteration;
> > + ht->size = 1 << new_size_iteration;
> > + ht->max_entries = ht->size * 0.7;
> > ht->entries = 0;
> > ht->deleted_entries = 0;
> >
> > - for (entry = old_ht.table;
> > - entry != old_ht.table + old_ht.size;
> > - entry++) {
> > - if (entry_is_present(entry)) {
> > - set_add(ht, entry->hash, entry->key);
> > - }
> > + set_foreach(&old_ht, entry) {
> > + set_add(ht, entry->hash, entry->key);
> > }
>
> This hunk, while I think it's correct, is unrelated. Please drop it.
> I think I'm ok with the cleanup, but it should be a separate patch if
> you want to do that.
> --Jason
>
Yup, unrelated change it is.
I'll split it in two and post both in one of the coming days.
(This is how it is done in the hash table in current master.
Following the same pattern in the set seemed desirable.)
- Thomas
> > ralloc_free(old_ht.table);
> > @@ -250,19 +212,21 @@ set_rehash(struct set *ht, unsigned
new_size_index)
> > static struct set_entry *
> > set_add(struct set *ht, uint32_t hash, const void *key)
> > {
> > - uint32_t hash_address;
> > + uint32_t start_hash_address, hash_address;
> > + uint32_t quad_hash = 1;
> > struct set_entry *available_entry = NULL;
> >
> > if (ht->entries >= ht->max_entries) {
> > - set_rehash(ht, ht->size_index + 1);
> > + set_rehash(ht, ht->size_iteration + 1);
> > } else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
> > - set_rehash(ht, ht->size_index);
> > + set_rehash(ht, ht->size_iteration);
> > }
> >
> > - hash_address = hash % ht->size;
> > + start_hash_address = hash & (ht->size - 1);
> > + hash_address = start_hash_address;
> > +
> > do {
> > struct set_entry *entry = ht->table + hash_address;
> > - uint32_t double_hash;
> >
> > if (!entry_is_present(entry)) {
> > /* Stash the first available entry we find */
> > @@ -288,10 +252,11 @@ set_add(struct set *ht, uint32_t hash, const void
*key)
> > return entry;
> > }
> >
> > - double_hash = 1 + hash % ht->rehash;
> > + hash_address = (start_hash_address +
> > + (quad_hash + (quad_hash * quad_hash)) / 2) & (ht->size
- 1);
> > + quad_hash++;
> > + } while (hash_address != start_hash_address);
> >
> > - hash_address = (hash_address + double_hash) % ht->size;
> > - } while (hash_address != hash % ht->size);
> >
> > if (available_entry) {
> > if (entry_is_deleted(available_entry))
> > @@ -305,6 +270,7 @@ set_add(struct set *ht, uint32_t hash, const void
*key)
> > /* We could hit here if a required resize failed. An
unchecked-malloc
> > * application could ignore this result.
> > */
> > + unreachable("Failed to insert entry in hash set");
> > return NULL;
> > }
> >
> > @@ -368,7 +334,7 @@ _mesa_set_random_entry(struct set *ht,
> > int (*predicate)(struct set_entry *entry))
> > {
> > struct set_entry *entry;
> > - uint32_t i = rand() % ht->size;
> > + uint32_t i = rand() & (ht->size - 1);
> >
> > if (ht->entries == 0)
> > return NULL;
> > diff --git a/src/util/set.h b/src/util/set.h
> > index 9acd2c2..fa17d3a 100644
> > --- a/src/util/set.h
> > +++ b/src/util/set.h
> > @@ -46,9 +46,8 @@ struct set {
> > uint32_t (*key_hash_function)(const void *key);
> > bool (*key_equals_function)(const void *a, const void *b);
> > uint32_t size;
> > - uint32_t rehash;
> > uint32_t max_entries;
> > - uint32_t size_index;
> > + uint32_t size_iteration;
> > uint32_t entries;
> > uint32_t deleted_entries;
> > };
> > --
> > 2.3.4
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150414/a380a2c2/attachment-0001.html>
More information about the mesa-dev
mailing list