--- common/hash.h.old Mon Jul 24 22:16:13 2000 +++ common/hash.h Tue Jul 25 17:07:51 2000 @@ -33,6 +33,11 @@ unsigned int hash_fval_int(const void *key, unsigned int num_buckets); int hash_fcmp_int(const void *key1, const void *key2); +/* Appropriate for void pointers or casted longs, used as keys + directly instead of by reference. */ +unsigned int hash_fval_keyval(const void *vkey, unsigned int num_buckets); +int hash_fcmp_keyval(const void *vkey1, const void *vkey2); + /* General functions: */ struct hash_table *hash_new(hash_val_fn_t fval, hash_cmp_fn_t fcmp); struct hash_table *hash_new_nentries(hash_val_fn_t fval, hash_cmp_fn_t fcmp, --- common/hash.c.old Tue Jul 25 17:15:37 2000 +++ common/hash.c Wed Jul 26 19:27:56 2000 @@ -98,7 +98,9 @@ #include "hash.h" -#define FULL_RATIO 0.75 /* resize when above this */ +#define FULL_RATIO 0.75 /* consider expanding when above this */ +#define MIN_RATIO 0.25 /* shrink when below this */ +#define MIN_BUCKETS 32 /* lowest number of buckets */ enum Bucket_State { BUCKET_UNUSED=0, BUCKET_USED, BUCKET_DELETED }; @@ -147,28 +149,67 @@ h->frozen = 0; } + +/************************************************************************** + A supplied hash function where key is pointer to int. A modified + version of Knuth's multiplication method: since I can't find an + efficient way to take the bucket number from the top of a word, I'm + doing a modulo (from the bottom of the word) instead; and since a + multiplication alone won't make variations in high bits affect lower + bits, I have to do a bit of bit-juggling to avoid problems. --Jed +**************************************************************************/ +#define MAGIC_NUMBER 0x9e3779b9 +/* A Magic Number due to Knuth. Specifically, the golden mean + (sqrt(5/4)-1/2) multiplied by 2^32 and rounded to the nearest odd + number (so as to be relatively prime to powers of 2). */ +unsigned int hash_fval_int(const void *vkey, unsigned int num_buckets) +{ + const unsigned int key = (unsigned int) *(const int*)vkey; + unsigned long result = key * MAGIC_NUMBER; + result &= 0xFFFFFFFF; /* To make results independant of sizeof(long) */ + + /* And this is the bit-juggling, which may or may not do what I want. */ + result -= (result >> 27); /* 27 = 32-floor(log(MIN_BUCKETS)/log(2)) */ + result ^= (result >> 23); /* 23 = 2*27-31 */ + result += (result >> 15); /* 15 = 2*23-31 */ + result &= 0xFFFFFFFF; /* To make results independant of sizeof(long) */ + + return (result % num_buckets); +} + +/************************************************************************** + A supplied function for comparison of pointers to unsigned ints: +**************************************************************************/ +int hash_fcmp_int(const void *vkey1, const void *vkey2) +{ + const int key1 = *(const int*)vkey1; + const int key2 = *(const int*)vkey2; + /* avoid overflow issues: */ + return (key1 < key2) ? -1 : (key1 > key2) ? 1 : 0; +} + + /************************************************************************** A supplied hash function appropriate to nul-terminated strings. - Anyone know a good one? + Sort of a mutated version of the above. **************************************************************************/ -/* plenty of primes, not too small, in random order: */ -static unsigned int coeff[] = { 113, 59, 23, 73, 31, 79, 97, 103, 67, 109, - 71, 89, 53, 37, 101, 41, 29, 43, 13, 61, - 107, 47, 83, 17 }; -#define NCOEFF (sizeof(coeff)/sizeof(coeff[0])) unsigned int hash_fval_string(const void *vkey, unsigned int num_buckets) { const char *key = (const char*)vkey; - unsigned int result=0; - int i; + unsigned long result=0; - for(i=0; *key; i++, key++) { - if (i == NCOEFF) { - i = 0; - } - result *= (unsigned int)5; - result += coeff[i] * (unsigned int)(*key); + for(; *key; key++) { + result += *key; + result *= MAGIC_NUMBER; } + result &= 0xFFFFFFFF; /* To make results independant of sizeof(long) */ + + /* The bit-juggling again, although it's not quite as necessary here. */ + result -= (result >> 27); /* 27 = 32-floor(log(MIN_BUCKETS)/log(2)) */ + result ^= (result >> 23); /* 23 = 2*27-31 */ + result += (result >> 15); /* 15 = 2*23-31 */ + result &= 0xFFFFFFFF; /* To make results independant of sizeof(long) */ + return (result % num_buckets); } @@ -180,43 +221,62 @@ return strcmp((const char*)vkey1, (const char*)vkey2); } + /************************************************************************** - A supplied hash function where key is pointer to int. + A supplied hash function which operates on the key pointer values + themselves; this way a void* (or, with casting, a long) can be used + as a key, and also without having allocated space for it. **************************************************************************/ -unsigned int hash_fval_int(const void *vkey, unsigned int num_buckets) +unsigned int hash_fval_keyval(const void *vkey, unsigned int num_buckets) { - const unsigned int key = (unsigned int) *(const int*)vkey; - return (key * 107U) % num_buckets; + unsigned long result = ((unsigned long)vkey) * MAGIC_NUMBER; + result &= 0xFFFFFFFF; /* To make results independant of sizeof(long) */ + + /* And this is the bit-juggling, which may or may not do what I want. */ + result -= (result >> 27); /* 27 = 32-floor(log(MIN_BUCKETS)/log(2)) */ + result ^= (result >> 23); /* 23 = 2*27-31 */ + result += (result >> 15); /* 15 = 2*23-31 */ + result &= 0xFFFFFFFF; /* To make results independant of sizeof(long) */ + + return (result % num_buckets); } /************************************************************************** - A supplied function for comparison of pointers to unsigned ints: + A supplied function for comparison of the raw void pointers (or, + with casting, longs) **************************************************************************/ -int hash_fcmp_int(const void *vkey1, const void *vkey2) +int hash_fcmp_keyval(const void *vkey1, const void *vkey2) { - const int key1 = *(const int*)vkey1; - const int key2 = *(const int*)vkey2; - /* avoid overflow issues: */ - return (key1 < key2) ? -1 : (key1 > key2) ? 1 : 0; + /* Simplicity itself. */ + return (vkey1 < vkey2) ? -1 : (vkey1 > vkey2) ? 1 : 0; } /************************************************************************** - Calculate a "reasonable" number of buckets for a given number - of entries. Use a power of 2 (not sure how important), with - first power of 2 which is larger than num_entries, then extra + Calculate a "reasonable" number of buckets for a given number of + entries. Use a power of 2 (not sure how important), with first + power of 2 which is larger than or equal to num_entries, then extra factor of 2 for breathing space. Say minimum of 32. +*************************************************************************** + Generalized restrictions on the behavior of this function: + * calc_appropriate_nbuckets(x) >= MIN_BUCKETS + * calc_appropriate_nbuckets(x) * MIN_RATIO < x whenever + x > MIN_BUCKETS * MIN_RATIO + * calc_appropriate_nbuckets(x) * FULL_RATIO > x + This one is more of a recommendation, to ensure enough free space: + * calc_appropriate_nbuckets(x) >= 2 * x **************************************************************************/ static unsigned int calc_appropriate_nbuckets(unsigned int num_entries) { unsigned int pow2; + if (num_entries) --num_entries; /* ...or equal to... */ pow2 = 2; while (num_entries) { num_entries >>= 1; pow2 <<= 1; } - if (pow2 < 32) pow2 = 32; + if (pow2 < MIN_BUCKETS) pow2 = MIN_BUCKETS; return pow2; } @@ -319,12 +379,15 @@ } /************************************************************************** - Call this when an entry might be added: resizes the hash table if - seems like a good idea. Count deleted entries in check because - efficiency may be degraded if there are too many deleted entries. - But not for appropriate new size. (?) + Call this when an entry might be added or deleted: resizes the hash + table if seems like a good idea. Count deleted entries in check for + expansion because efficiency may be degraded if there are too many + deleted entries. But for determining new size, ignore deleted + entries, since they'll be removed by rehashing. **************************************************************************/ -static void hash_maybe_resize(struct hash_table *h) +#define hash_maybe_expand(htab) hash_maybe_resize((htab),1) +#define hash_maybe_shrink(htab) hash_maybe_resize((htab),0) +static void hash_maybe_resize(struct hash_table *h, int expandingp) { unsigned int num_used, limit, new_nbuckets; @@ -332,15 +395,29 @@ return; } num_used = h->num_entries + h->num_deleted; - limit = FULL_RATIO * h->num_buckets; - if (num_used < limit) { - return; + if (expandingp) { + limit = FULL_RATIO * h->num_buckets; + if (num_used < limit) { + return; + } + } else { + if (h->num_buckets <= MIN_BUCKETS) { + return; + } + limit = MIN_RATIO * h->num_buckets; + if (h->num_entries > limit) { + return; + } } - new_nbuckets = calc_appropriate_nbuckets(2*h->num_entries); - freelog(LOG_DEBUG, "Resizing hash table " - "(used %u del %u nbuck %u used %u limit %u new %u)", - h->num_entries, h->num_deleted, h->num_buckets, - num_used, limit, new_nbuckets); + + new_nbuckets = calc_appropriate_nbuckets(h->num_entries); + + freelog(LOG_DEBUG, "%s hash table " + "(entry %u del %u used %u nbuck %u new %u %slimit %u)", + (new_nbucketsnum_buckets) ? "Shrinking" : + (new_nbuckets>h->num_buckets) ? "Expanding" : "Rehashing", + h->num_entries, h->num_deleted, num_used, + h->num_buckets, new_nbuckets, expandingp?"up":"dn", limit); hash_resize_table(h, new_nbuckets); } @@ -404,7 +481,7 @@ struct hash_bucket *bucket; int hash_val; - hash_maybe_resize(h); + hash_maybe_expand(h); hash_val = HASH_VAL(h, key); bucket = internal_lookup(h, key, hash_val); if (bucket->used == BUCKET_USED) { @@ -433,7 +510,7 @@ int hash_val; const void *ret; - hash_maybe_resize(h); + hash_maybe_expand(h); hash_val = HASH_VAL(h, key); bucket = internal_lookup(h, key, hash_val); if (bucket->used == BUCKET_USED) { @@ -459,8 +536,10 @@ **************************************************************************/ void *hash_delete_entry(struct hash_table *h, const void *key) { - struct hash_bucket *bucket = internal_lookup(h, key, HASH_VAL(h,key)); + struct hash_bucket *bucket; + hash_maybe_shrink(h); + bucket = internal_lookup(h, key, HASH_VAL(h,key)); if (bucket->used == BUCKET_USED) { const void *ret = bucket->data; zero_hbucket(bucket);