[2162] | 1 | /* Copyright (c) 1998 - 2005, Google Inc.
|
---|
| 2 | * All rights reserved.
|
---|
| 3 | *
|
---|
| 4 | * Redistribution and use in source and binary forms, with or without
|
---|
| 5 | * modification, are permitted provided that the following conditions are
|
---|
| 6 | * met:
|
---|
| 7 | *
|
---|
| 8 | * * Redistributions of source code must retain the above copyright
|
---|
| 9 | * notice, this list of conditions and the following disclaimer.
|
---|
| 10 | * * Redistributions in binary form must reproduce the above
|
---|
| 11 | * copyright notice, this list of conditions and the following disclaimer
|
---|
| 12 | * in the documentation and/or other materials provided with the
|
---|
| 13 | * distribution.
|
---|
| 14 | * * Neither the name of Google Inc. nor the names of its
|
---|
| 15 | * contributors may be used to endorse or promote products derived from
|
---|
| 16 | * this software without specific prior written permission.
|
---|
| 17 | *
|
---|
| 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
---|
| 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
---|
| 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
---|
| 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
---|
| 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
---|
| 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
---|
| 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
---|
| 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
---|
| 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
---|
| 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
---|
| 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
---|
| 29 | *
|
---|
| 30 | * ---
|
---|
| 31 | * Author: Craig Silverstein
|
---|
| 32 | *
|
---|
| 33 | * This library is intended to be used for in-memory hash tables,
|
---|
| 34 | * though it provides rudimentary permanent-storage capabilities.
|
---|
| 35 | * It attempts to be fast, portable, and small. The best algorithm
|
---|
| 36 | * to fulfill these goals is an internal probing hashing algorithm,
|
---|
| 37 | * as in Knuth, _Art of Computer Programming_, vol III. Unlike
|
---|
| 38 | * chained (open) hashing, it doesn't require a pointer for every
|
---|
| 39 | * item, yet it is still constant time lookup in practice.
|
---|
| 40 | *
|
---|
| 41 | * Also to save space, we let the contents (both data and key) that
|
---|
| 42 | * you insert be a union: if the key/data is small, we store it
|
---|
| 43 | * directly in the hashtable, otherwise we store a pointer to it.
|
---|
| 44 | * To keep you from having to figure out which, use KEY_PTR and
|
---|
| 45 | * PTR_KEY to convert between the arguments to these functions and
|
---|
| 46 | * a pointer to the real data. For instance:
|
---|
| 47 | * char key[] = "ab", *key2;
|
---|
| 48 | * HTItem *bck; HashTable *ht;
|
---|
| 49 | * HashInsert(ht, PTR_KEY(ht, key), 0);
|
---|
| 50 | * bck = HashFind(ht, PTR_KEY(ht, "ab"));
|
---|
| 51 | * key2 = KEY_PTR(ht, bck->key);
|
---|
| 52 | *
|
---|
| 53 | * There are a rich set of operations supported:
|
---|
| 54 | * AllocateHashTable() -- Allocates a hashtable structure and
|
---|
| 55 | * returns it.
|
---|
| 56 | * cchKey: if it's a positive number, then each key is a
|
---|
| 57 | * fixed-length record of that length. If it's 0,
|
---|
| 58 | * the key is assumed to be a \0-terminated string.
|
---|
| 59 | * fSaveKey: normally, you are responsible for allocating
|
---|
| 60 | * space for the key. If this is 1, we make a
|
---|
| 61 | * copy of the key for you.
|
---|
| 62 | * ClearHashTable() -- Removes everything from a hashtable
|
---|
| 63 | * FreeHashTable() -- Frees memory used by a hashtable
|
---|
| 64 | *
|
---|
| 65 | * HashFind() -- takes a key (use PTR_KEY) and returns the
|
---|
| 66 | * HTItem containing that key, or NULL if the
|
---|
| 67 | * key is not in the hashtable.
|
---|
| 68 | * HashFindLast() -- returns the item found by last HashFind()
|
---|
| 69 | * HashFindOrInsert() -- inserts the key/data pair if the key
|
---|
| 70 | * is not already in the hashtable, or
|
---|
| 71 | * returns the appropraite HTItem if it is.
|
---|
| 72 | * HashFindOrInsertItem() -- takes key/data as an HTItem.
|
---|
| 73 | * HashInsert() -- adds a key/data pair to the hashtable. What
|
---|
| 74 | * it does if the key is already in the table
|
---|
| 75 | * depends on the value of SAMEKEY_OVERWRITE.
|
---|
| 76 | * HashInsertItem() -- takes key/data as an HTItem.
|
---|
| 77 | * HashDelete() -- removes a key/data pair from the hashtable,
|
---|
| 78 | * if it's there. RETURNS 1 if it was there,
|
---|
| 79 | * 0 else.
|
---|
| 80 | * If you use sparse tables and never delete, the full data
|
---|
| 81 | * space is available. Otherwise we steal -2 (maybe -3),
|
---|
| 82 | * so you can't have data fields with those values.
|
---|
| 83 | * HashDeleteLast() -- deletes the item returned by the last Find().
|
---|
| 84 | *
|
---|
| 85 | * HashFirstBucket() -- used to iterate over the buckets in a
|
---|
| 86 | * hashtable. DON'T INSERT OR DELETE WHILE
|
---|
| 87 | * ITERATING! You can't nest iterations.
|
---|
| 88 | * HashNextBucket() -- RETURNS NULL at the end of iterating.
|
---|
| 89 | *
|
---|
| 90 | * HashSetDeltaGoalSize() -- if you're going to insert 1000 items
|
---|
| 91 | * at once, call this fn with arg 1000.
|
---|
| 92 | * It grows the table more intelligently.
|
---|
| 93 | *
|
---|
| 94 | * HashSave() -- saves the hashtable to a file. It saves keys ok,
|
---|
| 95 | * but it doesn't know how to interpret the data field,
|
---|
| 96 | * so if the data field is a pointer to some complex
|
---|
| 97 | * structure, you must send a function that takes a
|
---|
| 98 | * file pointer and a pointer to the structure, and
|
---|
| 99 | * write whatever you want to write. It should return
|
---|
| 100 | * the number of bytes written. If the file is NULL,
|
---|
| 101 | * it should just return the number of bytes it would
|
---|
| 102 | * write, without writing anything.
|
---|
| 103 | * If your data field is just an integer, not a
|
---|
| 104 | * pointer, just send NULL for the function.
|
---|
| 105 | * HashLoad() -- loads a hashtable. It needs a function that takes
|
---|
| 106 | * a file and the size of the structure, and expects
|
---|
| 107 | * you to read in the structure and return a pointer
|
---|
| 108 | * to it. You must do memory allocation, etc. If
|
---|
| 109 | * the data is just a number, send NULL.
|
---|
| 110 | * HashLoadKeys() -- unlike HashLoad(), doesn't load the data off disk
|
---|
| 111 | * until needed. This saves memory, but if you look
|
---|
| 112 | * up the same key a lot, it does a disk access each
|
---|
| 113 | * time.
|
---|
| 114 | * You can't do Insert() or Delete() on hashtables that were loaded
|
---|
| 115 | * from disk.
|
---|
| 116 | *
|
---|
| 117 | * See libchash.h for parameters you can modify. Make sure LOG_WORD_SIZE
|
---|
| 118 | * is defined correctly for your machine! (5 for 32 bit words, 6 for 64).
|
---|
| 119 | */
|
---|
| 120 |
|
---|
| 121 | #include <stdlib.h>
|
---|
| 122 | #include <stdio.h>
|
---|
| 123 | #include <string.h> /* for strcmp, memcmp, etc */
|
---|
| 124 | #include <sys/types.h> /* ULTRIX needs this for in.h */
|
---|
| 125 | #include <netinet/in.h> /* for reading/writing hashtables */
|
---|
| 126 | #include <assert.h>
|
---|
| 127 | #include "libchash.h" /* all the types */
|
---|
| 128 |
|
---|
| 129 | /* if keys are stored directly but cchKey is less than sizeof(ulong), */
|
---|
| 130 | /* this cuts off the bits at the end */
|
---|
| 131 | char grgKeyTruncMask[sizeof(ulong)][sizeof(ulong)];
|
---|
| 132 | #define KEY_TRUNC(ht, key) \
|
---|
| 133 | ( STORES_PTR(ht) || (ht)->cchKey == sizeof(ulong) \
|
---|
| 134 | ? (key) : ((key) & *(ulong *)&(grgKeyTruncMask[(ht)->cchKey][0])) )
|
---|
| 135 |
|
---|
| 136 | /* round num up to a multiple of wordsize. (LOG_WORD_SIZE-3 is in bytes) */
|
---|
| 137 | #define WORD_ROUND(num) ( ((num-1) | ((1<<(LOG_WORD_SIZE-3))-1)) + 1 )
|
---|
| 138 | #define NULL_TERMINATED 0 /* val of cchKey if keys are null-term strings */
|
---|
| 139 |
|
---|
| 140 | /* Useful operations we do to keys: compare them, copy them, free them */
|
---|
| 141 |
|
---|
| 142 | #define KEY_CMP(ht, key1, key2) ( !STORES_PTR(ht) ? (key1) - (key2) : \
|
---|
| 143 | (key1) == (key2) ? 0 : \
|
---|
| 144 | HashKeySize(ht) == NULL_TERMINATED ? \
|
---|
| 145 | strcmp((char *)key1, (char *)key2) :\
|
---|
| 146 | memcmp((void *)key1, (void *)key2, \
|
---|
| 147 | HashKeySize(ht)) )
|
---|
| 148 |
|
---|
| 149 | #define COPY_KEY(ht, keyTo, keyFrom) do \
|
---|
| 150 | if ( !STORES_PTR(ht) || !(ht)->fSaveKeys ) \
|
---|
| 151 | (keyTo) = (keyFrom); /* just copy pointer or info */\
|
---|
| 152 | else if ( (ht)->cchKey == NULL_TERMINATED ) /* copy 0-term.ed str */\
|
---|
| 153 | { \
|
---|
| 154 | (keyTo) = (ulong)HTsmalloc( WORD_ROUND(strlen((char *)(keyFrom))+1) ); \
|
---|
| 155 | strcpy((char *)(keyTo), (char *)(keyFrom)); \
|
---|
| 156 | } \
|
---|
| 157 | else \
|
---|
| 158 | { \
|
---|
| 159 | (keyTo) = (ulong) HTsmalloc( WORD_ROUND((ht)->cchKey) ); \
|
---|
| 160 | memcpy( (char *)(keyTo), (char *)(keyFrom), (ht)->cchKey); \
|
---|
| 161 | } \
|
---|
| 162 | while ( 0 )
|
---|
| 163 |
|
---|
| 164 | #define FREE_KEY(ht, key) do \
|
---|
| 165 | if ( STORES_PTR(ht) && (ht)->fSaveKeys ) \
|
---|
| 166 | if ( (ht)->cchKey == NULL_TERMINATED ) \
|
---|
| 167 | HTfree((char *)(key), WORD_ROUND(strlen((char *)(key))+1)); \
|
---|
| 168 | else \
|
---|
| 169 | HTfree((char *)(key), WORD_ROUND((ht)->cchKey)); \
|
---|
| 170 | while ( 0 )
|
---|
| 171 |
|
---|
| 172 | /* the following are useful for bitmaps */
|
---|
| 173 | /* Format is like this (if 1 word = 4 bits): 3210 7654 ba98 fedc ... */
|
---|
| 174 | typedef ulong HTBitmapPart; /* this has to be unsigned, for >> */
|
---|
| 175 | typedef HTBitmapPart HTBitmap[1<<LOG_BM_WORDS];
|
---|
| 176 | typedef ulong HTOffset; /* something big enough to hold offsets */
|
---|
| 177 |
|
---|
| 178 | #define BM_BYTES(cBuckets) /* we must ensure it's a multiple of word size */\
|
---|
| 179 | ( (((cBuckets) + 8*sizeof(ulong)-1) >> LOG_WORD_SIZE) << (LOG_WORD_SIZE-3) )
|
---|
| 180 | #define MOD2(i, logmod) ( (i) & ((1<<(logmod))-1) )
|
---|
| 181 | #define DIV_NUM_ENTRIES(i) ( (i) >> LOG_WORD_SIZE )
|
---|
| 182 | #define MOD_NUM_ENTRIES(i) ( MOD2(i, LOG_WORD_SIZE) )
|
---|
| 183 | #define MODBIT(i) ( ((ulong)1) << MOD_NUM_ENTRIES(i) )
|
---|
| 184 |
|
---|
| 185 | #define TEST_BITMAP(bm, i) ( (bm)[DIV_NUM_ENTRIES(i)] & MODBIT(i) ? 1 : 0 )
|
---|
| 186 | #define SET_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] |= MODBIT(i)
|
---|
| 187 | #define CLEAR_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] &= ~MODBIT(i)
|
---|
| 188 |
|
---|
| 189 | /* the following are useful for reading and writing hashtables */
|
---|
| 190 | #define READ_UL(fp, data) \
|
---|
| 191 | do { \
|
---|
| 192 | long _ul; \
|
---|
| 193 | fread(&_ul, sizeof(_ul), 1, (fp)); \
|
---|
| 194 | data = ntohl(_ul); \
|
---|
| 195 | } while (0)
|
---|
| 196 |
|
---|
| 197 | #define WRITE_UL(fp, data) \
|
---|
| 198 | do { \
|
---|
| 199 | long _ul = htonl((long)(data)); \
|
---|
| 200 | fwrite(&_ul, sizeof(_ul), 1, (fp)); \
|
---|
| 201 | } while (0)
|
---|
| 202 |
|
---|
| 203 | /* Moves data from disk to memory if necessary. Note dataRead cannot be *
|
---|
| 204 | * NULL, because then we might as well (and do) load the data into memory */
|
---|
| 205 | #define LOAD_AND_RETURN(ht, loadCommand) /* lC returns an HTItem * */ \
|
---|
| 206 | if ( !(ht)->fpData ) /* data is stored in memory */ \
|
---|
| 207 | return (loadCommand); \
|
---|
| 208 | else /* must read data off of disk */ \
|
---|
| 209 | { \
|
---|
| 210 | int cchData; \
|
---|
| 211 | HTItem *bck; \
|
---|
| 212 | if ( (ht)->bckData.data ) free((char *)(ht)->bckData.data); \
|
---|
| 213 | ht->bckData.data = (ulong)NULL; /* needed if loadCommand fails */ \
|
---|
| 214 | bck = (loadCommand); \
|
---|
| 215 | if ( bck == NULL ) /* loadCommand failed: key not found */ \
|
---|
| 216 | return NULL; \
|
---|
| 217 | else \
|
---|
| 218 | (ht)->bckData = *bck; \
|
---|
| 219 | fseek(ht->fpData, (ht)->bckData.data, SEEK_SET); \
|
---|
| 220 | READ_UL((ht)->fpData, cchData); \
|
---|
| 221 | (ht)->bckData.data = (ulong)(ht)->dataRead((ht)->fpData, cchData); \
|
---|
| 222 | return &((ht)->bckData); \
|
---|
| 223 | }
|
---|
| 224 |
|
---|
| 225 |
|
---|
| 226 | /* ======================================================================== */
|
---|
| 227 | /* UTILITY ROUTINES */
|
---|
| 228 | /* ---------------------- */
|
---|
| 229 |
|
---|
| 230 | /* HTsmalloc() -- safe malloc
|
---|
| 231 | * allocates memory, or crashes if the allocation fails.
|
---|
| 232 | */
|
---|
| 233 | static void *HTsmalloc(unsigned long size)
|
---|
| 234 | {
|
---|
| 235 | void *retval;
|
---|
| 236 |
|
---|
| 237 | if ( size == 0 )
|
---|
| 238 | return NULL;
|
---|
| 239 | retval = (void *)malloc(size);
|
---|
| 240 | if ( !retval )
|
---|
| 241 | {
|
---|
| 242 | fprintf(stderr, "HTsmalloc: Unable to allocate %lu bytes of memory\n",
|
---|
| 243 | size);
|
---|
| 244 | exit(1);
|
---|
| 245 | }
|
---|
| 246 | return retval;
|
---|
| 247 | }
|
---|
| 248 |
|
---|
| 249 | /* HTscalloc() -- safe calloc
|
---|
| 250 | * allocates memory and initializes it to 0, or crashes if
|
---|
| 251 | * the allocation fails.
|
---|
| 252 | */
|
---|
| 253 | static void *HTscalloc(unsigned long size)
|
---|
| 254 | {
|
---|
| 255 | void *retval;
|
---|
| 256 |
|
---|
| 257 | retval = (void *)calloc(size, 1);
|
---|
| 258 | if ( !retval && size > 0 )
|
---|
| 259 | {
|
---|
| 260 | fprintf(stderr, "HTscalloc: Unable to allocate %lu bytes of memory\n",
|
---|
| 261 | size);
|
---|
| 262 | exit(1);
|
---|
| 263 | }
|
---|
| 264 | return retval;
|
---|
| 265 | }
|
---|
| 266 |
|
---|
| 267 | /* HTsrealloc() -- safe calloc
|
---|
| 268 | * grows the amount of memory from a source, or crashes if
|
---|
| 269 | * the allocation fails.
|
---|
| 270 | */
|
---|
| 271 | static void *HTsrealloc(void *ptr, unsigned long new_size, long delta)
|
---|
| 272 | {
|
---|
| 273 | if ( ptr == NULL )
|
---|
| 274 | return HTsmalloc(new_size);
|
---|
| 275 | ptr = realloc(ptr, new_size);
|
---|
| 276 | if ( !ptr && new_size > 0 )
|
---|
| 277 | {
|
---|
| 278 | fprintf(stderr, "HTsrealloc: Unable to reallocate %lu bytes of memory\n",
|
---|
| 279 | new_size);
|
---|
| 280 | exit(1);
|
---|
| 281 | }
|
---|
| 282 | return ptr;
|
---|
| 283 | }
|
---|
| 284 |
|
---|
| 285 | /* HTfree() -- keep track of memory use
|
---|
| 286 | * frees memory using free, but updates count of how much memory
|
---|
| 287 | * is being used.
|
---|
| 288 | */
|
---|
| 289 | static void HTfree(void *ptr, unsigned long size)
|
---|
| 290 | {
|
---|
| 291 | if ( size > 0 ) /* some systems seem to not like freeing NULL */
|
---|
| 292 | free(ptr);
|
---|
| 293 | }
|
---|
| 294 |
|
---|
| 295 | /*************************************************************************\
|
---|
| 296 | | HTcopy() |
|
---|
| 297 | | Sometimes we interpret data as a ulong. But ulongs must be |
|
---|
| 298 | | aligned on some machines, so instead of casting we copy. |
|
---|
| 299 | \*************************************************************************/
|
---|
| 300 |
|
---|
| 301 | unsigned long HTcopy(char *ul)
|
---|
| 302 | {
|
---|
| 303 | unsigned long retval;
|
---|
| 304 |
|
---|
| 305 | memcpy(&retval, ul, sizeof(retval));
|
---|
| 306 | return retval;
|
---|
| 307 | }
|
---|
| 308 |
|
---|
| 309 | /*************************************************************************\
|
---|
| 310 | | HTSetupKeyTrunc() |
|
---|
| 311 | | If keys are stored directly but cchKey is less than |
|
---|
| 312 | | sizeof(ulong), this cuts off the bits at the end. |
|
---|
| 313 | \*************************************************************************/
|
---|
| 314 |
|
---|
| 315 | static void HTSetupKeyTrunc(void)
|
---|
| 316 | {
|
---|
| 317 | int i, j;
|
---|
| 318 |
|
---|
| 319 | for ( i = 0; i < sizeof(unsigned long); i++ )
|
---|
| 320 | for ( j = 0; j < sizeof(unsigned long); j++ )
|
---|
| 321 | grgKeyTruncMask[i][j] = j < i ? 255 : 0; /* chars have 8 bits */
|
---|
| 322 | }
|
---|
| 323 |
|
---|
| 324 |
|
---|
| 325 | /* ======================================================================== */
|
---|
| 326 | /* TABLE ROUTINES */
|
---|
| 327 | /* -------------------- */
|
---|
| 328 |
|
---|
| 329 | /* The idea is that a hashtable with (logically) t buckets is divided
|
---|
| 330 | * into t/M groups of M buckets each. (M is a constant set in
|
---|
| 331 | * LOG_BM_WORDS for efficiency.) Each group is stored sparsely.
|
---|
| 332 | * Thus, inserting into the table causes some array to grow, which is
|
---|
| 333 | * slow but still constant time. Lookup involves doing a
|
---|
| 334 | * logical-position-to-sparse-position lookup, which is also slow but
|
---|
| 335 | * constant time. The larger M is, the slower these operations are
|
---|
| 336 | * but the less overhead (slightly).
|
---|
| 337 | *
|
---|
| 338 | * To store the sparse array, we store a bitmap B, where B[i] = 1 iff
|
---|
| 339 | * bucket i is non-empty. Then to look up bucket i we really look up
|
---|
| 340 | * array[# of 1s before i in B]. This is constant time for fixed M.
|
---|
| 341 | *
|
---|
| 342 | * Terminology: the position of an item in the overall table (from
|
---|
| 343 | * 1 .. t) is called its "location." The logical position in a group
|
---|
| 344 | * (from 1 .. M ) is called its "position." The actual location in
|
---|
| 345 | * the array (from 1 .. # of non-empty buckets in the group) is
|
---|
| 346 | * called its "offset."
|
---|
| 347 | *
|
---|
| 348 | * The following operations are supported:
|
---|
| 349 | * o Allocate an array with t buckets, all empty
|
---|
| 350 | * o Free a array (but not whatever was stored in the buckets)
|
---|
| 351 | * o Tell whether or not a bucket is empty
|
---|
| 352 | * o Return a bucket with a given location
|
---|
| 353 | * o Set the value of a bucket at a given location
|
---|
| 354 | * o Iterate through all the buckets in the array
|
---|
| 355 | * o Read and write an occupancy bitmap to disk
|
---|
| 356 | * o Return how much memory is being allocated by the array structure
|
---|
| 357 | */
|
---|
| 358 |
|
---|
| 359 | #ifndef SparseBucket /* by default, each bucket holds an HTItem */
|
---|
| 360 | #define SparseBucket HTItem
|
---|
| 361 | #endif
|
---|
| 362 |
|
---|
| 363 | typedef struct SparseBin {
|
---|
| 364 | SparseBucket *binSparse;
|
---|
| 365 | HTBitmap bmOccupied; /* bmOccupied[i] is 1 if bucket i has an item */
|
---|
| 366 | short cOccupied; /* size of binSparse; useful for iterators, eg */
|
---|
| 367 | } SparseBin;
|
---|
| 368 |
|
---|
| 369 | typedef struct SparseIterator {
|
---|
| 370 | long posGroup;
|
---|
| 371 | long posOffset;
|
---|
| 372 | SparseBin *binSparse; /* state info, to avoid args for NextBucket() */
|
---|
| 373 | ulong cBuckets;
|
---|
| 374 | } SparseIterator;
|
---|
| 375 |
|
---|
| 376 | #define LOG_LOW_BIN_SIZE ( LOG_BM_WORDS+LOG_WORD_SIZE )
|
---|
| 377 | #define SPARSE_GROUPS(cBuckets) ( (((cBuckets)-1) >> LOG_LOW_BIN_SIZE) + 1 )
|
---|
| 378 |
|
---|
| 379 | /* we need a small function to figure out # of items set in the bm */
|
---|
| 380 | static HTOffset EntriesUpto(HTBitmapPart *bm, int i)
|
---|
| 381 | { /* returns # of set bits in 0..i-1 */
|
---|
| 382 | HTOffset retval = 0;
|
---|
| 383 | static HTOffset rgcBits[256] = /* # of bits set in one char */
|
---|
| 384 | {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
---|
| 385 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
---|
| 386 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
---|
| 387 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
| 388 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
---|
| 389 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
| 390 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
| 391 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
---|
| 392 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
---|
| 393 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
| 394 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
| 395 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
---|
| 396 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
| 397 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
---|
| 398 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
---|
| 399 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
|
---|
| 400 |
|
---|
| 401 | if ( i == 0 ) return 0;
|
---|
| 402 | for ( ; i > sizeof(*bm)*8; i -= sizeof(*bm)*8, bm++ )
|
---|
| 403 | { /* think of it as loop unrolling */
|
---|
| 404 | #if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */
|
---|
| 405 | retval += rgcBits[*bm & 255]; /* get the low byte */
|
---|
| 406 | #if LOG_WORD_SIZE >= 4 /* at least 2 bytes */
|
---|
| 407 | retval += rgcBits[(*bm >> 8) & 255];
|
---|
| 408 | #if LOG_WORD_SIZE >= 5 /* at least 4 bytes */
|
---|
| 409 | retval += rgcBits[(*bm >> 16) & 255];
|
---|
| 410 | retval += rgcBits[(*bm >> 24) & 255];
|
---|
| 411 | #if LOG_WORD_SIZE >= 6 /* 8 bytes! */
|
---|
| 412 | retval += rgcBits[(*bm >> 32) & 255];
|
---|
| 413 | retval += rgcBits[(*bm >> 40) & 255];
|
---|
| 414 | retval += rgcBits[(*bm >> 48) & 255];
|
---|
| 415 | retval += rgcBits[(*bm >> 56) & 255];
|
---|
| 416 | #if LOG_WORD_SIZE >= 7 /* not a concern for a while... */
|
---|
| 417 | #error Need to rewrite EntriesUpto to support such big words
|
---|
| 418 | #endif /* >8 bytes */
|
---|
| 419 | #endif /* 8 bytes */
|
---|
| 420 | #endif /* 4 bytes */
|
---|
| 421 | #endif /* 2 bytes */
|
---|
| 422 | #endif /* 1 byte */
|
---|
| 423 | }
|
---|
| 424 | switch ( i ) { /* from 0 to 63 */
|
---|
| 425 | case 0:
|
---|
| 426 | return retval;
|
---|
| 427 | #if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */
|
---|
| 428 | case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
|
---|
| 429 | return (retval + rgcBits[*bm & ((1 << i)-1)]);
|
---|
| 430 | #if LOG_WORD_SIZE >= 4 /* at least 2 bytes */
|
---|
| 431 | case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:
|
---|
| 432 | return (retval + rgcBits[*bm & 255] +
|
---|
| 433 | rgcBits[(*bm >> 8) & ((1 << (i-8))-1)]);
|
---|
| 434 | #if LOG_WORD_SIZE >= 5 /* at least 4 bytes */
|
---|
| 435 | case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24:
|
---|
| 436 | return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
---|
| 437 | rgcBits[(*bm >> 16) & ((1 << (i-16))-1)]);
|
---|
| 438 | case 25: case 26: case 27: case 28: case 29: case 30: case 31: case 32:
|
---|
| 439 | return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
---|
| 440 | rgcBits[(*bm >> 16) & 255] +
|
---|
| 441 | rgcBits[(*bm >> 24) & ((1 << (i-24))-1)]);
|
---|
| 442 | #if LOG_WORD_SIZE >= 6 /* 8 bytes! */
|
---|
| 443 | case 33: case 34: case 35: case 36: case 37: case 38: case 39: case 40:
|
---|
| 444 | return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
---|
| 445 | rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
|
---|
| 446 | rgcBits[(*bm >> 32) & ((1 << (i-32))-1)]);
|
---|
| 447 | case 41: case 42: case 43: case 44: case 45: case 46: case 47: case 48:
|
---|
| 448 | return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
---|
| 449 | rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
|
---|
| 450 | rgcBits[(*bm >> 32) & 255] +
|
---|
| 451 | rgcBits[(*bm >> 40) & ((1 << (i-40))-1)]);
|
---|
| 452 | case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56:
|
---|
| 453 | return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
---|
| 454 | rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
|
---|
| 455 | rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +
|
---|
| 456 | rgcBits[(*bm >> 48) & ((1 << (i-48))-1)]);
|
---|
| 457 | case 57: case 58: case 59: case 60: case 61: case 62: case 63: case 64:
|
---|
| 458 | return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
---|
| 459 | rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
|
---|
| 460 | rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +
|
---|
| 461 | rgcBits[(*bm >> 48) & 255] +
|
---|
| 462 | rgcBits[(*bm >> 56) & ((1 << (i-56))-1)]);
|
---|
| 463 | #endif /* 8 bytes */
|
---|
| 464 | #endif /* 4 bytes */
|
---|
| 465 | #endif /* 2 bytes */
|
---|
| 466 | #endif /* 1 byte */
|
---|
| 467 | }
|
---|
| 468 | assert("" == "word size is too big in EntriesUpto()");
|
---|
| 469 | return -1;
|
---|
| 470 | }
|
---|
| 471 | #define SPARSE_POS_TO_OFFSET(bm, i) ( EntriesUpto(&((bm)[0]), i) )
|
---|
| 472 | #define SPARSE_BUCKET(bin, location) \
|
---|
| 473 | ( (bin)[(location) >> LOG_LOW_BIN_SIZE].binSparse + \
|
---|
| 474 | SPARSE_POS_TO_OFFSET((bin)[(location)>>LOG_LOW_BIN_SIZE].bmOccupied, \
|
---|
| 475 | MOD2(location, LOG_LOW_BIN_SIZE)) )
|
---|
| 476 |
|
---|
| 477 |
|
---|
| 478 | /*************************************************************************\
|
---|
| 479 | | SparseAllocate() |
|
---|
| 480 | | SparseFree() |
|
---|
| 481 | | Allocates, sets-to-empty, and frees a sparse array. All you need |
|
---|
| 482 | | to tell me is how many buckets you want. I return the number of |
|
---|
| 483 | | buckets I actually allocated, setting the array as a parameter. |
|
---|
| 484 | | Note that you have to set auxilliary parameters, like cOccupied. |
|
---|
| 485 | \*************************************************************************/
|
---|
| 486 |
|
---|
| 487 | static ulong SparseAllocate(SparseBin **pbinSparse, ulong cBuckets)
|
---|
| 488 | {
|
---|
| 489 | int cGroups = SPARSE_GROUPS(cBuckets);
|
---|
| 490 |
|
---|
| 491 | *pbinSparse = (SparseBin *) HTscalloc(sizeof(**pbinSparse) * cGroups);
|
---|
| 492 | return cGroups << LOG_LOW_BIN_SIZE;
|
---|
| 493 | }
|
---|
| 494 |
|
---|
| 495 | static SparseBin *SparseFree(SparseBin *binSparse, ulong cBuckets)
|
---|
| 496 | {
|
---|
| 497 | ulong iGroup, cGroups = SPARSE_GROUPS(cBuckets);
|
---|
| 498 |
|
---|
| 499 | for ( iGroup = 0; iGroup < cGroups; iGroup++ )
|
---|
| 500 | HTfree(binSparse[iGroup].binSparse, (sizeof(*binSparse[iGroup].binSparse)
|
---|
| 501 | * binSparse[iGroup].cOccupied));
|
---|
| 502 | HTfree(binSparse, sizeof(*binSparse) * cGroups);
|
---|
| 503 | return NULL;
|
---|
| 504 | }
|
---|
| 505 |
|
---|
| 506 | /*************************************************************************\
|
---|
| 507 | | SparseIsEmpty() |
|
---|
| 508 | | SparseFind() |
|
---|
| 509 | | You give me a location (ie a number between 1 and t), and I |
|
---|
| 510 | | return the bucket at that location, or NULL if the bucket is |
|
---|
| 511 | | empty. It's OK to call Find() on an empty table. |
|
---|
| 512 | \*************************************************************************/
|
---|
| 513 |
|
---|
| 514 | static int SparseIsEmpty(SparseBin *binSparse, ulong location)
|
---|
| 515 | {
|
---|
| 516 | return !TEST_BITMAP(binSparse[location>>LOG_LOW_BIN_SIZE].bmOccupied,
|
---|
| 517 | MOD2(location, LOG_LOW_BIN_SIZE));
|
---|
| 518 | }
|
---|
| 519 |
|
---|
| 520 | static SparseBucket *SparseFind(SparseBin *binSparse, ulong location)
|
---|
| 521 | {
|
---|
| 522 | if ( SparseIsEmpty(binSparse, location) )
|
---|
| 523 | return NULL;
|
---|
| 524 | return SPARSE_BUCKET(binSparse, location);
|
---|
| 525 | }
|
---|
| 526 |
|
---|
| 527 | /*************************************************************************\
|
---|
| 528 | | SparseInsert() |
|
---|
| 529 | | You give me a location, and contents to put there, and I insert |
|
---|
| 530 | | into that location and RETURN a pointer to the location. If |
|
---|
| 531 | | bucket was already occupied, I write over the contents only if |
|
---|
| 532 | | *pfOverwrite is 1. We set *pfOverwrite to 1 if there was someone |
|
---|
| 533 | | there (whether or not we overwrote) and 0 else. |
|
---|
| 534 | \*************************************************************************/
|
---|
| 535 |
|
---|
| 536 | static SparseBucket *SparseInsert(SparseBin *binSparse, SparseBucket *bckInsert,
|
---|
| 537 | ulong location, int *pfOverwrite)
|
---|
| 538 | {
|
---|
| 539 | SparseBucket *bckPlace;
|
---|
| 540 | HTOffset offset;
|
---|
| 541 |
|
---|
| 542 | bckPlace = SparseFind(binSparse, location);
|
---|
| 543 | if ( bckPlace ) /* means we replace old contents */
|
---|
| 544 | {
|
---|
| 545 | if ( *pfOverwrite )
|
---|
| 546 | *bckPlace = *bckInsert;
|
---|
| 547 | *pfOverwrite = 1;
|
---|
| 548 | return bckPlace;
|
---|
| 549 | }
|
---|
| 550 |
|
---|
| 551 | binSparse += (location >> LOG_LOW_BIN_SIZE);
|
---|
| 552 | offset = SPARSE_POS_TO_OFFSET(binSparse->bmOccupied,
|
---|
| 553 | MOD2(location, LOG_LOW_BIN_SIZE));
|
---|
| 554 | binSparse->binSparse = (SparseBucket *)
|
---|
| 555 | HTsrealloc(binSparse->binSparse,
|
---|
| 556 | sizeof(*binSparse->binSparse) * ++binSparse->cOccupied,
|
---|
| 557 | sizeof(*binSparse->binSparse));
|
---|
| 558 | memmove(binSparse->binSparse + offset+1,
|
---|
| 559 | binSparse->binSparse + offset,
|
---|
| 560 | (binSparse->cOccupied-1 - offset) * sizeof(*binSparse->binSparse));
|
---|
| 561 | binSparse->binSparse[offset] = *bckInsert;
|
---|
| 562 | SET_BITMAP(binSparse->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE));
|
---|
| 563 | *pfOverwrite = 0;
|
---|
| 564 | return binSparse->binSparse + offset;
|
---|
| 565 | }
|
---|
| 566 |
|
---|
| 567 | /*************************************************************************\
|
---|
| 568 | | SparseFirstBucket() |
|
---|
| 569 | | SparseNextBucket() |
|
---|
| 570 | | SparseCurrentBit() |
|
---|
| 571 | | Iterate through the occupied buckets of a dense hashtable. You |
|
---|
| 572 | | must, of course, have allocated space yourself for the iterator. |
|
---|
| 573 | \*************************************************************************/
|
---|
| 574 |
|
---|
| 575 | static SparseBucket *SparseNextBucket(SparseIterator *iter)
|
---|
| 576 | {
|
---|
| 577 | if ( iter->posOffset != -1 && /* not called from FirstBucket()? */
|
---|
| 578 | (++iter->posOffset < iter->binSparse[iter->posGroup].cOccupied) )
|
---|
| 579 | return iter->binSparse[iter->posGroup].binSparse + iter->posOffset;
|
---|
| 580 |
|
---|
| 581 | iter->posOffset = 0; /* start the next group */
|
---|
| 582 | for ( iter->posGroup++; iter->posGroup < SPARSE_GROUPS(iter->cBuckets);
|
---|
| 583 | iter->posGroup++ )
|
---|
| 584 | if ( iter->binSparse[iter->posGroup].cOccupied > 0 )
|
---|
| 585 | return iter->binSparse[iter->posGroup].binSparse; /* + 0 */
|
---|
| 586 | return NULL; /* all remaining groups were empty */
|
---|
| 587 | }
|
---|
| 588 |
|
---|
| 589 | static SparseBucket *SparseFirstBucket(SparseIterator *iter,
|
---|
| 590 | SparseBin *binSparse, ulong cBuckets)
|
---|
| 591 | {
|
---|
| 592 | iter->binSparse = binSparse; /* set it up for NextBucket() */
|
---|
| 593 | iter->cBuckets = cBuckets;
|
---|
| 594 | iter->posOffset = -1; /* when we advance, we're at 0 */
|
---|
| 595 | iter->posGroup = -1;
|
---|
| 596 | return SparseNextBucket(iter);
|
---|
| 597 | }
|
---|
| 598 |
|
---|
| 599 | /*************************************************************************\
|
---|
| 600 | | SparseWrite() |
|
---|
| 601 | | SparseRead() |
|
---|
| 602 | | These are routines for storing a sparse hashtable onto disk. We |
|
---|
| 603 | | store the number of buckets and a bitmap indicating which buckets |
|
---|
| 604 | | are allocated (occupied). The actual contents of the buckets |
|
---|
| 605 | | must be stored separately. |
|
---|
| 606 | \*************************************************************************/
|
---|
| 607 |
|
---|
| 608 | static void SparseWrite(FILE *fp, SparseBin *binSparse, ulong cBuckets)
|
---|
| 609 | {
|
---|
| 610 | ulong i, j;
|
---|
| 611 |
|
---|
| 612 | WRITE_UL(fp, cBuckets);
|
---|
| 613 | for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ )
|
---|
| 614 | for ( j = 0; j < (1<<LOG_BM_WORDS); j++ )
|
---|
| 615 | WRITE_UL(fp, binSparse[i].bmOccupied[j]);
|
---|
| 616 | }
|
---|
| 617 |
|
---|
| 618 | static ulong SparseRead(FILE *fp, SparseBin **pbinSparse)
|
---|
| 619 | {
|
---|
| 620 | ulong i, j, cBuckets;
|
---|
| 621 |
|
---|
| 622 | READ_UL(fp, cBuckets); /* actually, cBuckets is stored */
|
---|
| 623 | cBuckets = SparseAllocate(pbinSparse, cBuckets);
|
---|
| 624 | for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ )
|
---|
| 625 | {
|
---|
| 626 | for ( j = 0; j < (1<<LOG_BM_WORDS); j++ )
|
---|
| 627 | READ_UL(fp, (*pbinSparse)[i].bmOccupied[j]);
|
---|
| 628 | (*pbinSparse)[i].cOccupied =
|
---|
| 629 | SPARSE_POS_TO_OFFSET((*pbinSparse)[i].bmOccupied,1<<LOG_LOW_BIN_SIZE);
|
---|
| 630 | (*pbinSparse)[i].binSparse =
|
---|
| 631 | (SparseBucket *) HTsmalloc(sizeof(*((*pbinSparse)[i].binSparse)) *
|
---|
| 632 | (*pbinSparse)[i].cOccupied);
|
---|
| 633 | }
|
---|
| 634 | return cBuckets;
|
---|
| 635 | }
|
---|
| 636 |
|
---|
| 637 | /*************************************************************************\
|
---|
| 638 | | SparseMemory() |
|
---|
| 639 | | SparseMemory() tells us how much memory is being allocated for |
|
---|
| 640 | | the dense table. You need to tell me not only how many buckets |
|
---|
| 641 | | there are, but how many are occupied. |
|
---|
| 642 | \*************************************************************************/
|
---|
| 643 |
|
---|
| 644 | static ulong SparseMemory(ulong cBuckets, ulong cOccupied)
|
---|
| 645 | {
|
---|
| 646 | return ( cOccupied * sizeof(SparseBucket) +
|
---|
| 647 | SPARSE_GROUPS(cBuckets) * sizeof(SparseBin) );
|
---|
| 648 | }
|
---|
| 649 |
|
---|
| 650 |
|
---|
| 651 | /* Just for fun, I also provide support for dense tables. These are
|
---|
| 652 | * just regulr arrays. Access is fast, but they can get big.
|
---|
| 653 | * Use Table(x) at the top of chash.h to decide which you want.
|
---|
| 654 | * A disadvantage is we need to steal more of the data space for
|
---|
| 655 | * indicating empty buckets. We choose -3.
|
---|
| 656 | */
|
---|
| 657 |
|
---|
| 658 | #ifndef DenseBucket /* by default, each bucket holds an HTItem */
|
---|
| 659 | #define DenseBucket HTItem
|
---|
| 660 | #endif
|
---|
| 661 |
|
---|
| 662 | typedef struct DenseBin { /* needs to be a struct for C typing reasons */
|
---|
| 663 | DenseBucket *rgBuckets; /* A bin is an array of buckets */
|
---|
| 664 | } DenseBin;
|
---|
| 665 |
|
---|
| 666 | typedef struct DenseIterator {
|
---|
| 667 | long pos; /* the actual iterator */
|
---|
| 668 | DenseBin *bin; /* state info, to avoid args for NextBucket() */
|
---|
| 669 | ulong cBuckets;
|
---|
| 670 | } DenseIterator;
|
---|
| 671 |
|
---|
| 672 | #define DENSE_IS_EMPTY(bin, i) ( (bin)[i].data == EMPTY )
|
---|
| 673 | #define DENSE_SET_EMPTY(bin, i) (bin)[i].data = EMPTY /* fks-hash.h */
|
---|
| 674 | #define DENSE_SET_OCCUPIED(bin, i) (bin)[i].data = 1 /* not EMPTY */
|
---|
| 675 |
|
---|
| 676 | static void DenseClear(DenseBin *bin, ulong cBuckets)
|
---|
| 677 | {
|
---|
| 678 | while ( cBuckets-- )
|
---|
| 679 | DENSE_SET_EMPTY(bin->rgBuckets, cBuckets);
|
---|
| 680 | }
|
---|
| 681 |
|
---|
| 682 | static ulong DenseAllocate(DenseBin **pbin, ulong cBuckets)
|
---|
| 683 | {
|
---|
| 684 | *pbin = (DenseBin *) HTsmalloc(sizeof(*pbin));
|
---|
| 685 | (*pbin)->rgBuckets = (DenseBucket *) HTsmalloc(sizeof(*(*pbin)->rgBuckets)
|
---|
| 686 | * cBuckets);
|
---|
| 687 | DenseClear(*pbin, cBuckets);
|
---|
| 688 | return cBuckets;
|
---|
| 689 | }
|
---|
| 690 |
|
---|
| 691 | static DenseBin *DenseFree(DenseBin *bin, ulong cBuckets)
|
---|
| 692 | {
|
---|
| 693 | HTfree(bin->rgBuckets, sizeof(*bin->rgBuckets) * cBuckets);
|
---|
| 694 | HTfree(bin, sizeof(*bin));
|
---|
| 695 | return NULL;
|
---|
| 696 | }
|
---|
| 697 |
|
---|
| 698 | static int DenseIsEmpty(DenseBin *bin, ulong location)
|
---|
| 699 | {
|
---|
| 700 | return DENSE_IS_EMPTY(bin->rgBuckets, location);
|
---|
| 701 | }
|
---|
| 702 |
|
---|
| 703 | static DenseBucket *DenseFind(DenseBin *bin, ulong location)
|
---|
| 704 | {
|
---|
| 705 | if ( DenseIsEmpty(bin, location) )
|
---|
| 706 | return NULL;
|
---|
| 707 | return bin->rgBuckets + location;
|
---|
| 708 | }
|
---|
| 709 |
|
---|
| 710 | static DenseBucket *DenseInsert(DenseBin *bin, DenseBucket *bckInsert,
|
---|
| 711 | ulong location, int *pfOverwrite)
|
---|
| 712 | {
|
---|
| 713 | DenseBucket *bckPlace;
|
---|
| 714 |
|
---|
| 715 | bckPlace = DenseFind(bin, location);
|
---|
| 716 | if ( bckPlace ) /* means something is already there */
|
---|
| 717 | {
|
---|
| 718 | if ( *pfOverwrite )
|
---|
| 719 | *bckPlace = *bckInsert;
|
---|
| 720 | *pfOverwrite = 1; /* set to 1 to indicate someone was there */
|
---|
| 721 | return bckPlace;
|
---|
| 722 | }
|
---|
| 723 | else
|
---|
| 724 | {
|
---|
| 725 | bin->rgBuckets[location] = *bckInsert;
|
---|
| 726 | *pfOverwrite = 0;
|
---|
| 727 | return bin->rgBuckets + location;
|
---|
| 728 | }
|
---|
| 729 | }
|
---|
| 730 |
|
---|
| 731 | static DenseBucket *DenseNextBucket(DenseIterator *iter)
|
---|
| 732 | {
|
---|
| 733 | for ( iter->pos++; iter->pos < iter->cBuckets; iter->pos++ )
|
---|
| 734 | if ( !DenseIsEmpty(iter->bin, iter->pos) )
|
---|
| 735 | return iter->bin->rgBuckets + iter->pos;
|
---|
| 736 | return NULL; /* all remaining groups were empty */
|
---|
| 737 | }
|
---|
| 738 |
|
---|
| 739 | static DenseBucket *DenseFirstBucket(DenseIterator *iter,
|
---|
| 740 | DenseBin *bin, ulong cBuckets)
|
---|
| 741 | {
|
---|
| 742 | iter->bin = bin; /* set it up for NextBucket() */
|
---|
| 743 | iter->cBuckets = cBuckets;
|
---|
| 744 | iter->pos = -1; /* thus the next bucket will be 0 */
|
---|
| 745 | return DenseNextBucket(iter);
|
---|
| 746 | }
|
---|
| 747 |
|
---|
| 748 | static void DenseWrite(FILE *fp, DenseBin *bin, ulong cBuckets)
|
---|
| 749 | {
|
---|
| 750 | ulong pos = 0, bit, bm;
|
---|
| 751 |
|
---|
| 752 | WRITE_UL(fp, cBuckets);
|
---|
| 753 | while ( pos < cBuckets )
|
---|
| 754 | {
|
---|
| 755 | bm = 0;
|
---|
| 756 | for ( bit = 0; bit < 8*sizeof(ulong); bit++ )
|
---|
| 757 | {
|
---|
| 758 | if ( !DenseIsEmpty(bin, pos) )
|
---|
| 759 | SET_BITMAP(&bm, bit); /* in fks-hash.h */
|
---|
| 760 | if ( ++pos == cBuckets )
|
---|
| 761 | break;
|
---|
| 762 | }
|
---|
| 763 | WRITE_UL(fp, bm);
|
---|
| 764 | }
|
---|
| 765 | }
|
---|
| 766 |
|
---|
| 767 | static ulong DenseRead(FILE *fp, DenseBin **pbin)
|
---|
| 768 | {
|
---|
| 769 | ulong pos = 0, bit, bm, cBuckets;
|
---|
| 770 |
|
---|
| 771 | READ_UL(fp, cBuckets);
|
---|
| 772 | cBuckets = DenseAllocate(pbin, cBuckets);
|
---|
| 773 | while ( pos < cBuckets )
|
---|
| 774 | {
|
---|
| 775 | READ_UL(fp, bm);
|
---|
| 776 | for ( bit = 0; bit < 8*sizeof(ulong); bit++ )
|
---|
| 777 | {
|
---|
| 778 | if ( TEST_BITMAP(&bm, bit) ) /* in fks-hash.h */
|
---|
| 779 | DENSE_SET_OCCUPIED((*pbin)->rgBuckets, pos);
|
---|
| 780 | else
|
---|
| 781 | DENSE_SET_EMPTY((*pbin)->rgBuckets, pos);
|
---|
| 782 | if ( ++pos == cBuckets )
|
---|
| 783 | break;
|
---|
| 784 | }
|
---|
| 785 | }
|
---|
| 786 | return cBuckets;
|
---|
| 787 | }
|
---|
| 788 |
|
---|
| 789 | static ulong DenseMemory(ulong cBuckets, ulong cOccupied)
|
---|
| 790 | {
|
---|
| 791 | return cBuckets * sizeof(DenseBucket);
|
---|
| 792 | }
|
---|
| 793 |
|
---|
| 794 |
|
---|
| 795 | /* ======================================================================== */
|
---|
| 796 | /* HASHING ROUTINES */
|
---|
| 797 | /* ---------------------- */
|
---|
| 798 |
|
---|
| 799 | /* Implements a simple quadratic hashing scheme. We have a single hash
|
---|
| 800 | * table of size t and a single hash function h(x). When inserting an
|
---|
| 801 | * item, first we try h(x) % t. If it's occupied, we try h(x) +
|
---|
| 802 | * i*(i-1)/2 % t for increasing values of i until we hit a not-occupied
|
---|
| 803 | * space. To make this dynamic, we double the size of the hash table as
|
---|
| 804 | * soon as more than half the cells are occupied. When deleting, we can
|
---|
| 805 | * choose to shrink the hashtable when less than a quarter of the
|
---|
| 806 | * cells are occupied, or we can choose never to shrink the hashtable.
|
---|
| 807 | * For lookup, we check h(x) + i*(i-1)/2 % t (starting with i=0) until
|
---|
| 808 | * we get a match or we hit an empty space. Note that as a result,
|
---|
| 809 | * we can't make a cell empty on deletion, or lookups may end prematurely.
|
---|
| 810 | * Instead we mark the cell as "deleted." We thus steal the value
|
---|
| 811 | * DELETED as a possible "data" value. As long as data are pointers,
|
---|
| 812 | * that's ok.
|
---|
| 813 | * The hash increment we use, i(i-1)/2, is not the standard quadratic
|
---|
| 814 | * hash increment, which is i^2. i(i-1)/2 covers the entire bucket space
|
---|
| 815 | * when the hashtable size is a power of two, as it is for us. In fact,
|
---|
| 816 | * the first n probes cover n distinct buckets; then it repeats. This
|
---|
| 817 | * guarantees insertion will always succeed.
|
---|
| 818 | * If you linear hashing, set JUMP in chash.h. You can also change
|
---|
| 819 | * various other parameters there.
|
---|
| 820 | */
|
---|
| 821 |
|
---|
| 822 | /*************************************************************************\
|
---|
| 823 | | Hash() |
|
---|
| 824 | | The hash function I use is due to Bob Jenkins (see |
|
---|
| 825 | | http://ourworld.compuserve.com/homepages/bob_jenkins/evahash.htm). |
|
---|
| 826 | | It takes 36 instructions, in 18 cycles if you're lucky. |
|
---|
| 827 | | hashing depends on the fact the hashtable size is always a |
|
---|
| 828 | | power of 2. cBuckets is probably ht->cBuckets. |
|
---|
| 829 | \*************************************************************************/
|
---|
| 830 |
|
---|
| 831 | #if LOG_WORD_SIZE == 5 /* 32 bit words */
|
---|
| 832 |
|
---|
| 833 | #define mix(a,b,c) \
|
---|
| 834 | { \
|
---|
| 835 | a -= b; a -= c; a ^= (c>>13); \
|
---|
| 836 | b -= c; b -= a; b ^= (a<<8); \
|
---|
| 837 | c -= a; c -= b; c ^= (b>>13); \
|
---|
| 838 | a -= b; a -= c; a ^= (c>>12); \
|
---|
| 839 | b -= c; b -= a; b ^= (a<<16); \
|
---|
| 840 | c -= a; c -= b; c ^= (b>>5); \
|
---|
| 841 | a -= b; a -= c; a ^= (c>>3); \
|
---|
| 842 | b -= c; b -= a; b ^= (a<<10); \
|
---|
| 843 | c -= a; c -= b; c ^= (b>>15); \
|
---|
| 844 | }
|
---|
| 845 | #ifdef WORD_HASH /* play with this on little-endian machines */
|
---|
| 846 | #define WORD_AT(ptr) ( *(ulong *)(ptr) )
|
---|
| 847 | #else
|
---|
| 848 | #define WORD_AT(ptr) ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \
|
---|
| 849 | ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) )
|
---|
| 850 | #endif
|
---|
| 851 |
|
---|
| 852 | #elif LOG_WORD_SIZE == 6 /* 64 bit words */
|
---|
| 853 |
|
---|
| 854 | #define mix(a,b,c) \
|
---|
| 855 | { \
|
---|
| 856 | a -= b; a -= c; a ^= (c>>43); \
|
---|
| 857 | b -= c; b -= a; b ^= (a<<9); \
|
---|
| 858 | c -= a; c -= b; c ^= (b>>8); \
|
---|
| 859 | a -= b; a -= c; a ^= (c>>38); \
|
---|
| 860 | b -= c; b -= a; b ^= (a<<23); \
|
---|
| 861 | c -= a; c -= b; c ^= (b>>5); \
|
---|
| 862 | a -= b; a -= c; a ^= (c>>35); \
|
---|
| 863 | b -= c; b -= a; b ^= (a<<49); \
|
---|
| 864 | c -= a; c -= b; c ^= (b>>11); \
|
---|
| 865 | a -= b; a -= c; a ^= (c>>12); \
|
---|
| 866 | b -= c; b -= a; b ^= (a<<18); \
|
---|
| 867 | c -= a; c -= b; c ^= (b>>22); \
|
---|
| 868 | }
|
---|
| 869 | #ifdef WORD_HASH /* alpha is little-endian, btw */
|
---|
| 870 | #define WORD_AT(ptr) ( *(ulong *)(ptr) )
|
---|
| 871 | #else
|
---|
| 872 | #define WORD_AT(ptr) ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \
|
---|
| 873 | ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) + \
|
---|
| 874 | ((ulong)(ptr)[4]<<32) + ((ulong)(ptr)[5]<<40) + \
|
---|
| 875 | ((ulong)(ptr)[6]<<48) + ((ulong)(ptr)[7]<<56) )
|
---|
| 876 | #endif
|
---|
| 877 |
|
---|
| 878 | #else /* neither 32 or 64 bit words */
|
---|
| 879 | #error This hash function can only hash 32 or 64 bit words. Sorry.
|
---|
| 880 | #endif
|
---|
| 881 |
|
---|
| 882 | static ulong Hash(HashTable *ht, char *key, ulong cBuckets)
|
---|
| 883 | {
|
---|
| 884 | ulong a, b, c, cchKey, cchKeyOrig;
|
---|
| 885 |
|
---|
| 886 | cchKeyOrig = ht->cchKey == NULL_TERMINATED ? strlen(key) : ht->cchKey;
|
---|
| 887 | a = b = c = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
---|
| 888 |
|
---|
| 889 | for ( cchKey = cchKeyOrig; cchKey >= 3 * sizeof(ulong);
|
---|
| 890 | cchKey -= 3 * sizeof(ulong), key += 3 * sizeof(ulong) )
|
---|
| 891 | {
|
---|
| 892 | a += WORD_AT(key);
|
---|
| 893 | b += WORD_AT(key + sizeof(ulong));
|
---|
| 894 | c += WORD_AT(key + sizeof(ulong)*2);
|
---|
| 895 | mix(a,b,c);
|
---|
| 896 | }
|
---|
| 897 |
|
---|
| 898 | c += cchKeyOrig;
|
---|
| 899 | switch ( cchKey ) { /* deal with rest. Cases fall through */
|
---|
| 900 | #if LOG_WORD_SIZE == 5
|
---|
| 901 | case 11: c += (ulong)key[10]<<24;
|
---|
| 902 | case 10: c += (ulong)key[9]<<16;
|
---|
| 903 | case 9 : c += (ulong)key[8]<<8;
|
---|
| 904 | /* the first byte of c is reserved for the length */
|
---|
| 905 | case 8 : b += WORD_AT(key+4); a+= WORD_AT(key); break;
|
---|
| 906 | case 7 : b += (ulong)key[6]<<16;
|
---|
| 907 | case 6 : b += (ulong)key[5]<<8;
|
---|
| 908 | case 5 : b += key[4];
|
---|
| 909 | case 4 : a += WORD_AT(key); break;
|
---|
| 910 | case 3 : a += (ulong)key[2]<<16;
|
---|
| 911 | case 2 : a += (ulong)key[1]<<8;
|
---|
| 912 | case 1 : a += key[0];
|
---|
| 913 | /* case 0 : nothing left to add */
|
---|
| 914 | #elif LOG_WORD_SIZE == 6
|
---|
| 915 | case 23: c += (ulong)key[22]<<56;
|
---|
| 916 | case 22: c += (ulong)key[21]<<48;
|
---|
| 917 | case 21: c += (ulong)key[20]<<40;
|
---|
| 918 | case 20: c += (ulong)key[19]<<32;
|
---|
| 919 | case 19: c += (ulong)key[18]<<24;
|
---|
| 920 | case 18: c += (ulong)key[17]<<16;
|
---|
| 921 | case 17: c += (ulong)key[16]<<8;
|
---|
| 922 | /* the first byte of c is reserved for the length */
|
---|
| 923 | case 16: b += WORD_AT(key+8); a+= WORD_AT(key); break;
|
---|
| 924 | case 15: b += (ulong)key[14]<<48;
|
---|
| 925 | case 14: b += (ulong)key[13]<<40;
|
---|
| 926 | case 13: b += (ulong)key[12]<<32;
|
---|
| 927 | case 12: b += (ulong)key[11]<<24;
|
---|
| 928 | case 11: b += (ulong)key[10]<<16;
|
---|
| 929 | case 10: b += (ulong)key[ 9]<<8;
|
---|
| 930 | case 9: b += (ulong)key[ 8];
|
---|
| 931 | case 8: a += WORD_AT(key); break;
|
---|
| 932 | case 7: a += (ulong)key[ 6]<<48;
|
---|
| 933 | case 6: a += (ulong)key[ 5]<<40;
|
---|
| 934 | case 5: a += (ulong)key[ 4]<<32;
|
---|
| 935 | case 4: a += (ulong)key[ 3]<<24;
|
---|
| 936 | case 3: a += (ulong)key[ 2]<<16;
|
---|
| 937 | case 2: a += (ulong)key[ 1]<<8;
|
---|
| 938 | case 1: a += (ulong)key[ 0];
|
---|
| 939 | /* case 0: nothing left to add */
|
---|
| 940 | #endif
|
---|
| 941 | }
|
---|
| 942 | mix(a,b,c);
|
---|
| 943 | return c & (cBuckets-1);
|
---|
| 944 | }
|
---|
| 945 |
|
---|
| 946 |
|
---|
| 947 | /*************************************************************************\
|
---|
| 948 | | Rehash() |
|
---|
| 949 | | You give me a hashtable, a new size, and a bucket to follow, and |
|
---|
| 950 | | I resize the hashtable's bin to be the new size, rehashing |
|
---|
| 951 | | everything in it. I keep particular track of the bucket you pass |
|
---|
| 952 | | in, and RETURN a pointer to where the item in the bucket got to. |
|
---|
| 953 | | (If you pass in NULL, I return an arbitrary pointer.) |
|
---|
| 954 | \*************************************************************************/
|
---|
| 955 |
|
---|
| 956 | static HTItem *Rehash(HashTable *ht, ulong cNewBuckets, HTItem *bckWatch)
|
---|
| 957 | {
|
---|
| 958 | Table *tableNew;
|
---|
| 959 | ulong iBucketFirst;
|
---|
| 960 | HTItem *bck, *bckNew = NULL;
|
---|
| 961 | ulong offset; /* the i in h(x) + i*(i-1)/2 */
|
---|
| 962 | int fOverwrite = 0; /* not an issue: there can be no collisions */
|
---|
| 963 |
|
---|
| 964 | assert( ht->table );
|
---|
| 965 | cNewBuckets = Table(Allocate)(&tableNew, cNewBuckets);
|
---|
| 966 | /* Since we RETURN the new position of bckWatch, we want *
|
---|
| 967 | * to make sure it doesn't get moved due to some table *
|
---|
| 968 | * rehashing that comes after it's inserted. Thus, we *
|
---|
| 969 | * have to put it in last. This makes the loop weird. */
|
---|
| 970 | for ( bck = HashFirstBucket(ht); ; bck = HashNextBucket(ht) )
|
---|
| 971 | {
|
---|
| 972 | if ( bck == NULL ) /* we're done iterating, so look at bckWatch */
|
---|
| 973 | {
|
---|
| 974 | bck = bckWatch;
|
---|
| 975 | if ( bck == NULL ) /* I guess bckWatch wasn't specified */
|
---|
| 976 | break;
|
---|
| 977 | }
|
---|
| 978 | else if ( bck == bckWatch )
|
---|
| 979 | continue; /* ignore if we see it during the iteration */
|
---|
| 980 |
|
---|
| 981 | offset = 0; /* a new i for a new bucket */
|
---|
| 982 | for ( iBucketFirst = Hash(ht, KEY_PTR(ht, bck->key), cNewBuckets);
|
---|
| 983 | !Table(IsEmpty)(tableNew, iBucketFirst);
|
---|
| 984 | iBucketFirst = (iBucketFirst + JUMP(KEY_PTR(ht,bck->key), offset))
|
---|
| 985 | & (cNewBuckets-1) )
|
---|
| 986 | ;
|
---|
| 987 | bckNew = Table(Insert)(tableNew, bck, iBucketFirst, &fOverwrite);
|
---|
| 988 | if ( bck == bckWatch ) /* we're done with the last thing to do */
|
---|
| 989 | break;
|
---|
| 990 | }
|
---|
| 991 | Table(Free)(ht->table, ht->cBuckets);
|
---|
| 992 | ht->table = tableNew;
|
---|
| 993 | ht->cBuckets = cNewBuckets;
|
---|
| 994 | ht->cDeletedItems = 0;
|
---|
| 995 | return bckNew; /* new position of bckWatch, which was inserted last */
|
---|
| 996 | }
|
---|
| 997 |
|
---|
| 998 | /*************************************************************************\
|
---|
| 999 | | Find() |
|
---|
| 1000 | | Does the quadratic searching stuff. RETURNS NULL if we don't |
|
---|
| 1001 | | find an object with the given key, and a pointer to the Item |
|
---|
| 1002 | | holding the key, if we do. Also sets posLastFind. If piEmpty is |
|
---|
| 1003 | | non-NULL, we set it to the first open bucket we pass; helpful for |
|
---|
| 1004 | | doing a later insert if the search fails, for instance. |
|
---|
| 1005 | \*************************************************************************/
|
---|
| 1006 |
|
---|
| 1007 | static HTItem *Find(HashTable *ht, ulong key, ulong *piEmpty)
|
---|
| 1008 | {
|
---|
| 1009 | ulong iBucketFirst;
|
---|
| 1010 | HTItem *item;
|
---|
| 1011 | ulong offset = 0; /* the i in h(x) + i*(i-1)/2 */
|
---|
| 1012 | int fFoundEmpty = 0; /* set when we pass over an empty bucket */
|
---|
| 1013 |
|
---|
| 1014 | ht->posLastFind = NULL; /* set up for failure: a new find starts */
|
---|
| 1015 | if ( ht->table == NULL ) /* empty hash table: find is bound to fail */
|
---|
| 1016 | return NULL;
|
---|
| 1017 |
|
---|
| 1018 | iBucketFirst = Hash(ht, KEY_PTR(ht, key), ht->cBuckets);
|
---|
| 1019 | while ( 1 ) /* now try all i > 0 */
|
---|
| 1020 | {
|
---|
| 1021 | item = Table(Find)(ht->table, iBucketFirst);
|
---|
| 1022 | if ( item == NULL ) /* it's not in the table */
|
---|
| 1023 | {
|
---|
| 1024 | if ( piEmpty && !fFoundEmpty ) *piEmpty = iBucketFirst;
|
---|
| 1025 | return NULL;
|
---|
| 1026 | }
|
---|
| 1027 | else
|
---|
| 1028 | {
|
---|
| 1029 | if ( IS_BCK_DELETED(item) ) /* always 0 ifdef INSERT_ONLY */
|
---|
| 1030 | {
|
---|
| 1031 | if ( piEmpty && !fFoundEmpty )
|
---|
| 1032 | {
|
---|
| 1033 | *piEmpty = iBucketFirst;
|
---|
| 1034 | fFoundEmpty = 1;
|
---|
| 1035 | }
|
---|
| 1036 | } else
|
---|
| 1037 | if ( !KEY_CMP(ht, key, item->key) ) /* must be occupied */
|
---|
| 1038 | {
|
---|
| 1039 | ht->posLastFind = item;
|
---|
| 1040 | return item; /* we found it! */
|
---|
| 1041 | }
|
---|
| 1042 | }
|
---|
| 1043 | iBucketFirst = ((iBucketFirst + JUMP(KEY_PTR(ht, key), offset))
|
---|
| 1044 | & (ht->cBuckets-1));
|
---|
| 1045 | }
|
---|
| 1046 | }
|
---|
| 1047 |
|
---|
| 1048 | /*************************************************************************\
|
---|
| 1049 | | Insert() |
|
---|
| 1050 | | If an item with the key already exists in the hashtable, RETURNS |
|
---|
| 1051 | | a pointer to the item (replacing its data if fOverwrite is 1). |
|
---|
| 1052 | | If not, we find the first place-to-insert (which Find() is nice |
|
---|
| 1053 | | enough to set for us) and insert the item there, RETURNing a |
|
---|
| 1054 | | pointer to the item. We might grow the hashtable if it's getting |
|
---|
| 1055 | | full. Note we include buckets holding DELETED when determining |
|
---|
| 1056 | | fullness, because they slow down searching. |
|
---|
| 1057 | \*************************************************************************/
|
---|
| 1058 |
|
---|
| 1059 | static ulong NextPow2(ulong x) /* returns next power of 2 > x, or 2^31 */
|
---|
| 1060 | {
|
---|
| 1061 | if ( ((x << 1) >> 1) != x ) /* next power of 2 overflows */
|
---|
| 1062 | x >>= 1; /* so we return highest power of 2 we can */
|
---|
| 1063 | while ( (x & (x-1)) != 0 ) /* blacks out all but the top bit */
|
---|
| 1064 | x &= (x-1);
|
---|
| 1065 | return x << 1; /* makes it the *next* power of 2 */
|
---|
| 1066 | }
|
---|
| 1067 |
|
---|
| 1068 | static HTItem *Insert(HashTable *ht, ulong key, ulong data, int fOverwrite)
|
---|
| 1069 | {
|
---|
| 1070 | HTItem *item, bckInsert;
|
---|
| 1071 | ulong iEmpty; /* first empty bucket key probes */
|
---|
| 1072 |
|
---|
| 1073 | if ( ht->table == NULL ) /* empty hash table: find is bound to fail */
|
---|
| 1074 | return NULL;
|
---|
| 1075 | item = Find(ht, key, &iEmpty);
|
---|
| 1076 | ht->posLastFind = NULL; /* last operation is insert, not find */
|
---|
| 1077 | if ( item )
|
---|
| 1078 | {
|
---|
| 1079 | if ( fOverwrite )
|
---|
| 1080 | item->data = data; /* key already matches */
|
---|
| 1081 | return item;
|
---|
| 1082 | }
|
---|
| 1083 |
|
---|
| 1084 | COPY_KEY(ht, bckInsert.key, key); /* make our own copy of the key */
|
---|
| 1085 | bckInsert.data = data; /* oh, and the data too */
|
---|
| 1086 | item = Table(Insert)(ht->table, &bckInsert, iEmpty, &fOverwrite);
|
---|
| 1087 | if ( fOverwrite ) /* we overwrote a deleted bucket */
|
---|
| 1088 | ht->cDeletedItems--;
|
---|
| 1089 | ht->cItems++; /* insert couldn't have overwritten */
|
---|
| 1090 | if ( ht->cDeltaGoalSize > 0 ) /* closer to our goal size */
|
---|
| 1091 | ht->cDeltaGoalSize--;
|
---|
| 1092 | if ( ht->cItems + ht->cDeletedItems >= ht->cBuckets * OCCUPANCY_PCT
|
---|
| 1093 | || ht->cDeltaGoalSize < 0 ) /* we must've overestimated # of deletes */
|
---|
| 1094 | item = Rehash(ht,
|
---|
| 1095 | NextPow2((ulong)(((ht->cDeltaGoalSize > 0 ?
|
---|
| 1096 | ht->cDeltaGoalSize : 0)
|
---|
| 1097 | + ht->cItems) / OCCUPANCY_PCT)),
|
---|
| 1098 | item);
|
---|
| 1099 | return item;
|
---|
| 1100 | }
|
---|
| 1101 |
|
---|
| 1102 | /*************************************************************************\
|
---|
| 1103 | | Delete() |
|
---|
| 1104 | | Removes the item from the hashtable, and if fShrink is 1, will |
|
---|
| 1105 | | shrink the hashtable if it's too small (ie even after halving, |
|
---|
| 1106 | | the ht would be less than half full, though in order to avoid |
|
---|
| 1107 | | oscillating table size, we insist that after halving the ht would |
|
---|
| 1108 | | be less than 40% full). RETURNS 1 if the item was found, 0 else. |
|
---|
| 1109 | | If fLastFindSet is true, then this function is basically |
|
---|
| 1110 | | DeleteLastFind. |
|
---|
| 1111 | \*************************************************************************/
|
---|
| 1112 |
|
---|
| 1113 | static int Delete(HashTable *ht, ulong key, int fShrink, int fLastFindSet)
|
---|
| 1114 | {
|
---|
| 1115 | if ( !fLastFindSet && !Find(ht, key, NULL) )
|
---|
| 1116 | return 0;
|
---|
| 1117 | SET_BCK_DELETED(ht, ht->posLastFind); /* find set this, how nice */
|
---|
| 1118 | ht->cItems--;
|
---|
| 1119 | ht->cDeletedItems++;
|
---|
| 1120 | if ( ht->cDeltaGoalSize < 0 ) /* heading towards our goal of deletion */
|
---|
| 1121 | ht->cDeltaGoalSize++;
|
---|
| 1122 |
|
---|
| 1123 | if ( fShrink && ht->cItems < ht->cBuckets * OCCUPANCY_PCT*0.4
|
---|
| 1124 | && ht->cDeltaGoalSize >= 0 /* wait until we're done deleting */
|
---|
| 1125 | && (ht->cBuckets >> 1) >= MIN_HASH_SIZE ) /* shrink */
|
---|
| 1126 | Rehash(ht,
|
---|
| 1127 | NextPow2((ulong)((ht->cItems+ht->cDeltaGoalSize)/OCCUPANCY_PCT)),
|
---|
| 1128 | NULL);
|
---|
| 1129 | ht->posLastFind = NULL; /* last operation is delete, not find */
|
---|
| 1130 | return 1;
|
---|
| 1131 | }
|
---|
| 1132 |
|
---|
| 1133 |
|
---|
| 1134 | /* ======================================================================== */
|
---|
| 1135 | /* USER-VISIBLE API */
|
---|
| 1136 | /* ---------------------- */
|
---|
| 1137 |
|
---|
| 1138 | /*************************************************************************\
|
---|
| 1139 | | AllocateHashTable() |
|
---|
| 1140 | | ClearHashTable() |
|
---|
| 1141 | | FreeHashTable() |
|
---|
| 1142 | | Allocate() allocates a hash table and sets up size parameters. |
|
---|
| 1143 | | Free() frees it. Clear() deletes all the items from the hash |
|
---|
| 1144 | | table, but frees not. |
|
---|
| 1145 | | cchKey is < 0 if the keys you send me are meant to be pointers |
|
---|
| 1146 | | to \0-terminated strings. Then -cchKey is the maximum key size. |
|
---|
| 1147 | | If cchKey < one word (ulong), the keys you send me are the keys |
|
---|
| 1148 | | themselves; else the keys you send me are pointers to the data. |
|
---|
| 1149 | | If fSaveKeys is 1, we copy any keys given to us to insert. We |
|
---|
| 1150 | | also free these keys when freeing the hash table. If it's 0, the |
|
---|
| 1151 | | user is responsible for key space management. |
|
---|
| 1152 | | AllocateHashTable() RETURNS a hash table; the others TAKE one. |
|
---|
| 1153 | \*************************************************************************/
|
---|
| 1154 |
|
---|
| 1155 | HashTable *AllocateHashTable(int cchKey, int fSaveKeys)
|
---|
| 1156 | {
|
---|
| 1157 | HashTable *ht;
|
---|
| 1158 |
|
---|
| 1159 | ht = (HashTable *) HTsmalloc(sizeof(*ht)); /* set everything to 0 */
|
---|
| 1160 | ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);
|
---|
| 1161 | ht->cchKey = cchKey <= 0 ? NULL_TERMINATED : cchKey;
|
---|
| 1162 | ht->cItems = 0;
|
---|
| 1163 | ht->cDeletedItems = 0;
|
---|
| 1164 | ht->fSaveKeys = fSaveKeys;
|
---|
| 1165 | ht->cDeltaGoalSize = 0;
|
---|
| 1166 | ht->iter = HTsmalloc( sizeof(TableIterator) );
|
---|
| 1167 |
|
---|
| 1168 | ht->fpData = NULL; /* set by HashLoad, maybe */
|
---|
| 1169 | ht->bckData.data = (ulong) NULL; /* this must be done */
|
---|
| 1170 | HTSetupKeyTrunc(); /* in util.c */
|
---|
| 1171 | return ht;
|
---|
| 1172 | }
|
---|
| 1173 |
|
---|
| 1174 | void ClearHashTable(HashTable *ht)
|
---|
| 1175 | {
|
---|
| 1176 | HTItem *bck;
|
---|
| 1177 |
|
---|
| 1178 | if ( STORES_PTR(ht) && ht->fSaveKeys ) /* need to free keys */
|
---|
| 1179 | for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) )
|
---|
| 1180 | {
|
---|
| 1181 | FREE_KEY(ht, bck->key);
|
---|
| 1182 | if ( ht->fSaveKeys == 2 ) /* this means key stored in one block */
|
---|
| 1183 | break; /* ...so only free once */
|
---|
| 1184 | }
|
---|
| 1185 | Table(Free)(ht->table, ht->cBuckets);
|
---|
| 1186 | ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);
|
---|
| 1187 |
|
---|
| 1188 | ht->cItems = 0;
|
---|
| 1189 | ht->cDeletedItems = 0;
|
---|
| 1190 | ht->cDeltaGoalSize = 0;
|
---|
| 1191 | ht->posLastFind = NULL;
|
---|
| 1192 | ht->fpData = NULL; /* no longer HashLoading */
|
---|
| 1193 | if ( ht->bckData.data ) free( (char *)(ht)->bckData.data);
|
---|
| 1194 | ht->bckData.data = (ulong) NULL;
|
---|
| 1195 | }
|
---|
| 1196 |
|
---|
| 1197 | void FreeHashTable(HashTable *ht)
|
---|
| 1198 | {
|
---|
| 1199 | ClearHashTable(ht);
|
---|
| 1200 | if ( ht->iter ) HTfree(ht->iter, sizeof(TableIterator));
|
---|
| 1201 | if ( ht->table ) Table(Free)(ht->table, ht->cBuckets);
|
---|
| 1202 | free(ht);
|
---|
| 1203 | }
|
---|
| 1204 |
|
---|
| 1205 | /*************************************************************************\
|
---|
| 1206 | | HashFind() |
|
---|
| 1207 | | HashFindLast() |
|
---|
| 1208 | | HashFind(): looks in h(x) + i(i-1)/2 % t as i goes up from 0 |
|
---|
| 1209 | | until we either find the key or hit an empty bucket. RETURNS a |
|
---|
| 1210 | | pointer to the item in the hit bucket, if we find it, else |
|
---|
| 1211 | | RETURNS NULL. |
|
---|
| 1212 | | HashFindLast() returns the item returned by the last |
|
---|
| 1213 | | HashFind(), which may be NULL if the last HashFind() failed. |
|
---|
| 1214 | | LOAD_AND_RETURN reads the data from off disk, if necessary. |
|
---|
| 1215 | \*************************************************************************/
|
---|
| 1216 |
|
---|
| 1217 | HTItem *HashFind(HashTable *ht, ulong key)
|
---|
| 1218 | {
|
---|
| 1219 | LOAD_AND_RETURN(ht, Find(ht, KEY_TRUNC(ht, key), NULL));
|
---|
| 1220 | }
|
---|
| 1221 |
|
---|
| 1222 | HTItem *HashFindLast(HashTable *ht)
|
---|
| 1223 | {
|
---|
| 1224 | LOAD_AND_RETURN(ht, ht->posLastFind);
|
---|
| 1225 | }
|
---|
| 1226 |
|
---|
| 1227 | /*************************************************************************\
|
---|
| 1228 | | HashFindOrInsert() |
|
---|
| 1229 | | HashFindOrInsertItem() |
|
---|
| 1230 | | HashInsert() |
|
---|
| 1231 | | HashInsertItem() |
|
---|
| 1232 | | HashDelete() |
|
---|
| 1233 | | HashDeleteLast() |
|
---|
| 1234 | | Pretty obvious what these guys do. Some take buckets (items), |
|
---|
| 1235 | | some take keys and data separately. All things RETURN the bucket |
|
---|
| 1236 | | (a pointer into the hashtable) if appropriate. |
|
---|
| 1237 | \*************************************************************************/
|
---|
| 1238 |
|
---|
| 1239 | HTItem *HashFindOrInsert(HashTable *ht, ulong key, ulong dataInsert)
|
---|
| 1240 | {
|
---|
| 1241 | /* This is equivalent to Insert without samekey-overwrite */
|
---|
| 1242 | return Insert(ht, KEY_TRUNC(ht, key), dataInsert, 0);
|
---|
| 1243 | }
|
---|
| 1244 |
|
---|
| 1245 | HTItem *HashFindOrInsertItem(HashTable *ht, HTItem *pItem)
|
---|
| 1246 | {
|
---|
| 1247 | return HashFindOrInsert(ht, pItem->key, pItem->data);
|
---|
| 1248 | }
|
---|
| 1249 |
|
---|
| 1250 | HTItem *HashInsert(HashTable *ht, ulong key, ulong data)
|
---|
| 1251 | {
|
---|
| 1252 | return Insert(ht, KEY_TRUNC(ht, key), data, SAMEKEY_OVERWRITE);
|
---|
| 1253 | }
|
---|
| 1254 |
|
---|
| 1255 | HTItem *HashInsertItem(HashTable *ht, HTItem *pItem)
|
---|
| 1256 | {
|
---|
| 1257 | return HashInsert(ht, pItem->key, pItem->data);
|
---|
| 1258 | }
|
---|
| 1259 |
|
---|
| 1260 | int HashDelete(HashTable *ht, ulong key)
|
---|
| 1261 | {
|
---|
| 1262 | return Delete(ht, KEY_TRUNC(ht, key), !FAST_DELETE, 0);
|
---|
| 1263 | }
|
---|
| 1264 |
|
---|
| 1265 | int HashDeleteLast(HashTable *ht)
|
---|
| 1266 | {
|
---|
| 1267 | if ( !ht->posLastFind ) /* last find failed */
|
---|
| 1268 | return 0;
|
---|
| 1269 | return Delete(ht, 0, !FAST_DELETE, 1); /* no need to specify a key */
|
---|
| 1270 | }
|
---|
| 1271 |
|
---|
| 1272 | /*************************************************************************\
|
---|
| 1273 | | HashFirstBucket() |
|
---|
| 1274 | | HashNextBucket() |
|
---|
| 1275 | | Iterates through the items in the hashtable by iterating through |
|
---|
| 1276 | | the table. Since we know about deleted buckets and loading data |
|
---|
| 1277 | | off disk, and the table doesn't, our job is to take care of these |
|
---|
| 1278 | | things. RETURNS a bucket, or NULL after the last bucket. |
|
---|
| 1279 | \*************************************************************************/
|
---|
| 1280 |
|
---|
| 1281 | HTItem *HashFirstBucket(HashTable *ht)
|
---|
| 1282 | {
|
---|
| 1283 | HTItem *retval;
|
---|
| 1284 |
|
---|
| 1285 | for ( retval = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
|
---|
| 1286 | retval; retval = Table(NextBucket)(ht->iter) )
|
---|
| 1287 | if ( !IS_BCK_DELETED(retval) )
|
---|
| 1288 | LOAD_AND_RETURN(ht, retval);
|
---|
| 1289 | return NULL;
|
---|
| 1290 | }
|
---|
| 1291 |
|
---|
| 1292 | HTItem *HashNextBucket(HashTable *ht)
|
---|
| 1293 | {
|
---|
| 1294 | HTItem *retval;
|
---|
| 1295 |
|
---|
| 1296 | while ( (retval=Table(NextBucket)(ht->iter)) )
|
---|
| 1297 | if ( !IS_BCK_DELETED(retval) )
|
---|
| 1298 | LOAD_AND_RETURN(ht, retval);
|
---|
| 1299 | return NULL;
|
---|
| 1300 | }
|
---|
| 1301 |
|
---|
| 1302 | /*************************************************************************\
|
---|
| 1303 | | HashSetDeltaGoalSize() |
|
---|
| 1304 | | If we're going to insert 100 items, set the delta goal size to |
|
---|
| 1305 | | 100 and we take that into account when inserting. Likewise, if |
|
---|
| 1306 | | we're going to delete 10 items, set it to -100 and we won't |
|
---|
| 1307 | | rehash until all 100 have been done. It's ok to be wrong, but |
|
---|
| 1308 | | it's efficient to be right. Returns the delta value. |
|
---|
| 1309 | \*************************************************************************/
|
---|
| 1310 |
|
---|
| 1311 | int HashSetDeltaGoalSize(HashTable *ht, int delta)
|
---|
| 1312 | {
|
---|
| 1313 | ht->cDeltaGoalSize = delta;
|
---|
| 1314 | #if FAST_DELETE == 1 || defined INSERT_ONLY
|
---|
| 1315 | if ( ht->cDeltaGoalSize < 0 ) /* for fast delete, we never */
|
---|
| 1316 | ht->cDeltaGoalSize = 0; /* ...rehash after deletion */
|
---|
| 1317 | #endif
|
---|
| 1318 | return ht->cDeltaGoalSize;
|
---|
| 1319 | }
|
---|
| 1320 |
|
---|
| 1321 |
|
---|
| 1322 | /*************************************************************************\
|
---|
| 1323 | | HashSave() |
|
---|
| 1324 | | HashLoad() |
|
---|
| 1325 | | HashLoadKeys() |
|
---|
| 1326 | | Routines for saving and loading the hashtable from disk. We can |
|
---|
| 1327 | | then use the hashtable in two ways: loading it back into memory |
|
---|
| 1328 | | (HashLoad()) or loading only the keys into memory, in which case |
|
---|
| 1329 | | the data for a given key is loaded off disk when the key is |
|
---|
| 1330 | | retrieved. The data is freed when something new is retrieved in |
|
---|
| 1331 | | its place, so this is not a "lazy-load" scheme. |
|
---|
| 1332 | | The key is saved automatically and restored upon load, but the |
|
---|
| 1333 | | user needs to specify a routine for reading and writing the data. |
|
---|
| 1334 | | fSaveKeys is of course set to 1 when you read in a hashtable. |
|
---|
| 1335 | | HashLoad RETURNS a newly allocated hashtable. |
|
---|
| 1336 | | DATA_WRITE() takes an fp and a char * (representing the data |
|
---|
| 1337 | | field), and must perform two separate tasks. If fp is NULL, |
|
---|
| 1338 | | return the number of bytes written. If not, writes the data to |
|
---|
| 1339 | | disk at the place the fp points to. |
|
---|
| 1340 | | DATA_READ() takes an fp and the number of bytes in the data |
|
---|
| 1341 | | field, and returns a char * which points to wherever you've |
|
---|
| 1342 | | written the data. Thus, you must allocate memory for the data. |
|
---|
| 1343 | | Both dataRead and dataWrite may be NULL if you just wish to |
|
---|
| 1344 | | store the data field directly, as an integer. |
|
---|
| 1345 | \*************************************************************************/
|
---|
| 1346 |
|
---|
| 1347 | void HashSave(FILE *fp, HashTable *ht, int (*dataWrite)(FILE *, char *))
|
---|
| 1348 | {
|
---|
| 1349 | long cchData, posStart;
|
---|
| 1350 | HTItem *bck;
|
---|
| 1351 |
|
---|
| 1352 | /* File format: magic number (4 bytes)
|
---|
| 1353 | : cchKey (one word)
|
---|
| 1354 | : cItems (one word)
|
---|
| 1355 | : cDeletedItems (one word)
|
---|
| 1356 | : table info (buckets and a bitmap)
|
---|
| 1357 | : cchAllKeys (one word)
|
---|
| 1358 | Then the keys, in a block. If cchKey is NULL_TERMINATED, the keys
|
---|
| 1359 | are null-terminated too, otherwise this takes up cchKey*cItems bytes.
|
---|
| 1360 | Note that keys are not written for DELETED buckets.
|
---|
| 1361 | Then the data:
|
---|
| 1362 | : EITHER DELETED (one word) to indicate it's a deleted bucket,
|
---|
| 1363 | : OR number of bytes for this (non-empty) bucket's data
|
---|
| 1364 | (one word). This is not stored if dataWrite == NULL
|
---|
| 1365 | since the size is known to be sizeof(ul). Plus:
|
---|
| 1366 | : the data for this bucket (variable length)
|
---|
| 1367 | All words are in network byte order. */
|
---|
| 1368 |
|
---|
| 1369 | fprintf(fp, "%s", MAGIC_KEY);
|
---|
| 1370 | WRITE_UL(fp, ht->cchKey); /* WRITE_UL, READ_UL, etc in fks-hash.h */
|
---|
| 1371 | WRITE_UL(fp, ht->cItems);
|
---|
| 1372 | WRITE_UL(fp, ht->cDeletedItems);
|
---|
| 1373 | Table(Write)(fp, ht->table, ht->cBuckets); /* writes cBuckets too */
|
---|
| 1374 |
|
---|
| 1375 | WRITE_UL(fp, 0); /* to be replaced with sizeof(key block) */
|
---|
| 1376 | posStart = ftell(fp);
|
---|
| 1377 | for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) )
|
---|
| 1378 | fwrite(KEY_PTR(ht, bck->key), 1,
|
---|
| 1379 | (ht->cchKey == NULL_TERMINATED ?
|
---|
| 1380 | strlen(KEY_PTR(ht, bck->key))+1 : ht->cchKey), fp);
|
---|
| 1381 | cchData = ftell(fp) - posStart;
|
---|
| 1382 | fseek(fp, posStart - sizeof(unsigned long), SEEK_SET);
|
---|
| 1383 | WRITE_UL(fp, cchData);
|
---|
| 1384 | fseek(fp, 0, SEEK_END); /* done with our sojourn at the header */
|
---|
| 1385 |
|
---|
| 1386 | /* Unlike HashFirstBucket, TableFirstBucket iters through deleted bcks */
|
---|
| 1387 | for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
|
---|
| 1388 | bck; bck = Table(NextBucket)(ht->iter) )
|
---|
| 1389 | if ( dataWrite == NULL || IS_BCK_DELETED(bck) )
|
---|
| 1390 | WRITE_UL(fp, bck->data);
|
---|
| 1391 | else /* write cchData followed by the data */
|
---|
| 1392 | {
|
---|
| 1393 | WRITE_UL(fp, (*dataWrite)(NULL, (char *)bck->data));
|
---|
| 1394 | (*dataWrite)(fp, (char *)bck->data);
|
---|
| 1395 | }
|
---|
| 1396 | }
|
---|
| 1397 |
|
---|
| 1398 | static HashTable *HashDoLoad(FILE *fp, char * (*dataRead)(FILE *, int),
|
---|
| 1399 | HashTable *ht)
|
---|
| 1400 | {
|
---|
| 1401 | ulong cchKey;
|
---|
| 1402 | char szMagicKey[4], *rgchKeys;
|
---|
| 1403 | HTItem *bck;
|
---|
| 1404 |
|
---|
| 1405 | fread(szMagicKey, 1, 4, fp);
|
---|
| 1406 | if ( strncmp(szMagicKey, MAGIC_KEY, 4) )
|
---|
| 1407 | {
|
---|
| 1408 | fprintf(stderr, "ERROR: not a hash table (magic key is %4.4s, not %s)\n",
|
---|
| 1409 | szMagicKey, MAGIC_KEY);
|
---|
| 1410 | exit(3);
|
---|
| 1411 | }
|
---|
| 1412 | Table(Free)(ht->table, ht->cBuckets); /* allocated in AllocateHashTable */
|
---|
| 1413 |
|
---|
| 1414 | READ_UL(fp, ht->cchKey);
|
---|
| 1415 | READ_UL(fp, ht->cItems);
|
---|
| 1416 | READ_UL(fp, ht->cDeletedItems);
|
---|
| 1417 | ht->cBuckets = Table(Read)(fp, &ht->table); /* next is the table info */
|
---|
| 1418 |
|
---|
| 1419 | READ_UL(fp, cchKey);
|
---|
| 1420 | rgchKeys = (char *) HTsmalloc( cchKey ); /* stores all the keys */
|
---|
| 1421 | fread(rgchKeys, 1, cchKey, fp);
|
---|
| 1422 | /* We use the table iterator so we don't try to LOAD_AND_RETURN */
|
---|
| 1423 | for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
|
---|
| 1424 | bck; bck = Table(NextBucket)(ht->iter) )
|
---|
| 1425 | {
|
---|
| 1426 | READ_UL(fp, bck->data); /* all we need if dataRead is NULL */
|
---|
| 1427 | if ( IS_BCK_DELETED(bck) ) /* always 0 if defined(INSERT_ONLY) */
|
---|
| 1428 | continue; /* this is why we read the data first */
|
---|
| 1429 | if ( dataRead != NULL ) /* if it's null, we're done */
|
---|
| 1430 | if ( !ht->fpData ) /* load data into memory */
|
---|
| 1431 | bck->data = (ulong)dataRead(fp, bck->data);
|
---|
| 1432 | else /* store location of data on disk */
|
---|
| 1433 | {
|
---|
| 1434 | fseek(fp, bck->data, SEEK_CUR); /* bck->data held size of data */
|
---|
| 1435 | bck->data = ftell(fp) - bck->data - sizeof(unsigned long);
|
---|
| 1436 | }
|
---|
| 1437 |
|
---|
| 1438 | if ( ht->cchKey == NULL_TERMINATED ) /* now read the key */
|
---|
| 1439 | {
|
---|
| 1440 | bck->key = (ulong) rgchKeys;
|
---|
| 1441 | rgchKeys = strchr(rgchKeys, '\0') + 1; /* read past the string */
|
---|
| 1442 | }
|
---|
| 1443 | else
|
---|
| 1444 | {
|
---|
| 1445 | if ( STORES_PTR(ht) ) /* small keys stored directly */
|
---|
| 1446 | bck->key = (ulong) rgchKeys;
|
---|
| 1447 | else
|
---|
| 1448 | memcpy(&bck->key, rgchKeys, ht->cchKey);
|
---|
| 1449 | rgchKeys += ht->cchKey;
|
---|
| 1450 | }
|
---|
| 1451 | }
|
---|
| 1452 | if ( !STORES_PTR(ht) ) /* keys are stored directly */
|
---|
| 1453 | HTfree(rgchKeys - cchKey, cchKey); /* we've advanced rgchK to end */
|
---|
| 1454 | return ht;
|
---|
| 1455 | }
|
---|
| 1456 |
|
---|
| 1457 | HashTable *HashLoad(FILE *fp, char * (*dataRead)(FILE *, int))
|
---|
| 1458 | {
|
---|
| 1459 | HashTable *ht;
|
---|
| 1460 | ht = AllocateHashTable(0, 2); /* cchKey set later, fSaveKey should be 2! */
|
---|
| 1461 | return HashDoLoad(fp, dataRead, ht);
|
---|
| 1462 | }
|
---|
| 1463 |
|
---|
| 1464 | HashTable *HashLoadKeys(FILE *fp, char * (*dataRead)(FILE *, int))
|
---|
| 1465 | {
|
---|
| 1466 | HashTable *ht;
|
---|
| 1467 |
|
---|
| 1468 | if ( dataRead == NULL )
|
---|
| 1469 | return HashLoad(fp, NULL); /* no reason not to load the data here */
|
---|
| 1470 | ht = AllocateHashTable(0, 2); /* cchKey set later, fSaveKey should be 2! */
|
---|
| 1471 | ht->fpData = fp; /* tells HashDoLoad() to only load keys */
|
---|
| 1472 | ht->dataRead = dataRead;
|
---|
| 1473 | return HashDoLoad(fp, dataRead, ht);
|
---|
| 1474 | }
|
---|
| 1475 |
|
---|
| 1476 | /*************************************************************************\
|
---|
| 1477 | | PrintHashTable() |
|
---|
| 1478 | | A debugging tool. Prints the entire contents of the hash table, |
|
---|
| 1479 | | like so: <bin #>: key of the contents. Returns number of bytes |
|
---|
| 1480 | | allocated. If time is not -1, we print it as the time required |
|
---|
| 1481 | | for the hash. If iForm is 0, we just print the stats. If it's |
|
---|
| 1482 | | 1, we print the keys and data too, but the keys are printed as |
|
---|
| 1483 | | ulongs. If it's 2, we print the keys correctly (as long numbers |
|
---|
| 1484 | | or as strings). |
|
---|
| 1485 | \*************************************************************************/
|
---|
| 1486 |
|
---|
| 1487 | ulong PrintHashTable(HashTable *ht, double time, int iForm)
|
---|
| 1488 | {
|
---|
| 1489 | ulong cbData = 0, cbBin = 0, cItems = 0, cOccupied = 0;
|
---|
| 1490 | HTItem *item;
|
---|
| 1491 |
|
---|
| 1492 | printf("HASH TABLE.\n");
|
---|
| 1493 | if ( time > -1.0 )
|
---|
| 1494 | {
|
---|
| 1495 | printf("----------\n");
|
---|
| 1496 | printf("Time: %27.2f\n", time);
|
---|
| 1497 | }
|
---|
| 1498 |
|
---|
| 1499 | for ( item = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
|
---|
| 1500 | item; item = Table(NextBucket)(ht->iter) )
|
---|
| 1501 | {
|
---|
| 1502 | cOccupied++; /* this includes deleted buckets */
|
---|
| 1503 | if ( IS_BCK_DELETED(item) ) /* we don't need you for anything else */
|
---|
| 1504 | continue;
|
---|
| 1505 | cItems++; /* this is for a sanity check */
|
---|
| 1506 | if ( STORES_PTR(ht) )
|
---|
| 1507 | cbData += ht->cchKey == NULL_TERMINATED ?
|
---|
| 1508 | WORD_ROUND(strlen((char *)item->key)+1) : ht->cchKey;
|
---|
| 1509 | else
|
---|
| 1510 | cbBin -= sizeof(item->key), cbData += sizeof(item->key);
|
---|
| 1511 | cbBin -= sizeof(item->data), cbData += sizeof(item->data);
|
---|
| 1512 | if ( iForm != 0 ) /* we want the actual contents */
|
---|
| 1513 | {
|
---|
| 1514 | if ( iForm == 2 && ht->cchKey == NULL_TERMINATED )
|
---|
| 1515 | printf("%s/%lu\n", (char *)item->key, item->data);
|
---|
| 1516 | else if ( iForm == 2 && STORES_PTR(ht) )
|
---|
| 1517 | printf("%.*s/%lu\n",
|
---|
| 1518 | (int)ht->cchKey, (char *)item->key, item->data);
|
---|
| 1519 | else /* either key actually is a ulong, or iForm == 1 */
|
---|
| 1520 | printf("%lu/%lu\n", item->key, item->data);
|
---|
| 1521 | }
|
---|
| 1522 | }
|
---|
| 1523 | assert( cItems == ht->cItems ); /* sanity check */
|
---|
| 1524 | cbBin = Table(Memory)(ht->cBuckets, cOccupied);
|
---|
| 1525 |
|
---|
| 1526 | printf("----------\n");
|
---|
| 1527 | printf("%lu buckets (%lu bytes). %lu empty. %lu hold deleted items.\n"
|
---|
| 1528 | "%lu items (%lu bytes).\n"
|
---|
| 1529 | "%lu bytes total. %lu bytes (%2.1f%%) of this is ht overhead.\n",
|
---|
| 1530 | ht->cBuckets, cbBin, ht->cBuckets - cOccupied, cOccupied - ht->cItems,
|
---|
| 1531 | ht->cItems, cbData,
|
---|
| 1532 | cbData + cbBin, cbBin, cbBin*100.0/(cbBin+cbData));
|
---|
| 1533 |
|
---|
| 1534 | return cbData + cbBin;
|
---|
| 1535 | }
|
---|