strhash.c
1 /* 2 * 3 * Copyright 1990 4 * Terry Jones & Jordan Hubbard 5 * 6 * PCS Computer Systeme, GmbH. 7 * Munich, West Germany 8 * 9 * 10 * All rights reserved. 11 * 12 * This is unsupported software and is subject to change without notice. 13 * the author makes no representations about the suitability of this software 14 * for any purpose. It is supplied "as is" without express or implied 15 * warranty. 16 * 17 * Permission to use, copy, modify, and distribute this software and its 18 * documentation for any purpose and without fee is hereby granted, provided 19 * that the above copyright notice appear in all copies and that both that 20 * copyright notice and this permission notice appear in supporting 21 * documentation, and that the name of the author not be used in 22 * advertising or publicity pertaining to distribution of the software 23 * without specific, written prior permission. 24 * 25 */ 26 27 /* 28 * This is a fairly simple open addressing hash scheme. 29 * Terry did all the code, I just did the spec. 30 * Thanks again, you crazy Aussie.. 31 * 32 */ 33 34 /* 35 * $Log: strhash.c,v $ 36 * Revision 2.0 90/03/26 01:44:26 jkh 37 * pre-beta check-in 38 * 39 * Revision 1.8 90/03/09 19:22:35 jkh 40 * Fixed bogus comment. 41 * 42 * Revision 1.7 90/03/09 19:01:08 jkh 43 * Added comments, GPL. 44 * 45 * Revision 1.6 90/03/08 17:55:58 terry 46 * Rearranged hash_purge to be a tiny bit more efficient. 47 * Added verbose option to hash_stats. 48 * 49 * Revision 1.5 90/03/08 17:19:54 terry 50 * Added hash_purge. Added arg to hash_traverse. Changed all 51 * void * to Generic. 52 * 53 * Revision 1.4 90/03/08 12:02:35 terry 54 * Fixed problems with allocation that I screwed up last night. 55 * Changed bucket lists to be singly linked. Thanks to JKH, my hero. 56 * 57 * Revision 1.3 90/03/07 21:33:33 terry 58 * Cleaned up a few decls to keep gcc -Wall quiet. 59 * 60 * Revision 1.2 90/03/07 21:14:53 terry 61 * Comments. Added HASH_STATS define. Removed hash_find() 62 * and new_node(). 63 * 64 * Revision 1.1 90/03/07 20:49:45 terry 65 * Initial revision 66 * 67 * 68 */ 69 70 #pragma clang diagnostic push 71 #pragma clang diagnostic ignored "-Wstrict-prototypes" 72 73 #include <sys/cdefs.h> 74 __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $"); 75 76 #include <stdio.h> 77 #include <stdlib.h> 78 #include <string.h> 79 #include <sys/types.h> 80 #include <strhash.h> 81 82 #define HASH_NULL (hash_table *)0 83 #define NODE_NULL (hash_node *)0 84 #define GENERIC_NULL (void *)0 85 86 #define HASH_SZ 97 87 88 89 static int _hash(int size, char *key); 90 static hash_node *list_find(caddr_t key, hash_node *head); 91 static int assign_key(char *key, hash_node *node); 92 93 94 /* 95 * hash_create() 96 * 97 * Malloc room for a new hash table and then room for its 98 * bucket pointers. Then set all the buckets to 99 * point to 0. Return the address of the new table. 100 */ 101 hash_table * 102 hash_create(int size) 103 { 104 int i; 105 hash_table *new = (hash_table *)malloc(sizeof(hash_table)); 106 107 if (!new || size < 0){ 108 return HASH_NULL; 109 } 110 111 if (size == 0){ 112 size = HASH_SZ; 113 } 114 115 if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){ 116 return HASH_NULL; 117 } 118 119 for (i = 0; i < size; i++){ 120 new->buckets[i] = NODE_NULL; 121 } 122 new->size = size; 123 124 return new; 125 } 126 127 128 /* 129 * list_find() 130 * 131 * Find the key in the linked list pointed to by head. 132 */ 133 static hash_node * 134 list_find(caddr_t key, hash_node *head) 135 { 136 while (head){ 137 if (!strcmp(head->key, key)){ 138 return head; 139 } 140 head = head->next; 141 } 142 return NODE_NULL; 143 } 144 145 146 /* 147 * _hash() 148 * 149 * Compute the hash value for the given key. 150 */ 151 static int 152 _hash(int size, char *key) 153 { 154 unsigned int h = 0x0; 155 156 while (*key){ 157 h = (h << 1) ^ (h ^ (unsigned char) *key++); 158 } 159 160 h %= size; 161 return h; 162 } 163 164 /* 165 * hash_destroy() 166 * 167 * Find the key and (if it's there) remove it entirely. 168 * The function (*nukefunc)() is in charge of disposing 169 * of the storage help by the data associated with the node. 170 */ 171 void 172 hash_destroy(hash_table *table, char *key, void (*nukefunc)()) 173 { 174 int bucket = _hash(table->size, key); 175 hash_node *found = table->buckets[bucket]; 176 hash_node *to_free = NODE_NULL; 177 178 if (!found) { 179 return; 180 } 181 182 if (!strcmp(found->key, key)) { 183 /* 184 * It was the head of the list. 185 */ 186 table->buckets[bucket] = found->next; 187 to_free = found; 188 } 189 else { 190 /* 191 * Walk the list, looking one ahead. 192 */ 193 while (found->next) { 194 if (!strcmp(found->next->key, key)) { 195 to_free = found->next; 196 found->next = found->next->next; 197 break; 198 } 199 found = found->next; 200 } 201 202 if (!to_free){ 203 return; 204 } 205 } 206 207 if (nukefunc) 208 (*nukefunc)(to_free->key, to_free->data); 209 free(to_free); 210 return; 211 } 212 213 214 /* 215 * hash_search() 216 * 217 * Search the table for the given key. Then: 218 * 219 * 1) If you find it and there is no replacement function, just 220 * return what you found. (This is a simple search). 221 * 2) If you find it and there is a replacement function, run 222 * the function on the data you found, and replace the old 223 * data with whatever is passed in datum. Return 0. 224 * 3) If you don't find it and there is some datum, insert a 225 * new item into the table. Insertions go at the front of 226 * the bucket. Return 0. 227 * 4) Otherwise just return 0. 228 * 229 */ 230 void * 231 hash_search(hash_table *table, caddr_t key, void *datum, 232 void (*replace_func)()) 233 { 234 int bucket = _hash(table->size, key); 235 hash_node *found = list_find(key, table->buckets[bucket]); 236 237 if (found){ 238 if (!replace_func){ 239 return found->data; 240 } 241 else{ 242 (*replace_func)(found->data); 243 found->data = datum; 244 } 245 } 246 else{ 247 if (datum){ 248 249 hash_node *new = (hash_node *)malloc(sizeof(hash_node)); 250 251 if (!new || !assign_key(key, new)){ 252 return GENERIC_NULL; 253 } 254 new->data = datum; 255 new->next = table->buckets[bucket]; 256 table->buckets[bucket] = new; 257 return new; 258 } 259 } 260 return GENERIC_NULL; 261 } 262 263 264 /* 265 * assign_key() 266 * 267 * Set the key value of a node to be 'key'. Get some space from 268 * malloc and copy it in etc. Return 1 if all is well, 0 otherwise. 269 */ 270 static int 271 assign_key(char *key, hash_node *node) 272 { 273 if (!node || !key){ 274 return 0; 275 } 276 277 if (!(node->key = (char *)malloc(strlen(key) + 1))){ 278 return 0; 279 } 280 281 node->key[0] = '\0'; 282 strcat(node->key, key); 283 return 1; 284 } 285 286 /* 287 * hash_traverse() 288 * 289 * Traverse the hash table and run the function func on the 290 * data found at each node and the argument we're passed for it. 291 */ 292 void 293 hash_traverse(hash_table *table, int (*func)(), void *arg) 294 { 295 int i; 296 int size = table->size; 297 298 if (!func) 299 return; 300 301 for (i = 0; i < size; i++) { 302 hash_node *n = table->buckets[i]; 303 while (n) { 304 if ((*func)(n->key, n->data, arg) == 0) 305 return; 306 n = n->next; 307 } 308 } 309 return; 310 } 311 312 /* 313 * hash_purge() 314 * 315 * Run through the entire hash table. Call purge_func 316 * on the data found at each node, and then free the node. 317 * Set all the bucket pointers to 0. 318 */ 319 void 320 hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2)) 321 { 322 int i; 323 int size = table->size; 324 325 for (i = 0; i < size; i++) { 326 hash_node *n = table->buckets[i]; 327 if (n) { 328 do { 329 hash_node *to_free = n; 330 if (purge_func) { 331 (*purge_func)(n->key, n->data); 332 } 333 n = n->next; 334 free(to_free); 335 } while (n); 336 table->buckets[i] = NODE_NULL; 337 } 338 } 339 } 340 341 #undef min 342 #define min(a, b) (a) < (b) ? (a) : (b) 343 344 /* 345 * hash_stats() 346 * 347 * Print statistics about the current table allocation to stdout. 348 */ 349 void 350 hash_stats(hash_table *table, int verbose) 351 { 352 int i; 353 int total_elements = 0; 354 int non_empty_buckets = 0; 355 int max_count = 0; 356 int max_repeats = 0; 357 int *counts; 358 int size = table->size; 359 360 if (!(counts = (int *)malloc(size * sizeof(int)))){ 361 fprintf(stderr, "malloc returns 0\n"); 362 exit(1); 363 } 364 365 for (i = 0; i < size; i++){ 366 int x = 0; 367 hash_node *n = table->buckets[i]; 368 counts[i] = 0; 369 while (n){ 370 if (!x){ 371 x = 1; 372 non_empty_buckets++; 373 if (verbose){ 374 printf("bucket %2d: ", i); 375 } 376 } 377 if (verbose){ 378 printf(" %s", n->key); 379 } 380 counts[i]++; 381 n = n->next; 382 } 383 384 total_elements += counts[i]; 385 if (counts[i] > max_count){ 386 max_count = counts[i]; 387 max_repeats = 1; 388 } 389 else if (counts[i] == max_count){ 390 max_repeats++; 391 } 392 393 if (counts[i] && verbose){ 394 printf(" (%d)\n", counts[i]); 395 } 396 } 397 398 printf("\n"); 399 printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s"); 400 401 if (total_elements){ 402 printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size, 403 (double)100 * (double)non_empty_buckets / (double)(size)); 404 printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats); 405 printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets); 406 printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements))); 407 } 408 return; 409 } 410 #pragma clang diagnostic pop