/ stdlib / FreeBSD / strhash.c
strhash.c
  1  /*
  2   *
  3   *                      Copyright 1990
  4   *               Terry Jones & Jordan Hubbard
  5   *
  6   *		  PCS Computer Systeme, GmbH.
  7   *	             Munich, West Germany
  8   *
  9   *
 10   *  All rights reserved.
 11   *
 12   *  This is unsupported software and is subject to change without notice.
 13   *  the author makes no representations about the suitability of this software
 14   *  for any purpose. It is supplied "as is" without express or implied
 15   *  warranty.
 16   *
 17   *  Permission to use, copy, modify, and distribute this software and its
 18   *  documentation for any purpose and without fee is hereby granted, provided
 19   *  that the above copyright notice appear in all copies and that both that
 20   *  copyright notice and this permission notice appear in supporting
 21   *  documentation, and that the name of the author not be used in
 22   *  advertising or publicity pertaining to distribution of the software
 23   *  without specific, written prior permission.
 24   *
 25   */
 26  
 27  /*
 28   * This is a fairly simple open addressing hash scheme.
 29   * Terry did all the code, I just did the spec.
 30   * Thanks again, you crazy Aussie..
 31   *
 32   */
 33  
 34  /*
 35   * $Log: strhash.c,v $
 36   * Revision 2.0  90/03/26  01:44:26  jkh
 37   * pre-beta check-in
 38   *
 39   * Revision 1.8  90/03/09  19:22:35  jkh
 40   * Fixed bogus comment.
 41   *
 42   * Revision 1.7  90/03/09  19:01:08  jkh
 43   * Added comments, GPL.
 44   *
 45   * Revision 1.6  90/03/08  17:55:58  terry
 46   * Rearranged hash_purge to be a tiny bit more efficient.
 47   * Added verbose option to hash_stats.
 48   *
 49   * Revision 1.5  90/03/08  17:19:54  terry
 50   * Added hash_purge. Added arg to hash_traverse. Changed all
 51   * void * to Generic.
 52   *
 53   * Revision 1.4  90/03/08  12:02:35  terry
 54   * Fixed problems with allocation that I screwed up last night.
 55   * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
 56   *
 57   * Revision 1.3  90/03/07  21:33:33  terry
 58   * Cleaned up a few decls to keep gcc -Wall quiet.
 59   *
 60   * Revision 1.2  90/03/07  21:14:53  terry
 61   * Comments. Added HASH_STATS define. Removed hash_find()
 62   * and new_node().
 63   *
 64   * Revision 1.1  90/03/07  20:49:45  terry
 65   * Initial revision
 66   *
 67   *
 68   */
 69  
 70  #pragma clang diagnostic push
 71  #pragma clang diagnostic ignored "-Wstrict-prototypes"
 72  
 73  #include <sys/cdefs.h>
 74  __FBSDID("$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.10 2002/03/22 21:53:10 obrien Exp $");
 75  
 76  #include <stdio.h>
 77  #include <stdlib.h>
 78  #include <string.h>
 79  #include <sys/types.h>
 80  #include <strhash.h>
 81  
 82  #define HASH_NULL    (hash_table *)0
 83  #define NODE_NULL    (hash_node *)0
 84  #define GENERIC_NULL (void *)0
 85  
 86  #define HASH_SZ 97
 87  
 88  
 89  static int _hash(int size, char *key);
 90  static hash_node *list_find(caddr_t key, hash_node *head);
 91  static int assign_key(char *key, hash_node *node);
 92  
 93  
 94  /*
 95   * hash_create()
 96   *
 97   * Malloc room for a new hash table and then room for its
 98   * bucket pointers. Then set all the buckets to
 99   * point to 0. Return the address of the new table.
100   */
101  hash_table *
102  hash_create(int size)
103  {
104      int i;
105      hash_table *new = (hash_table *)malloc(sizeof(hash_table));
106  
107      if (!new || size < 0){
108  	return HASH_NULL;
109      }
110  
111      if (size == 0){
112  	size = HASH_SZ;
113      }
114  
115      if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
116  	return HASH_NULL;
117      }
118  
119      for (i = 0; i < size; i++){
120  	new->buckets[i] = NODE_NULL;
121      }
122      new->size = size;
123  
124      return new;
125  }
126  
127  
128  /*
129   * list_find()
130   *
131   * Find the key in the linked list pointed to by head.
132   */
133  static hash_node *
134  list_find(caddr_t key, hash_node *head)
135  {
136      while (head){
137  	if (!strcmp(head->key, key)){
138  	    return head;
139  	}
140  	head = head->next;
141      }
142      return NODE_NULL;
143  }
144  
145  
146  /*
147   * _hash()
148   *
149   * Compute the hash value for the given key.
150   */
151  static int
152  _hash(int size, char *key)
153  {
154      unsigned int h = 0x0;
155  
156      while (*key){
157  	h = (h << 1) ^ (h ^ (unsigned char) *key++);
158      }
159  
160      h %= size;
161      return h;
162  }
163  
164  /*
165   * hash_destroy()
166   *
167   * Find the key and (if it's there) remove it entirely.
168   * The function (*nukefunc)() is in charge of disposing
169   * of the storage help by the data associated with the node.
170   */
171  void
172  hash_destroy(hash_table *table, char *key, void (*nukefunc)())
173  {
174      int bucket = _hash(table->size, key);
175      hash_node *found = table->buckets[bucket];
176      hash_node *to_free = NODE_NULL;
177  
178      if (!found) {
179  	return;
180      }
181  
182      if (!strcmp(found->key, key)) {
183  	/*
184  	 * It was the head of the list.
185  	 */
186  	table->buckets[bucket] = found->next;
187  	to_free = found;
188      }
189      else {
190  	/*
191  	 * Walk the list, looking one ahead.
192  	 */
193  	while (found->next) {
194  	    if (!strcmp(found->next->key, key)) {
195  		to_free = found->next;
196  		found->next = found->next->next;
197  		break;
198  	    }
199  	    found = found->next;
200  	}
201  
202  	if (!to_free){
203  	    return;
204  	}
205      }
206  
207      if (nukefunc)
208  	(*nukefunc)(to_free->key, to_free->data);
209      free(to_free);
210      return;
211  }
212  
213  
214  /*
215   * hash_search()
216   *
217   * Search the table for the given key. Then:
218   *
219   * 1) If you find it and there is no replacement function, just
220   *    return what you found. (This is a simple search).
221   * 2) If you find it and there is a replacement function, run
222   *    the function on the data you found, and replace the old
223   *    data with whatever is passed in datum. Return 0.
224   * 3) If you don't find it and there is some datum, insert a
225   *    new item into the table. Insertions go at the front of
226   *    the bucket. Return 0.
227   * 4) Otherwise just return 0.
228   *
229   */
230  void *
231  hash_search(hash_table *table, caddr_t key, void *datum,
232  	    void (*replace_func)())
233  {
234      int bucket = _hash(table->size, key);
235      hash_node *found = list_find(key, table->buckets[bucket]);
236  
237      if (found){
238  	if (!replace_func){
239  	    return found->data;
240  	}
241  	else{
242  	    (*replace_func)(found->data);
243  	    found->data = datum;
244  	}
245      }
246      else{
247  	if (datum){
248  
249  	    hash_node *new = (hash_node *)malloc(sizeof(hash_node));
250  
251  	    if (!new || !assign_key(key, new)){
252  		return GENERIC_NULL;
253  	    }
254  	    new->data = datum;
255  	    new->next = table->buckets[bucket];
256  	    table->buckets[bucket] = new;
257  	    return new;
258  	}
259      }
260      return GENERIC_NULL;
261  }
262  
263  
264  /*
265   * assign_key()
266   *
267   * Set the key value of a node to be 'key'. Get some space from
268   * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
269   */
270  static int
271  assign_key(char *key, hash_node *node)
272  {
273      if (!node || !key){
274  	return 0;
275      }
276  
277      if (!(node->key = (char *)malloc(strlen(key) + 1))){
278  	return 0;
279      }
280  
281      node->key[0] = '\0';
282      strcat(node->key, key);
283      return 1;
284  }
285  
286  /*
287   * hash_traverse()
288   *
289   * Traverse the hash table and run the function func on the
290   * data found at each node and the argument we're passed for it.
291   */
292  void
293  hash_traverse(hash_table *table, int (*func)(), void *arg)
294  {
295      int i;
296      int size = table->size;
297  
298      if (!func)
299  	return;
300  
301      for (i = 0; i < size; i++) {
302  	hash_node *n = table->buckets[i];
303  	while (n) {
304  	    if ((*func)(n->key, n->data, arg) == 0)
305  		return;
306  	    n = n->next;
307  	}
308      }
309      return;
310  }
311  
312  /*
313   * hash_purge()
314   *
315   * Run through the entire hash table. Call purge_func
316   * on the data found at each node, and then free the node.
317   * Set all the bucket pointers to 0.
318   */
319  void
320  hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
321  {
322      int i;
323      int size = table->size;
324  
325      for (i = 0; i < size; i++) {
326  	hash_node *n = table->buckets[i];
327  	if (n) {
328  	    do {
329  		hash_node *to_free = n;
330  		if (purge_func) {
331  		    (*purge_func)(n->key, n->data);
332  		}
333  		n = n->next;
334  		free(to_free);
335  	    } while (n);
336  	    table->buckets[i] = NODE_NULL;
337  	}
338      }
339  }
340  
341  #undef min
342  #define min(a, b) (a) < (b) ? (a) : (b)
343  
344  /*
345   * hash_stats()
346   *
347   * Print statistics about the current table allocation to stdout.
348   */
349  void
350  hash_stats(hash_table *table, int verbose)
351  {
352      int i;
353      int total_elements = 0;
354      int non_empty_buckets = 0;
355      int max_count = 0;
356      int max_repeats = 0;
357      int *counts;
358      int size = table->size;
359  
360      if (!(counts = (int *)malloc(size * sizeof(int)))){
361  	fprintf(stderr, "malloc returns 0\n");
362  	exit(1);
363      }
364  
365      for (i = 0; i < size; i++){
366  	int x = 0;
367  	hash_node *n = table->buckets[i];
368  	counts[i] = 0;
369  	while (n){
370  	    if (!x){
371  		x = 1;
372  		non_empty_buckets++;
373  		if (verbose){
374  		    printf("bucket %2d: ", i);
375  		}
376  	    }
377  	    if (verbose){
378  		printf(" %s", n->key);
379  	    }
380  	    counts[i]++;
381  	    n = n->next;
382  	}
383  
384  	total_elements += counts[i];
385  	if (counts[i] > max_count){
386  	    max_count = counts[i];
387  	    max_repeats = 1;
388  	}
389  	else if (counts[i] == max_count){
390  	    max_repeats++;
391  	}
392  
393  	if (counts[i] && verbose){
394  	    printf(" (%d)\n", counts[i]);
395  	}
396      }
397  
398      printf("\n");
399      printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
400  
401      if (total_elements){
402  	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
403  	       (double)100 * (double)non_empty_buckets / (double)(size));
404  	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
405  	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
406  	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
407      }
408      return;
409  }
410  #pragma clang diagnostic pop