hash.c

Go to the documentation of this file.
00001 /*                          H A S H . C
00002  * BRL-CAD
00003  *
00004  * Copyright (c) 2004-2006 United States Government as represented by
00005  * the U.S. Army Research Laboratory.
00006  *
00007  * This library is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License
00009  * as published by the Free Software Foundation; either version 2 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * This library is distributed in the hope that it will be useful, but
00013  * WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Library General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with this file; see the file named COPYING for more
00019  * information.
00020  */
00021 
00022 /** \addtogroup bu_hash */
00023 /*@{*/
00024 /** @file hash.c
00025  *
00026  * @brief
00027  *      An implimentation of hash tables.
00028  */
00029 
00030 #ifndef lint
00031 static const char libbu_hash_RCSid[] = "@(#) $Header: /cvsroot/brlcad/brlcad/src/libbu/hash.c,v 14.11 2006/08/31 05:50:24 lbutler Exp $";
00032 #endif
00033 
00034 #include "common.h"
00035 
00036 #include <stdlib.h>
00037 #include <stdio.h>
00038 #include <math.h>
00039 #ifdef HAVE_STRING_H
00040 #  include <string.h>
00041 #else
00042 #  include <strings.h>
00043 #endif
00044 #include "machine.h"
00045 #include "bu.h"
00046 
00047 
00048 /**             B U _ H A S H
00049  * the hashing function
00050  */
00051 unsigned long
00052 bu_hash(unsigned char *str, int len)
00053 {
00054         unsigned long hash = 5381;
00055         int i, c;
00056 
00057         c = *str;
00058         for( i=0 ; i<len ; i++ ) {
00059                 c = *str;
00060                 hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
00061                 str++;
00062         }
00063 
00064         return hash;
00065 }
00066 
00067 /**                     B U _ C R E A T E _ H A S H _ T B L
00068  *@brief
00069  *      Create an empty hash table
00070  *      The input is the number of desired hash bins.
00071  *      This number will be rounded up to the nearest power of two.
00072  */
00073 struct bu_hash_tbl *
00074 bu_create_hash_tbl( unsigned long tbl_size )
00075 {
00076         struct bu_hash_tbl *hsh_tbl;
00077         unsigned long power_of_two=64;
00078         int power=6;
00079         int max_power=(sizeof( unsigned long ) * 8) - 1;
00080 
00081         /* allocate the table structure (do not use bu_malloc() as this may be used for MEM_DEBUG) */
00082         hsh_tbl = (struct bu_hash_tbl *)malloc( sizeof( struct bu_hash_tbl ) );
00083         if( !hsh_tbl ) {
00084                 fprintf( stderr, "Failed to allocate hash table\n" );
00085                 return( (struct bu_hash_tbl *)NULL );
00086         }
00087 
00088         /* the number of bins in the hash table will be a power of two */
00089         while( power_of_two < tbl_size && power < max_power ) {
00090                 power_of_two = power_of_two << 1;
00091                 power++;
00092         }
00093 
00094         if( power == max_power ) {
00095                 int i;
00096 
00097                 hsh_tbl->mask = 1;
00098                 for( i=1 ; i<max_power ; i++ ) {
00099                         hsh_tbl->mask = (hsh_tbl->mask << 1) + 1;
00100                 }
00101                 hsh_tbl->num_lists = hsh_tbl->mask + 1;
00102         } else {
00103                 hsh_tbl->num_lists = power_of_two;
00104                 hsh_tbl->mask = power_of_two - 1;
00105         }
00106 
00107         /* allocate the bins (do not use bu_malloc() as this may be used for MEM_DEBUG) */
00108         hsh_tbl->lists = (struct bu_hash_entry **)calloc( hsh_tbl->num_lists, sizeof( struct bu_hash_entry *) );
00109 
00110         hsh_tbl->num_entries = 0;
00111         hsh_tbl->magic = BU_HASH_TBL_MAGIC;
00112 
00113         return( hsh_tbl );
00114 }
00115 
00116 /**                     B U _ F I N D _ H A S H _ E N T R Y
00117  *
00118  * @brief
00119  *      Find the hash table entry corresponding to the provided key
00120  *
00121  * 
00122  * @param[in] hsh_tbl - The hash table to look in
00123  * @param[in] key - the key to look for
00124  * @param[in] key_len - the length of the key in bytes
00125  *
00126  * Output:
00127  * @param[out] prev - the previous hash table entry (non-null for entries that not the first in hash bin)
00128  * @param[out] index - the index of the hash bin for this key
00129  *
00130  * @return
00131  *      the hash table entry corresponding to the provided key, or NULL if not found
00132  */
00133 struct bu_hash_entry *
00134 bu_find_hash_entry( struct bu_hash_tbl *hsh_tbl, unsigned char *key, int key_len, struct bu_hash_entry **prev, unsigned long *index )
00135 {
00136         struct bu_hash_entry *hsh_entry=NULL;
00137         int found=0;
00138 
00139         BU_CK_HASH_TBL( hsh_tbl );
00140 
00141         /* calculate the index into the bin array */
00142         *index = bu_hash( key, key_len ) & hsh_tbl->mask;
00143         if( *index >= hsh_tbl->num_lists ) {
00144                 fprintf( stderr, "hash function returned too large value (%ld), only have %ld lists\n",
00145                          *index, hsh_tbl->num_lists );
00146                 *prev = NULL;
00147                 return( (struct bu_hash_entry *)NULL );
00148         }
00149 
00150         /* look for the provided key in the list of entries in this bin */
00151         *prev = NULL;
00152         if( hsh_tbl->lists[*index] ) {
00153                 *prev = NULL;
00154                 hsh_entry = hsh_tbl->lists[*index];
00155                 while( hsh_entry ) {
00156                         unsigned char *c1, *c2;
00157                         int i;
00158 
00159                         /* compare key lengths first for performance */
00160                         if( hsh_entry->key_len != key_len ) {
00161                                 *prev = hsh_entry;
00162                                 hsh_entry = hsh_entry->next;
00163                                 continue;
00164                         }
00165 
00166                         /* key lengths are the same, now compare the actual keys */
00167                         found = 1;
00168                         c1 = key;
00169                         c2 = hsh_entry->key;
00170                         for( i=0 ; i<key_len ; i++ ) {
00171                                 if( *c1 != *c2 ) {
00172                                         found = 0;
00173                                         break;
00174                                 }
00175                                 c1++;
00176                                 c2++;
00177                         }
00178 
00179                         /* if we have a match, get out of this loop */
00180                         if( found ) break;
00181 
00182                         /* step to next entry in this bin */
00183                         *prev = hsh_entry;
00184                         hsh_entry = hsh_entry->next;
00185                 }
00186         }
00187 
00188         if( found ) {
00189                 /* return the found entry */
00190                 return( hsh_entry );
00191         } else {
00192                 /* did not find the entry, return NULL */
00193                 return( (struct bu_hash_entry *)NULL );
00194         }
00195 }
00196 
00197 
00198 /**                     B U _ S E T _ H A S H _ V A L U E
00199  *@brief
00200  *      Set the value for a hash table entry
00201  *
00202  *      Note that this is just a pointer copy, the hash table does not maintain its own copy
00203  *      of this value.
00204  */
00205 void
00206 bu_set_hash_value( struct bu_hash_entry *hsh_entry, unsigned char *value )
00207 {
00208         BU_CK_HASH_ENTRY( hsh_entry );
00209 
00210         /* just copy a pointer */
00211         hsh_entry->value = value;
00212 }
00213 
00214 /**                     B U _ G E T _ H A S H _ V A L U E
00215  *
00216  *      get the value pointer stored for the specified hash table entry
00217  */
00218 unsigned char *
00219 bu_get_hash_value( struct bu_hash_entry *hsh_entry )
00220 {
00221         BU_CK_HASH_ENTRY( hsh_entry );
00222 
00223         return( hsh_entry->value );
00224 }
00225 
00226 /**                     B U _ G E T _ H A S H _ K E Y
00227  *
00228  *      get the key pointer stored for the specified hash table entry
00229  */
00230 unsigned char *
00231 bu_get_hash_key( struct bu_hash_entry *hsh_entry )
00232 {
00233         BU_CK_HASH_ENTRY( hsh_entry );
00234 
00235         return( hsh_entry->key );
00236 }
00237 
00238 
00239 /**                     B U _ H A S H _ A D D _ E N T R Y
00240  *
00241  *      Add an new entry to a hash table
00242  *
00243  * 
00244  * @param[in] hsh_tbl - the hash table to accept thye new entry
00245  * @param[in] key - the key (any byte string)
00246  * @param[in] key_len - the number of bytes in the key
00247  *
00248  * @param[out] new - a flag, non-zero indicates that a new entry was created.
00249  *                    zero indicates that an entry already exists with the specified key and key length
00250  *
00251  * @return
00252  *      a hash table entry. If "new" is non-zero, a new, empty entry is returned.
00253  *      if "new" is zero, the returned entry is the one matching the specified key and key_len
00254  */
00255 struct bu_hash_entry *
00256 bu_hash_add_entry( struct bu_hash_tbl *hsh_tbl, unsigned char *key, int key_len, int *new )
00257 {
00258         struct bu_hash_entry *hsh_entry, *prev;
00259         unsigned long index;
00260 
00261         BU_CK_HASH_TBL( hsh_tbl );
00262 
00263         /* do a find for three reasons
00264          * does this key already exist in the table?
00265          * get the hash bin index for this key
00266          * find the previous entry to link the new one to
00267          */
00268         hsh_entry = bu_find_hash_entry( hsh_tbl, key, key_len, &prev, &index );
00269 
00270         if( hsh_entry ) {
00271                 /* this key is already in the table, return the entry, with flag set to 0 */
00272                 *new = 0;
00273                 return( hsh_entry );
00274         }
00275 
00276         if( prev ) {
00277                 /* already have an entry in this bin, just link to it */
00278                 prev->next = (struct bu_hash_entry *)calloc( 1, sizeof( struct bu_hash_entry ) );
00279                 hsh_entry = prev->next;
00280         } else {
00281                 /* first entry in this bin */
00282                 hsh_entry = (struct bu_hash_entry *)calloc( 1, sizeof( struct bu_hash_entry ) );
00283                 hsh_tbl->lists[index] = hsh_entry;
00284         }
00285 
00286         /* fill in the structure */
00287         hsh_entry->next = NULL;
00288         hsh_entry->value = NULL;
00289         hsh_entry->key_len = key_len;
00290         hsh_entry->magic = BU_HASH_ENTRY_MAGIC;
00291 
00292         /* make a copy of the key */
00293         hsh_entry->key = (unsigned char *)malloc( key_len );
00294         memcpy( hsh_entry->key, key, key_len );
00295 
00296         /* set "new" flag, increment count of entries, and return new entry */
00297         *new = 1;
00298         hsh_tbl->num_entries++;
00299         return( hsh_entry );
00300 }
00301 
00302 /**                     B U _ H A S H _ T B L _ P R
00303  *@brief
00304  *      Print the specified hash table to stderr.
00305  *      (Note that the keys and values are printed as pointers)
00306  */
00307 void
00308 bu_hash_tbl_pr( struct bu_hash_tbl *hsh_tbl, char *str )
00309 {
00310         unsigned long index;
00311         struct bu_hash_entry *hsh_entry;
00312 
00313         BU_CK_HASH_TBL( hsh_tbl );
00314 
00315         fprintf( stderr, "%s\n", str );
00316         fprintf( stderr, "bu_hash_table (%ld entries):\n", hsh_tbl->num_entries );
00317 
00318         /* visit all the entries in this table */
00319         for( index=0 ; index<hsh_tbl->num_lists ; index++ ) {
00320                 hsh_entry = hsh_tbl->lists[index];
00321                 while( hsh_entry ) {
00322                         BU_CK_HASH_ENTRY( hsh_entry );
00323                         fprintf( stderr, "\tindex=%ld, key=x%lx, value=x%lx\n", index, (unsigned long int)hsh_entry->key, (unsigned long int)hsh_entry->value );
00324                         hsh_entry = hsh_entry->next;
00325                 }
00326         }
00327 }
00328 
00329 /**                     B U _ H A S H _ T B L _ F R E E
00330  *@brief
00331  *      Free all the memory associated with the specified hash table.
00332  *      Note that the keys are freed (they are copies), but the "values" are not freed.
00333  *      (The values are merely pointers)
00334  */
00335 void
00336 bu_hash_tbl_free( struct bu_hash_tbl *hsh_tbl )
00337 {
00338         unsigned long index;
00339         struct bu_hash_entry *hsh_entry, *tmp;
00340 
00341         BU_CK_HASH_TBL( hsh_tbl );
00342 
00343         /* loop through all the bins in this hash table */
00344         for( index=0 ; index<hsh_tbl->num_lists ; index++ ) {
00345                 /* traverse all the entries in the list for this bin */
00346                 hsh_entry = hsh_tbl->lists[index];
00347                 while( hsh_entry ) {
00348                         BU_CK_HASH_ENTRY( hsh_entry );
00349                         tmp = hsh_entry->next;
00350 
00351                         /* free the copy of the key, and this entry */
00352                         free( hsh_entry->key );
00353                         free( hsh_entry );
00354 
00355                         /* step to next entry in libnked list */
00356                         hsh_entry = tmp;
00357                 }
00358         }
00359 
00360         /* free the array of bins */
00361         free( hsh_tbl->lists );
00362 
00363         /* free the actual hash table structure */
00364         free( hsh_tbl );
00365 }
00366 
00367 
00368 /**                     B U _ H A S H _ T B L _ F I R S T
00369  *@brief
00370  *      get the "first" entry in a hash table
00371  *
00372  *
00373  * @param[in] hsh_tbl - the hash table of interest
00374  * @param[in] rec - an empty "bu_hash_record" structure for use by this routine and "bu_hash_tbl_next"
00375  *
00376  * @return
00377  *      the first non-null entry in the hash table, or NULL if there are no entries
00378  *      (Note that the order of enties is not likely to have any significance)
00379  */
00380 struct bu_hash_entry *
00381 bu_hash_tbl_first( struct bu_hash_tbl *hsh_tbl, struct bu_hash_record *rec )
00382 {
00383         BU_CK_HASH_TBL( hsh_tbl );
00384 
00385         /* initialize the record structure */
00386         rec->magic = BU_HASH_RECORD_MAGIC;
00387         rec->tbl = hsh_tbl;
00388         rec->index = -1;
00389         rec->hsh_entry = (struct bu_hash_entry *)NULL;
00390 
00391         if( hsh_tbl->num_entries == 0 ) {
00392                 /* this table is empty */
00393                 return( (struct bu_hash_entry *)NULL );
00394         }
00395 
00396         /* loop through all the bins in this hash table, looking for a non-null entry */
00397         for( rec->index=0 ; rec->index < hsh_tbl->num_lists ; rec->index++ ) {
00398                 rec->hsh_entry = hsh_tbl->lists[rec->index];
00399                 if( rec->hsh_entry ) {
00400                         return( rec->hsh_entry );
00401                 }
00402         }
00403 
00404         /* no entry found, return NULL */
00405         return( (struct bu_hash_entry *)NULL );
00406 }
00407 
00408 /**                     B U _ H A S H _ T B L _ N E X T
00409  *
00410  *      get the "next" entry in a hash table
00411  *
00412  * input:
00413  *      rec - the "bu_hash_record" structure that was passed to "bu_hash_tbl_first"
00414  *
00415  * return:
00416  *      the "next" non-null hash entry in this hash table
00417  */
00418 struct bu_hash_entry *
00419 bu_hash_tbl_next( struct bu_hash_record *rec )
00420 {
00421         struct bu_hash_tbl *hsh_tbl;
00422 
00423         BU_CK_HASH_RECORD( rec );
00424         hsh_tbl = rec->tbl;
00425         BU_CK_HASH_TBL( hsh_tbl );
00426 
00427         /* if the entry in the record structure has a non-null "next",
00428          * return it, and update the record structure
00429          */
00430         if( rec->hsh_entry ) {
00431                 rec->hsh_entry = rec->hsh_entry->next;
00432                 if( rec->hsh_entry ) {
00433                         return( rec->hsh_entry );
00434                 }
00435         }
00436 
00437         /* must move to a new bin to find another entry */
00438         for( rec->index++ ; rec->index < hsh_tbl->num_lists ; rec->index++ ) {
00439                 rec->hsh_entry = hsh_tbl->lists[rec->index];
00440                 if( rec->hsh_entry ) {
00441                         return( rec->hsh_entry );
00442                 }
00443         }
00444 
00445         /* no more entries, return NULL */
00446         return( (struct bu_hash_entry *)NULL );
00447 }
00448 /*@}*/
00449 /*
00450  * Local Variables:
00451  * mode: C
00452  * tab-width: 8
00453  * c-basic-offset: 4
00454  * indent-tabs-mode: t
00455  * End:
00456  * ex: shiftwidth=4 tabstop=8
00457  */

Generated on Mon Sep 18 01:24:47 2006 for BRL-CAD by  doxygen 1.4.6