Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

Hash.cc

Go to the documentation of this file.
00001 // $Id: Hash.cc,v 1.5 2005/09/07 17:04:18 vern Exp $
00002 //
00003 // Copyright (c) 1995, 1996, 1997, 1998, 1999, 2002
00004 //      The Regents of the University of California.  All rights reserved.
00005 //
00006 // Redistribution and use in source and binary forms, with or without
00007 // modification, are permitted provided that: (1) source code distributions
00008 // retain the above copyright notice and this paragraph in its entirety, (2)
00009 // distributions including binary code include the above copyright notice and
00010 // this paragraph in its entirety in the documentation or other materials
00011 // provided with the distribution, and (3) all advertising materials mentioning
00012 // features or use of this software display the following acknowledgement:
00013 // ``This product includes software developed by the University of California,
00014 // Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
00015 // the University nor the names of its contributors may be used to endorse
00016 // or promote products derived from this software without specific prior
00017 // written permission.
00018 // THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
00019 // WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
00020 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00021 
00022 // The hash function works as follows:
00023 //
00024 // 1) For short data we have a number of universal hash functions:
00025 // UHASH (ax + b (mod p)), H3, Dietzfelbinger and UMAC_NH (UMAC_NH is
00026 // not as strongly universal as the others, but probably enough). All
00027 // these functions require number of random bits linear to the data
00028 // length. And we use them for data no longer than UHASH_KEY_SIZE.
00029 // They are faster than HMAC/MD5 used for longer data, and most hash
00030 // operations are on short data.
00031 //
00032 // 2) As a fall-back, we use HMAC/MD5 (keyed MD5) for data of arbitrary
00033 // length. MD5 is used as a scrambling scheme so that it is difficult
00034 // for the adversary to construct conflicts, though I do not know if
00035 // HMAC/MD5 is provably universal.
00036 
00037 #include "config.h"
00038 
00039 #include "Hash.h"
00040 
00041 // #define USE_UHASH
00042 // #define USE_UMAC_NH
00043 // #define USE_H3
00044 // #define USE_DIETZFELBINGER
00045 
00046 int hash_cnt_all = 0, hash_cnt_uhash = 0;
00047 
00048 #if defined(USE_UHASH) || defined(USE_UMAC_NH)
00049 uint8 uhash_key[UHASH_KEY_SIZE];
00050 #endif
00051 
00052 #ifdef USE_DIETZFELBINGER
00053 #include "TwoWise.h"
00054 const TwoWise* two_wise;
00055 #endif
00056 
00057 #ifdef USE_H3
00058 #include "h3.h"
00059 const H3<hash_t, UHASH_KEY_SIZE>* h3;
00060 #endif
00061 
00062 void init_hash_function()
00063         {
00064         if ( UHASH_KEY_SIZE % 16 != 0 )
00065                 internal_error("uhash key size must be multiply of 16");
00066 
00067         // Both Dietzfelbinger and H3 use random() to generate keys
00068         // -- is it strong enough?
00069 #ifdef USE_DIETZFELBINGER
00070         two_wise = new TwoWise((UHASH_KEY_SIZE + 3) >> 2);
00071 #endif
00072 
00073 #ifdef USE_H3
00074         h3 = new H3<hash_t, UHASH_KEY_SIZE>();
00075 #endif
00076 
00077 #if defined(USE_UHASH) || defined(USE_UMAC_NH)
00078         uint32 buf2[5];
00079         memcpy(buf2, shared_hmac_md5_key, sizeof(shared_hmac_md5_key));
00080 
00081         for ( int i = 0; i < UHASH_KEY_SIZE; i += 16 )
00082                 {
00083                 buf2[4] = i;
00084                 hmac_md5(sizeof(buf2), (const unsigned char*) buf2, uhash_key + i);
00085                 }
00086 #endif
00087         }
00088 
00089 HashKey::HashKey(bro_int_t i)
00090         {
00091         key_u.i = i;
00092         key = (void*) &key_u;
00093         size = sizeof(i);
00094         hash = HashBytes(key, size);
00095         is_our_dynamic = 0;
00096         }
00097 
00098 HashKey::HashKey(bro_uint_t u)
00099         {
00100         key_u.i = bro_int_t(u);
00101         key = (void*) &key_u;
00102         size = sizeof(u);
00103         hash = HashBytes(key, size);
00104         is_our_dynamic = 0;
00105         }
00106 
00107 #ifdef USE_INT64
00108 
00109 HashKey::HashKey(uint32 u)
00110         {
00111         key_u.u32 = u;
00112         key = (void*) &key_u;
00113         size = sizeof(u);
00114         hash = HashBytes(key, size);
00115         is_our_dynamic = 0;
00116         }
00117 
00118 #endif // USE_INT64
00119 
00120 HashKey::HashKey(const uint32 u[], int n)
00121         {
00122         size = n * sizeof(u[0]);
00123         key = (void*) u;
00124         hash = HashBytes(key, size);
00125         is_our_dynamic = 0;
00126         }
00127 
00128 HashKey::HashKey(double d)
00129         {
00130         union {
00131                 double d;
00132                 int i[2];
00133         } u;
00134 
00135         key_u.d = u.d = d;
00136         key = (void*) &key_u;
00137         size = sizeof(d);
00138         hash = HashBytes(key, size);
00139         is_our_dynamic = 0;
00140         }
00141 
00142 HashKey::HashKey(const void* p)
00143         {
00144         key_u.p = p;
00145         key = (void*) &key_u;
00146         size = sizeof(p);
00147         hash = HashBytes(key, size);
00148         is_our_dynamic = 0;
00149         }
00150 
00151 HashKey::HashKey(const char* s)
00152         {
00153         size = strlen(s);       // note - skip final \0
00154         key = (void*) s;
00155         hash = HashBytes(key, size);
00156         is_our_dynamic = 0;
00157         }
00158 
00159 HashKey::HashKey(const BroString* s)
00160         {
00161         size = s->Len();
00162         key = (void*) s->Bytes();
00163         hash = HashBytes(key, size);
00164         is_our_dynamic = 0;
00165         }
00166 
00167 HashKey::HashKey(int copy_key, void* arg_key, int arg_size)
00168         {
00169         size = arg_size;
00170         is_our_dynamic = 1;
00171 
00172         if ( copy_key )
00173                 {
00174                 key = (void*) new char[size];
00175                 memcpy(key, arg_key, size);
00176                 }
00177         else
00178                 key = arg_key;
00179 
00180         hash = HashBytes(key, size);
00181         }
00182 
00183 HashKey::HashKey(const void* arg_key, int arg_size, hash_t arg_hash)
00184         {
00185         size = arg_size;
00186         hash = arg_hash;
00187         key = CopyKey(arg_key, size);
00188         is_our_dynamic = 1;
00189         }
00190 
00191 HashKey::HashKey(const void* arg_key, int arg_size, hash_t arg_hash,
00192                 bool /* dont_copy */)
00193         {
00194         size = arg_size;
00195         hash = arg_hash;
00196         key = const_cast<void*>(arg_key);
00197         is_our_dynamic = 0;
00198         }
00199 
00200 HashKey::HashKey(const void* bytes, int arg_size)
00201         {
00202         size = arg_size;
00203         key = CopyKey(bytes, size);
00204         hash = HashBytes(key, size);
00205         is_our_dynamic = 1;
00206         }
00207 
00208 void* HashKey::TakeKey()
00209         {
00210         if ( is_our_dynamic )
00211                 {
00212                 is_our_dynamic = 0;
00213                 return key;
00214                 }
00215         else
00216                 return CopyKey(key, size);
00217         }
00218 
00219 void* HashKey::CopyKey(const void* k, int s) const
00220         {
00221         void* k_copy = (void*) new char[s];
00222         memcpy(k_copy, k, s);
00223         return k_copy;
00224         }
00225 
00226 hash_t HashKey::HashBytes(const void* bytes, int size)
00227         {
00228 #ifdef USE_OLD_BRO_HASH
00229         // ### Shall we remove this?
00230         const unsigned char* cp = (const unsigned char*) bytes;
00231         hash_t h = 0;
00232 
00233         for ( int i = 0; i < size; ++i )
00234                 // Overflow is okay here.
00235                 h = (h >> 31) + (h << 1) + cp[i];
00236 
00237         return h;
00238 #endif /* USE_OLD_BRO_HASH */
00239 
00240         ++hash_cnt_all;
00241 
00242 #ifdef USE_H3
00243         if ( size == 0 ) // H3 doesn't check if size is zero
00244                 return 0;
00245 
00246         if ( size <= UHASH_KEY_SIZE )
00247                 {
00248                 ++hash_cnt_uhash;
00249                 return (*h3)(bytes, size);
00250                 }
00251 #endif /* USE_H3 */
00252 
00253 #ifdef USE_DIETZFELBINGER
00254         if ( size <= UHASH_KEY_SIZE )
00255                 {
00256                 ++hash_cnt_uhash;
00257                 return two_wise->Hash(size, bytes);
00258                 }
00259 #endif /* USE_DIETZFELBINGER */
00260 
00261 #ifdef USE_UMAC_NH
00262         // Use the NH hash function proposed in UMAC.
00263         // (See http://www.cs.ucdavis.edu/~rogaway/umac/)
00264         //
00265         // Essentially, it is computed as:
00266         // H = (x_0 +_16 k_0) * (x_1 +_16 k_1) +
00267         //      (x_2 +_16 k_2) * (x_3 +_16 k_3) + ...
00268         // where {k_i} are keys for universal hashing,
00269         // {x_i} are data words, and +_16 means plus mod 2^16.
00270         //
00271         // This is faster than UHASH because no modulo operation
00272         // is needed.
00273 
00274         typedef uint16 uhash_word_t;
00275         typedef uint32 uhash_dword_t;
00276 
00277         if ( (unsigned int) size <= UHASH_KEY_SIZE - sizeof(uhash_word_t) )
00278                 {
00279                 ++hash_cnt_uhash;
00280 
00281                 const uhash_word_t* uhash = (const uhash_word_t *) uhash_key;
00282                 const uint8* data = (const uint8 *) bytes;
00283                 uhash_dword_t sum = 0;
00284 
00285                 int i, j, k;
00286                 uhash_word_t x, y;
00287                 k = 0;
00288 
00289                 for ( i = 0; i + sizeof(uhash_word_t) < size; )
00290                         {
00291                         x = 0;
00292 
00293                         for ( j = 0; j < sizeof(uhash_word_t) && i + j < size; ++j )
00294                                 x = (x << 8) | data[i+j];
00295 
00296                         i += j;
00297                         x += uhash[k++];
00298                         y = 0;
00299 
00300                         for ( j = 0; j < sizeof(uhash_word_t) && i + j < size; ++j )
00301                                 y = (y << 8) | data[i+j];
00302 
00303                         i += j;
00304                         y += uhash[k++];
00305 
00306                         sum += ((uhash_dword_t) x) * ((uhash_dword_t) y);
00307                         }
00308 
00309                 x = 0;
00310                 for ( j = 0; j < sizeof(uhash_word_t) && i + j < size; ++j )
00311                         x = (x << 8) | data[i+j];
00312                 x += uhash[k++];
00313 
00314                 y = size + uhash[k++];
00315                 sum += ((uhash_dword_t) x) * ((uhash_dword_t) y);
00316 
00317                 return hash_t(sum);
00318                 }
00319 #endif /* USE_UMAC_NH */
00320 
00321 #ifdef USE_UHASH
00322         // Universal hashing by division (ax mod p).
00323         typedef uint32 uhash_word_t;
00324 
00325         // We do this typedef here rather than in util.h because
00326         // "long long" is not in fact ANSI C++ (though it *is* ANSI C),
00327         // so if it turns out to be problematic we can turn it off
00328         // by not using UHASH.
00329         typedef unsigned long long uint64;
00330 
00331         typedef uint64 uhash_dword_t;
00332 
00333         if ( (unsigned int) size <= UHASH_KEY_SIZE - sizeof(uhash_word_t) )
00334                 {
00335                 ++hash_cnt_uhash;
00336 
00337                 const uhash_word_t* uhash = (const uhash_word_t *) uhash_key;
00338                 static const uhash_dword_t uhash_prime = (uint64) 1 << 32 + 15;
00339 
00340                 uhash_dword_t sum =
00341                         (uhash_dword_t) (size + 1) *
00342                         ((uhash_dword_t) uhash[0] + 1);
00343 
00344                 int i, j, k;
00345                 const uint8* data = (const uint8*) bytes;
00346                 for ( i = 0, k = 1; i < size; i += sizeof(uhash_word_t), ++k )
00347                         {
00348                         uhash_dword_t x = 0;
00349                         for ( j = 0; unsigned(j) < sizeof(uhash_word_t) && i + j < size; ++j )
00350                                 x = (x << 8) | (uhash_dword_t) data[i+j];
00351 
00352                         sum += ((x + 1) * ((uhash_dword_t) uhash[k] + 1)) %
00353                                 uhash_prime;
00354                         }
00355 
00356                 return hash_t((sum % uhash_prime) & 0xffffffff);
00357                 }
00358 
00359 #endif /* USE_UHASH */
00360 
00361         // Fall back to HMAC/MD5 for longer data (which is usually rare).
00362         hash_t digest[16];
00363         hmac_md5(size, (unsigned char*) bytes, (unsigned char*) digest);
00364         return digest[0];
00365         }

Generated on Wed Sep 14 02:56:11 2005 for bro_docs by doxygen 1.3.5