Hash.h

Go to the documentation of this file.
00001 #ifndef __LDK_HASH_HH__
00002 #define __LDK_HASH_HH__
00003 
00005 //   Copyright (C) 2003-2007 Lorien Dunn.
00006 //
00007 //   Contact: loriendunn AT users DOT sourceforge DOT net
00008 //
00009 //   This software is provided 'as-is', without any express or implied warranty.
00010 //   In no event will the authors be held liable for any damages arising from
00011 //   the use of this software.
00012 //
00013 //   Permission is granted to anyone to use this software for any purpose,
00014 //   including commercial applications, and to alter it and redistribute it
00015 //   freely, subject to the following restrictions:
00016 //
00017 //   1. The origin of this software must not be misrepresented; you must not
00018 //   claim that you wrote the original software. If you use this software in a
00019 //   product, an acknowledgment in the product documentation would be
00020 //   appreciated but is not required.
00021 //
00022 //   2. Altered source versions must be plainly marked as such, and must not be
00023 //   misrepresented as being the original software.
00024 //
00025 //   3. This notice may not be removed or altered from any source distribution.
00026 //
00028 
00034 
00035 #include "LDK/Memory.h"
00036 #include "LDK/String.h"
00037 #include <string.h>
00038 
00039 
00040 namespace LDK
00041 {
00042 
00043 //Table of prime numbers 2^n+a, 2<=n<=30.
00044 //from st.c in the ruby source
00045 extern const LDK_API size_t PrimeConsts[];
00046 
00049 
00062 template <class Key> class Hasher;
00063 
00064 
00065 template <>
00066 class LDK_API Hasher<int>
00067 {
00068 public:
00069     size_t operator()(int key, size_t size)
00070     {
00071         return (static_cast<size_t>(key)) % size;
00072     }
00073 };
00074 
00075 template <>
00076 class LDK_API Hasher<int64>
00077 {
00078 public:
00079     size_t operator()(int64 key, size_t size)
00080     {
00081         return (static_cast<size_t>(key % size));
00082     }
00083 };
00084 
00085 template <>
00086 class LDK_API Hasher<const char*>
00087 {
00088 public:
00089     size_t operator()(const char* key, size_t size)
00090     {
00091         size_t h = 0;
00092         size_t g;
00093         while(*key)
00094         {
00095             h = (h << 4) + *key++;
00096 
00097             if((g = h & 0xF0000000)!=0)
00098                 h ^= g >> 24;
00099             h &= ~g;
00100         }
00101         return (h % size);
00102     }
00103 };
00104 
00105 template<>
00106 class LDK_API Hasher<std::string>
00107 {
00108 public:
00109     size_t operator()(const std::string& key, size_t size)
00110     {
00111         size_t h = 0;
00112         size_t g;
00113         const char* k = key.c_str();
00114         while(*k)
00115         {
00116             h = (h << 4) + *k++;
00117 
00118             if((g = h & 0xF0000000)!=0)
00119                 h ^= g >> 24;
00120             h &= ~g;
00121         }
00122         return (h % size);
00123     }
00124 };
00125 
00126 template<>
00127 class LDK_API Hasher<String>
00128 {
00129 public:
00130     size_t operator()(const String& key, size_t size)
00131     {
00132         size_t h = 0;
00133         size_t g;
00134         const char* k = key.c_str();
00135         while(*k)
00136         {
00137             h = (h << 4) + *k++;
00138 
00139             if((g = h & 0xF0000000)!=0)
00140                 h ^= g >> 24;
00141             h &= ~g;
00142         }
00143         return (h % size);
00144     }
00145 };
00146 
00167 template
00168 <
00169     class Key,
00170     class Data,
00171     template <class> class Alloc=Allocator
00172 >
00173 class Hash : public Object
00174 {
00175 public:
00180     class Pair
00181     {
00182     private:
00183         void operator=(const Pair& rhs) {}
00184     public:
00186         Key mKey;
00188         Data mData;
00190         inline Pair(const Key& k, const Data& d) : mKey(k), mData(d) {}
00192         inline Pair(const Pair& rhs) : mKey(rhs.mKey), mData(rhs.mData) {}
00193     };
00194 protected:
00195     typedef Alloc<Pair> PairAlloc;
00196     PairAlloc mPairAlloc;
00197     typedef Alloc<Pair*> PairPtrAlloc;
00198     PairPtrAlloc mPairPtrAlloc;
00199     Pair**  mArray;
00200     size_t  mAllocated;
00201     size_t  mFreeSlots;
00202     size_t  mSize;
00203     static const int cIncBy = 7;
00204 
00205     static size_t primeGreaterThan(size_t size)
00206     {
00207         const size_t* prime = PrimeConsts;
00208 
00209         while(prime)
00210         {
00211             if((*prime) == size)
00212             {
00213                 prime++;
00214                 break;
00215             }
00216             if((*prime) > size)
00217                 break;
00218             prime++;
00219         }
00220 
00221         assert((*prime) != 0);
00222 
00223         return (*prime);
00224     }
00225 
00226     static size_t newSize(size_t currentSize, size_t freeSlots)
00227     {
00228     size_t newSize = currentSize;
00229 
00230         //Make sure at least 10% slots are free.
00231         while (freeSlots <= (newSize/10))
00232         {
00233             newSize =  primeGreaterThan(newSize);
00234             // As the array grows more slots will be available
00235             freeSlots = freeSlots + (newSize-currentSize);
00236         }
00237         return newSize;
00238     }
00239 
00240     size_t hash(const Key& k, size_t allocated) const
00241     {
00242         Hasher<Key> hasher;
00243         return hasher(k, allocated);
00244     }
00245 
00246     void rehash(size_t newAlloc)
00247     {
00248         size_t oldAllocated = mAllocated;
00249         Pair** newArray = (Pair**) mPairPtrAlloc.allocate(newAlloc+1);
00250         memset(newArray, 0, sizeof(newArray[0])*(newAlloc+1));
00251 
00252         size_t newFreeSlots = newAlloc;
00253 
00254         for(size_t i=0; i<oldAllocated; ++i)
00255         {
00256             Pair* element = mArray[i];
00257             if(element)
00258             {
00259                 size_t newHashValue = hash(element->mKey, newAlloc);
00260                 size_t index = newHashValue;
00261                 Pair* newElement = newArray[newHashValue];
00262 
00263                 while(newElement!=0)
00264                 {
00265                     index = (index + cIncBy) % newAlloc;
00266                     newElement = newArray[index];
00267                 }
00268 
00269                 // Move the element to the new array
00270                 newArray[index] = element;
00271                 newFreeSlots--;
00272             }
00273         }
00274 
00275         // Elements from the old array have been moved to the new array. Hence no realloc.
00276         if(mArray!=0)
00277             mPairPtrAlloc.deallocate(mArray, mAllocated);
00278         mArray = newArray;
00279         mAllocated = newAlloc;
00280         mFreeSlots = newFreeSlots;
00281     }
00282 
00283     void assign(const Hash& rhs)
00284     {
00285         mArray = (Pair**) mPairPtrAlloc.allocate(rhs.mAllocated+1);
00286         mAllocated = rhs.mAllocated;
00287         mFreeSlots = rhs.mFreeSlots;
00288         mSize = rhs.mSize;
00289         for(size_t i=0;i<=mAllocated;i++)
00290         {
00291             if(rhs.mArray[i])
00292             {
00293                 mArray[i] = mPairAlloc.allocate(1);
00294                 mPairAlloc.construct(mArray[i],*(rhs.mArray[i]));
00295             }
00296             else
00297                 mArray[i] = NULL;
00298         }
00299     }
00300 
00301 public:
00302 
00310     class Iterator
00311     {
00312     private:
00313         friend class Hash;
00314         Hash* mHash;
00315         size_t mIndex;
00316     public:
00317 
00321         inline Iterator(Hash* ht=NULL, size_t index=0) : mHash(ht),
00322          mIndex(index)
00323         {
00324             if(mHash)
00325                 if(index<mHash->allocated() && mHash->element(index)==0)
00326                     ++(*this);
00327         }
00328 
00330         inline Iterator(const Iterator& rhs) : mHash(rhs.mHash),
00331          mIndex(rhs.mIndex)
00332         {
00333         }
00334 
00336         inline Iterator& operator=(const Iterator& rhs)
00337         {
00338             mHash = rhs.mHash;
00339             mIndex = rhs.mIndex;
00340             return *this;
00341         }
00342 
00344         inline bool operator==(const Iterator& rhs) const
00345         {
00346             return mHash == rhs.mHash && mIndex == rhs.mIndex;
00347         }
00348 
00350         inline bool operator!=(const Iterator& rhs) const { return !(*this == rhs); }
00351 
00353         inline Pair& operator*() { return *(mHash->element(mIndex)); }
00354 
00356         inline size_t index() const { return mIndex; }
00357 
00359         inline Iterator& operator++()
00360         {
00361             mIndex++;
00362             while (mIndex<mHash->allocated() && mHash->element(mIndex)==NULL)
00363             {
00364                 mIndex++;
00365             }
00366             return (*this);
00367         }
00368 
00370         inline Iterator& operator++(int)
00371         {
00372             mIndex++;
00373             while (mIndex<mHash->allocated() && mHash->element(mIndex)==NULL)
00374             {
00375                 mIndex++;
00376             }
00377             return (*this);
00378         }
00379 
00381         inline Iterator& operator--()
00382         {
00383             if(mIndex > 0)
00384             {
00385                 mIndex--;
00386                 while (mIndex>0 && mHash->element(mIndex)==NULL)
00387                 {
00388                     mIndex--;
00389                 }
00390             }
00391             return (*this);
00392         }
00393 
00395         inline Iterator& operator--(int)
00396         {
00397             if(mIndex > 0)
00398             {
00399                 mIndex--;
00400                 while (mIndex>0 && mHash->element(mIndex)==NULL)
00401                 {
00402                     mIndex--;
00403                 }
00404             }
00405             return (*this);
00406         }
00407     };
00408 
00409     typedef Iterator iterator;
00410 
00418     class ConstIterator
00419     {
00420     private:
00421         friend class Hash;
00422         Hash* mHash;
00423         size_t mIndex;
00424     public:
00425 
00429         inline ConstIterator(const Hash* ht=NULL, size_t index=0) : mHash(const_cast<Hash*>(ht)),
00430          mIndex(index)
00431         {
00432             if(mHash)
00433                 if(index<mHash->allocated() && mHash->element(index)==0)
00434                     ++(*this);
00435         }
00436 
00438         inline ConstIterator(const ConstIterator& rhs) : mHash(rhs.mHash),
00439          mIndex(rhs.mIndex)
00440         {
00441         }
00442 
00444         inline ConstIterator& operator=(const ConstIterator& rhs)
00445         {
00446             mHash = rhs.mHash;
00447             mIndex = rhs.mIndex;
00448             return *this;
00449         }
00450 
00452         inline bool operator==(const ConstIterator& rhs) const
00453         {
00454             return mHash == rhs.mHash && mIndex == rhs.mIndex;
00455         }
00456 
00458         inline bool operator!=(const ConstIterator& rhs) const { return !(*this == rhs); }
00459 
00461         inline const Pair& operator*() { return *(mHash->element(mIndex)); }
00462 
00464         inline size_t index() const { return mIndex; }
00465 
00467         inline ConstIterator& operator++()
00468         {
00469             mIndex++;
00470             while (mIndex<mHash->allocated() && mHash->element(mIndex)==NULL)
00471             {
00472                 mIndex++;
00473             }
00474             return (*this);
00475         }
00476 
00478         inline ConstIterator& operator++(int junk)
00479         {
00480             mIndex++;
00481             while (mIndex<mHash->allocated() && mHash->element(mIndex)==NULL)
00482             {
00483                 mIndex++;
00484             }
00485             return (*this);
00486         }
00487 
00489         inline ConstIterator& operator--()
00490         {
00491             if(mIndex > 0)
00492             {
00493                 mIndex--;
00494                 while (mIndex>0 && mHash->element(mIndex)==NULL)
00495                 {
00496                     mIndex--;
00497                 }
00498             }
00499             return (*this);
00500         }
00501 
00503         inline ConstIterator& operator--(int)
00504         {
00505             if(mIndex > 0)
00506             {
00507                 mIndex--;
00508                 while (mIndex>0 && mHash->element(mIndex)==NULL)
00509                 {
00510                     mIndex--;
00511                 }
00512             }
00513             return (*this);
00514         }
00515     };
00516 
00517     typedef ConstIterator const_iterator;
00523     class DataProxy
00524     {
00525     private:
00526         Hash& mHash;
00527         const Key& mKey;
00528         DataProxy();
00529     public:
00530         inline DataProxy(Hash& h, const Key&k) : mHash(h), mKey(k) {} //does this need to be public???
00533         inline void operator=(const Data& value) { mHash.set(mKey,value); }
00534 
00538         inline operator Data()
00539         {
00540             Iterator i = mHash.find(mKey);
00541 
00542             if(i==mHash.end())
00543                 throw LDK_INDEX_ERROR("cannot find key");
00544             return (*i).mData;
00545         }
00546     };
00547 
00548 
00550 
00555     inline Hash(size_t initialSize=100) : mArray(NULL),
00556      mAllocated(0),
00557      mFreeSlots(0),
00558      mSize(0)
00559     {
00560         size_t newAlloc = primeGreaterThan(initialSize);
00561         mArray = (Pair**) mPairPtrAlloc.allocate(newAlloc+1);
00562         mAllocated = newAlloc;
00563         mFreeSlots = mAllocated;
00564         memset(mArray, 0, newAlloc);
00565         mSize = 0;
00566     }
00567 
00571     inline Hash(const Hash& rhs)
00572     {
00573         assign(rhs);
00574     }
00575 
00577     inline ~Hash() throw()
00578     {
00579         clear();
00580         mPairPtrAlloc.deallocate(((Pair*)mArray), mAllocated);
00581         mArray = NULL;
00582         mAllocated = 0;
00583         mFreeSlots = 0;
00584         mSize = 0;
00585     }
00586 
00590     inline Hash& operator=(const Hash& rhs)
00591     {
00592         size_t i;
00593         for(i=0;i<mAllocated;i++)
00594         {
00595             if(mArray[i])
00596             {
00597                 mPairAlloc.destroy(mArray[i]);
00598                 mPairAlloc.deallocate(mArray[i],1);
00599             }
00600         }
00601         assign(rhs);
00602         return *this;
00603     }
00604 
00610     ConstIterator find(const Key& key) const
00611     {
00612         size_t hashValue = hash(key, mAllocated);
00613         size_t index = hashValue;
00614 
00615         const Pair* element = mArray[index];
00616         const Data* v = NULL;
00617 
00618         bool searchedAll = false;
00619 
00620         while (!searchedAll)
00621         {
00622             if (element!=0 && element->mKey == key)
00623             {
00624                 v = &element->mData;
00625                 break;
00626             }
00627 
00628             index = (index + cIncBy) % mAllocated;
00629             searchedAll = index==hashValue;
00630             element = mArray[index];
00631         }
00632         if(v == 0)
00633             return end();
00634         else
00635             return ConstIterator(this,index);
00636     }
00637 
00643     inline Iterator find(const Key& key)
00644     {
00645         size_t hashValue = hash(key, mAllocated);
00646         size_t index = hashValue;
00647 
00648         Pair* element = mArray[index];
00649         Data* v=0;
00650 
00651         bool searchedAll = false;
00652 
00653         while (!searchedAll)
00654         {
00655             if (element!=0 && element->mKey == key)
00656             {
00657                 v = &element->mData;
00658                 break;
00659             }
00660 
00661             index = (index + cIncBy) % mAllocated;
00662             searchedAll = index==hashValue;
00663             element = mArray[index];
00664         }
00665         if(v == 0)
00666             return end();
00667         else
00668             return Iterator(this,index);
00669     }
00670 
00674     inline Pair* element(size_t idx)
00675     {
00676         if(idx >= mAllocated)
00677             throw LDK_INDEX_ERROR("Hash index out of range");
00678         return mArray[idx];
00679 
00680     }
00684     inline const Pair* element(size_t idx) const
00685     {
00686         if(idx >= mAllocated)
00687             throw LDK_INDEX_ERROR("Hash index out of range");
00688         return mArray[idx];
00689     }
00690 
00696     inline void set(const Key& k, const Data& d)
00697     {
00698         Iterator i = find(k);
00699         if(i == end())
00700         {
00701             if(!insert(k, d)) //slightly slow because it calls find again
00702                 throw LDK_INDEX_ERROR("cannot find key");
00703         }
00704         else
00705             (*i).mData = d;
00706     }
00707 
00712     bool insert(const Key& key, const Data& data)
00713     {
00714         if(find(key) != end())
00715             return false;
00716 
00717         size_t newAlloc = newSize(mAllocated, mFreeSlots);
00718         bool allSearched=false;
00719         if(newAlloc > mAllocated)
00720             rehash(newAlloc);
00721 
00722         size_t hashValue = hash(key, mAllocated);
00723         size_t index = hashValue;
00724         Pair*  element = mArray[hashValue];
00725 
00726         while(element && !allSearched)
00727         {
00728             index = (index + cIncBy) % mAllocated;
00729             element = mArray[index];
00730             allSearched = index == hashValue;
00731         }
00732 
00733         if(allSearched)
00734             return false;
00735         else if(!element)
00736         {
00737             Pair *mem = mPairAlloc.allocate(1);
00738             mArray[index] = new(mem) Pair(key, data);
00739             mFreeSlots--;
00740             mSize++;
00741             return true;
00742         }
00743         return false;
00744     }
00745 
00749     inline bool insert(const Pair& p) { return insert(p.mKey, p.mData); }
00750 
00755     inline Iterator erase(const Key& key)
00756     {
00757         Iterator it = find(key);
00758 
00759         if(it == end())
00760             return it;
00761         size_t index = it.index();
00762         it++;
00763         mPairAlloc.destroy(mArray[index]);
00764         mPairAlloc.deallocate(mArray[index]);
00765         mArray[index] = NULL;
00766         mFreeSlots++;
00767         mSize--;
00768         return it;
00769     }
00770 
00776     inline Iterator erase(Iterator it)
00777     {
00778         assert(it.mHash == this);
00779         if(it == end())
00780             return it;
00781         size_t index = it.index();
00782         it++;
00783         mPairAlloc.destroy(mArray[index]);
00784         mPairAlloc.deallocate(mArray[index], 1);
00785         mArray[index] = NULL;
00786         mFreeSlots++;
00787         mSize--;
00788         return it;
00789     }
00790 
00793     inline size_t size() const { return mSize; }
00794 
00797     inline size_t allocated() const { return mAllocated; }
00798 
00801     inline Iterator        begin() { return Iterator(this, 0); }
00802 
00805     inline ConstIterator   begin() const { return ConstIterator(this, 0); }
00806 
00809     inline Iterator        end() { return Iterator(this, mAllocated); }
00810 
00813     inline ConstIterator   end() const { return ConstIterator(this, mAllocated); }
00814 
00816     inline void clear()
00817     {
00818         Iterator i = begin();
00819         Iterator e = end();
00820         size_t idx;
00821         while(i != e)
00822         {
00823             idx = i.index();
00824             mPairAlloc.destroy(mArray[idx]);
00825             mPairAlloc.deallocate(mArray[idx], 1);
00826             i++;
00827         }
00828         mFreeSlots = mAllocated;
00829         memset(mArray, 0, mAllocated);
00830 
00831     }
00832 
00837     inline DataProxy operator[](const Key& k) { return DataProxy(*this, k); }
00838 
00843     inline const Data& operator[](const Key& k) const
00844     {
00845         ConstIterator i = find(k);
00846         if(i == end())
00847             throw LDK_INDEX_ERROR("cannot find key");
00848         return (*i).mData;
00849     }
00850 };
00851 
00853 }//namespace LDK
00854 
00855 #include "LDK/CStringHash.h"
00856 
00857 #endif //__LDK_HASH_HH__

Generated on Fri Aug 17 18:32:26 2007 for LDK by  doxygen 1.5.1