CStringHash.h

00001 #ifndef __LDK_CSTRINGHASH_H__
00002 #define __LDK_CSTRINGHASH_H__
00003 
00005 //   Copyright (C) 2003-2007 Lorien Dunn.
00006 //
00007 //   Contact: loriendunn AT users DOT sourceforge DOT net
00008 //
00009 //   This software is provided 'as-is', without any express or implied warranty.
00010 //   In no event will the authors be held liable for any damages arising from
00011 //   the use of this software.
00012 //
00013 //   Permission is granted to anyone to use this software for any purpose,
00014 //   including commercial applications, and to alter it and redistribute it
00015 //   freely, subject to the following restrictions:
00016 //
00017 //   1. The origin of this software must not be misrepresented; you must not
00018 //   claim that you wrote the original software. If you use this software in a
00019 //   product, an acknowledgment in the product documentation would be
00020 //   appreciated but is not required.
00021 //
00022 //   2. Altered source versions must be plainly marked as such, and must not be
00023 //   misrepresented as being the original software.
00024 //
00025 //   3. This notice may not be removed or altered from any source distribution.
00026 //
00028 
00029 #include "LDK/Hash.h"
00030 
00031 namespace LDK
00032 {
00033 
00034 //Nasty partial specialisation for Key==const char* .
00035 //Pair holds copies of the strings
00041 template
00042 <
00043     class Data,
00044     template <class> class Alloc
00045 >
00046 class Hash<const char*,Data,Alloc> : public Object
00047 {
00048 public:
00049     class Pair
00050     {
00051     private:
00052         void operator=(const Pair& rhs) {}
00053         typedef Alloc<char> CharAlloc;
00054         CharAlloc mCharAlloc;
00055     public:
00056     const char* mKey;
00057     Data mData;
00058         inline Pair(const char* k, const Data& d) : mKey(NULL), mData(d)
00059         {
00060             size_t len = strlen(k);
00061             if(len > 1024)
00062                 throw LDK_ARGUMENT_ERROR("CStringHash key too long");
00063             mKey = (const char*) mCharAlloc.allocate(len+1);
00064             strncpy(const_cast<char*>(mKey),k,len+1);
00065         }
00066         inline Pair(const Pair& rhs)
00067         {
00068             size_t len = strlen(rhs.mKey);
00069             mKey = (const char*) mCharAlloc.allocate(len+1);
00070             strcpy(const_cast<char*>(mKey),rhs.mKey);
00071         }
00072         inline ~Pair()
00073         {
00074             mCharAlloc.deallocate((void*)mKey,1);
00075             mKey = NULL;
00076         }
00077     };
00078 protected:
00079     typedef Alloc<Pair> PairAlloc;
00080     PairAlloc mPairAlloc;
00081     typedef Alloc<Pair*> PairPtrAlloc;
00082     PairPtrAlloc mPairPtrAlloc;
00083     Pair**  mArray;
00084     size_t  mAllocated;
00085     size_t  mFreeSlots;
00086     size_t  mSize;
00087     static const int cIncBy = 7;
00088 
00089     static size_t primeGreaterThan(size_t size)
00090     {
00091         const size_t* prime = PrimeConsts;
00092 
00093         while(prime)
00094         {
00095             if((*prime) == size)
00096             {
00097                 prime++;
00098                 break;
00099             }
00100             if((*prime) > size)
00101                 break;
00102             prime++;
00103         }
00104 
00105         assert((*prime) != 0);
00106 
00107         return (*prime);
00108     }
00109 
00110     static size_t newSize(size_t currentSize, size_t freeSlots)
00111     {
00112     size_t newSize = currentSize;
00113 
00114         //Make sure at least 10% slots are free.
00115         while (freeSlots <= (newSize/10))
00116         {
00117             newSize =  primeGreaterThan(newSize);
00118             // As the array grows more slots will be available
00119             freeSlots = freeSlots + (newSize-currentSize);
00120         }
00121         return newSize;
00122     }
00123 
00124     size_t hash(const char* k, size_t allocated) const
00125     {
00126         Hasher<const char*> hasher;
00127         return hasher(k, allocated);
00128     }
00129 
00130     void rehash(size_t newAlloc)
00131     {
00132         size_t oldAllocated = mAllocated;
00133         Pair** newArray = (Pair**) mPairPtrAlloc.allocate(newAlloc+1);
00134         memset(newArray, 0, sizeof(newArray[0])*(newAlloc+1));
00135 
00136         size_t newFreeSlots = newAlloc;
00137 
00138         for(size_t i=0; i<oldAllocated; ++i)
00139         {
00140             Pair* element = mArray[i];
00141             if(element)
00142             {
00143                 size_t newHashValue = hash(element->mKey, newAlloc);
00144                 size_t index = newHashValue;
00145                 Pair* newElement = newArray[newHashValue];
00146 
00147                 while(newElement!=0)
00148                 {
00149                     index = (index + cIncBy) % newAlloc;
00150                     newElement = newArray[index];
00151                 }
00152 
00153                 // Move the element to the new array
00154                 newArray[index] = element;
00155                 newFreeSlots--;
00156             }
00157         }
00158 
00159         // Elements from the old array have been moved to the new array. Hence no realloc.
00160         if(mArray!=0)
00161             mPairPtrAlloc.deallocate(mArray,1);
00162         mArray = newArray;
00163         mAllocated = newAlloc;
00164         mFreeSlots = newFreeSlots;
00165     }
00166 
00167     void assign(const Hash& rhs)
00168     {
00169         mArray = (Pair**) mPairPtrAlloc.allocate(rhs.mAllocated+1);
00170         mAllocated = rhs.mAllocated;
00171         mFreeSlots = rhs.mFreeSlots;
00172         mSize = rhs.mSize;
00173         for(size_t i=0;i<=mAllocated;i++)
00174         {
00175             if(rhs.mArray[i])
00176             {
00177                 mArray[i] = mPairAlloc.allocate(1);
00178                 mPairAlloc.construct(mArray[i],*(rhs.mArray[i]));
00179             }
00180             else
00181                 mArray[i] = NULL;
00182         }
00183     }
00184 
00185 public:
00186 
00187     class Iterator
00188     {
00189     private:
00190         friend class Hash;
00191         Hash* mHash;
00192         size_t mIndex;
00193     public:
00194 
00195         inline Iterator(Hash* ht=NULL, size_t index=0) : mHash(ht),
00196          mIndex(index)
00197         {
00198             if(mHash)
00199                 if(index<mHash->allocated() && mHash->element(index)==0)
00200                     ++(*this);
00201         }
00202 
00203         inline Iterator(const Iterator& rhs) : mHash(rhs.mHash),
00204          mIndex(rhs.mIndex)
00205         {
00206         }
00207 
00208         inline Iterator& operator=(const Iterator& rhs)
00209         {
00210             mHash = rhs.mHash;
00211             mIndex = rhs.mIndex;
00212             return *this;
00213         }
00214         inline bool operator==(const Iterator& rhs) const
00215         {
00216             return mHash == rhs.mHash && mIndex == rhs.mIndex;
00217         }
00218         inline bool operator!=(const Iterator& rhs) const { return !(*this == rhs); }
00219         inline Pair& operator*() { return *(mHash->element(mIndex)); }
00220         inline size_t index() const { return mIndex; }
00221 
00222         inline Iterator& operator++()
00223         {
00224             mIndex++;
00225             while (mIndex<mHash->allocated() && mHash->element(mIndex)==NULL)
00226             {
00227                 mIndex++;
00228             }
00229             return (*this);
00230         }
00231         inline Iterator& operator++(int)
00232         {
00233             mIndex++;
00234             while (mIndex<mHash->allocated() && mHash->element(mIndex)==NULL)
00235             {
00236                 mIndex++;
00237             }
00238             return (*this);
00239         }
00240 
00241         inline Iterator& operator--()
00242         {
00243             if(mIndex > 0)
00244             {
00245                 mIndex--;
00246                 while (mIndex>0 && mHash->element(mIndex)==NULL)
00247                 {
00248                     mIndex--;
00249                 }
00250             }
00251             return (*this);
00252         }
00253         inline Iterator& operator--(int)
00254         {
00255             if(mIndex > 0)
00256             {
00257                 mIndex--;
00258                 while (mIndex>0 && mHash->element(mIndex)==NULL)
00259                 {
00260                     mIndex--;
00261                 }
00262             }
00263             return (*this);
00264         }
00265     };
00266 
00267     typedef Iterator iterator;
00268 
00269     class ConstIterator
00270     {
00271     private:
00272         friend class Hash;
00273         Hash* mHash;
00274         size_t mIndex;
00275     public:
00276 
00277         inline ConstIterator(const Hash* ht=NULL, size_t index=0) : mHash(const_cast<Hash*>(ht)),
00278          mIndex(index)
00279         {
00280             if(mHash)
00281                 if(index<mHash->allocated() && mHash->element(index)==0)
00282                     ++(*this);
00283         }
00284 
00285         inline ConstIterator(const ConstIterator& rhs) : mHash(rhs.mHash),
00286          mIndex(rhs.mIndex)
00287         {
00288         }
00289 
00290         inline ConstIterator& operator=(const ConstIterator& rhs)
00291         {
00292             mHash = rhs.mHash;
00293             mIndex = rhs.mIndex;
00294             return *this;
00295         }
00296         inline bool operator==(const ConstIterator& rhs) const
00297         {
00298             return mHash == rhs.mHash && mIndex == rhs.mIndex;
00299         }
00300         inline bool operator!=(const ConstIterator& rhs) const { return !(*this == rhs); }
00301         inline const Pair& operator*() { return *(mHash->element(mIndex)); }
00302         inline size_t index() const { return mIndex; }
00303 
00304         inline ConstIterator& operator++()
00305         {
00306             mIndex++;
00307             while (mIndex<mHash->allocated() && mHash->element(mIndex)==NULL)
00308             {
00309                 mIndex++;
00310             }
00311             return (*this);
00312         }
00313         inline ConstIterator& operator++(int junk)
00314         {
00315             mIndex++;
00316             while (mIndex<mHash->allocated() && mHash->element(mIndex)==NULL)
00317             {
00318                 mIndex++;
00319             }
00320             return (*this);
00321         }
00322 
00323         inline ConstIterator& operator--()
00324         {
00325             if(mIndex > 0)
00326             {
00327                 mIndex--;
00328                 while (mIndex>0 && mHash->element(mIndex)==NULL)
00329                 {
00330                     mIndex--;
00331                 }
00332             }
00333             return (*this);
00334         }
00335         inline ConstIterator& operator--(int)
00336         {
00337             if(mIndex > 0)
00338             {
00339                 mIndex--;
00340                 while (mIndex>0 && mHash->element(mIndex)==NULL)
00341                 {
00342                     mIndex--;
00343                 }
00344             }
00345             return (*this);
00346         }
00347     };
00348 
00349     typedef ConstIterator const_iterator;
00350 
00351     class DataProxy
00352     {
00353     private:
00354         Hash& mHash;
00355         const char* mKey;
00356         DataProxy();
00357     public:
00358         inline DataProxy(Hash& h, const char* k) : mHash(h), mKey(k) {}
00359         inline void operator=(const Data& value) { mHash.set(mKey,value); }
00360         inline operator Data()
00361         {
00362             Iterator i = mHash.find(mKey);
00363 
00364             if(i==mHash.end())
00365                 throw LDK_INDEX_ERROR("cannot find key");
00366             return (*i).mData;
00367         }
00368     };
00369 
00370     Hash(size_t initialSize=100) : mArray(NULL),
00371      mAllocated(0),
00372      mFreeSlots(0),
00373      mSize(0)
00374     {
00375         size_t newAlloc = primeGreaterThan(initialSize);
00376         mArray = (Pair**) mPairPtrAlloc.allocate(newAlloc+1);
00377         mAllocated = newAlloc;
00378         mFreeSlots = mAllocated;
00379         memset(mArray, 0, newAlloc);
00380         mSize = 0;
00381     }
00382     //todo: store length of string in pair
00383     inline Hash(const Hash& rhs)
00384     {
00385         assign(rhs);
00386     }
00387 
00388     ~Hash() throw()
00389     {
00390         clear();
00391         mPairPtrAlloc.deallocate(mArray,mAllocated);
00392         mArray = NULL;
00393         mAllocated = 0;
00394         mFreeSlots = 0;
00395         mSize = 0;
00396     }
00397 
00398     inline Hash& operator=(const Hash& rhs)
00399     {
00400         size_t i;
00401         for(i=0;i<mAllocated;i++)
00402         {
00403             if(mArray[i])
00404             {
00405                 mPairAlloc.destroy(mArray[i]);
00406                 mPairAlloc.deallocate(mArray[i],1);
00407             }
00408         }
00409         assign(rhs);
00410         return *this;
00411     }
00412 
00413     ConstIterator find(const char* key) const
00414     {
00415         size_t hashValue = hash(key, mAllocated);
00416         size_t index = hashValue;
00417 
00418         const Pair* element = mArray[index];
00419         const Data* v = NULL;
00420 
00421         bool searchedAll = false;
00422 
00423         while (!searchedAll)
00424         {
00425             if (element!=0 && !strcmp(element->mKey, key))
00426             {
00427                 v = &element->mData;
00428                 break;
00429             }
00430 
00431             index = (index + cIncBy) % mAllocated;
00432             searchedAll = index==hashValue;
00433             element = mArray[index];
00434         }
00435         if(v == 0)
00436             return end();
00437         else
00438             return ConstIterator(this,index);
00439     }
00440 
00441     inline Iterator find(const char* key)
00442     {
00443         size_t hashValue = hash(key, mAllocated);
00444         size_t index = hashValue;
00445 
00446         Pair* element = mArray[index];
00447         Data* v=0;
00448 
00449         bool searchedAll = false;
00450 
00451         while (!searchedAll)
00452         {
00453             if (element!=0 && !strcmp(element->mKey, key))
00454             {
00455                 v = &element->mData;
00456                 break;
00457             }
00458 
00459             index = (index + cIncBy) % mAllocated;
00460             searchedAll = index==hashValue;
00461             element = mArray[index];
00462         }
00463         if(v == 0)
00464             return end();
00465         else
00466             return Iterator(this,index);
00467     }
00468 
00469     inline Pair* element(size_t idx)
00470     {
00471         if(idx >= mAllocated)
00472             throw LDK_INDEX_ERROR("Hash index out of range");
00473         return mArray[idx];
00474 
00475     }
00476 
00477     inline const Pair* element(size_t idx) const
00478     {
00479         if(idx >= mAllocated)
00480             throw LDK_INDEX_ERROR("Hash index out of range");
00481         return mArray[idx];
00482     }
00483 
00484     inline void set(const char* k, const Data& d)
00485     {
00486         Iterator i = find(k);
00487         if(i == end())
00488         {
00489             if (!insert(k, d))
00490                 throw LDK_RUNTIME_ERROR("cannot find key");
00491         }
00492         else
00493             (*i).mData = d;
00494     }
00495 
00496     inline bool insert(const Pair& p) { return insert(p.mKey, p.mData); }
00497 
00498     inline bool insert(const char* key, const Data& data)
00499     {
00500         if(find(key) != end())
00501             return false;
00502 
00503         size_t newAlloc = newSize(mAllocated, mFreeSlots);
00504         bool allSearched=false;
00505         if(newAlloc > mAllocated)
00506             rehash(newAlloc);
00507 
00508         size_t hashValue = hash(key, mAllocated);
00509         size_t index = hashValue;
00510         Pair*  element = mArray[hashValue];
00511 
00512         while(element && !allSearched)
00513         {
00514             index = (index + cIncBy) % mAllocated;
00515             element = mArray[index];
00516             allSearched = index == hashValue;
00517         }
00518 
00519         if(allSearched)
00520             return false;
00521         else if(!element)
00522         {
00523             Pair *mem = mPairAlloc.allocate(1);
00524             mArray[index] = new(mem) Pair(key, data);
00525             mFreeSlots--;
00526             mSize++;
00527             return true;
00528         }
00529         return false;
00530     }
00531 
00532     inline Iterator erase(const char* key)
00533     {
00534         Iterator it = find(key);
00535 
00536         if(it == end())
00537             return it;
00538         size_t index = it.index();
00539         it++;
00540         mPairAlloc.destroy(mArray[index]);
00541         mPairAlloc.deallocate(mArray[index]);
00542         mArray[index] = NULL;
00543         mFreeSlots++;
00544         mSize--;
00545         return it;
00546     }
00547 
00548     inline Iterator erase(Iterator it)
00549     {
00550         assert(it.mHash == this);
00551         if(it == end())
00552             return it;
00553         size_t index = it.index();
00554         it++;
00555         mPairAlloc.destroy(mArray[index]);
00556         mPairAlloc.deallocate(mArray[index],1);
00557         mArray[index] = NULL;
00558         mFreeSlots++;
00559         mSize--;
00560         return it;
00561     }
00562 
00563     inline size_t size() const { return mSize; }
00564 
00565     inline size_t allocated() const { return mAllocated; }
00566 
00567     inline Iterator        begin() { return Iterator(this, 0); }
00568     inline ConstIterator   begin() const { return ConstIterator(this, 0); }
00569     inline Iterator        end() { return Iterator(this, mAllocated); }
00570     inline ConstIterator   end() const { return ConstIterator(this, mAllocated); }
00571 
00572     inline void clear()
00573     {
00574         Iterator i = begin();
00575         Iterator e = end();
00576         size_t idx;
00577         while(i != e)
00578         {
00579             idx = i.index();
00580             mPairAlloc.destroy(mArray[idx]);
00581             mPairAlloc.deallocate(mArray[idx],1);
00582             i++;
00583         }
00584         mFreeSlots = mAllocated;
00585         memset(mArray, 0, mAllocated);
00586 
00587     }
00588 
00589     inline DataProxy operator[](const char* k) { return DataProxy(*this, k); }
00590     inline const Data& operator[](const char* k) const
00591     {
00592         ConstIterator i = find(k);
00593         if(i == end())
00594             throw LDK_INDEX_ERROR("cannot find key");
00595         return (*i).mData;
00596     }
00597 };
00598 
00599 }//namespace LDK
00600 
00601 #endif //__LDK_CSTRINGHASH_H__

Generated on Fri Aug 17 18:32:26 2007 for LDK by  doxygen 1.5.1