hash.hpp

    1: #ifndef HASH_HPP
    2: #define HASH_HPP
    3: /*----------------------------------------------------------------------------
    4: 
    5:   Author:    Andy Rushton
    6:   Copyright: (c) Southampton University 1999-2004
    7:              (c) Andy Rushton           2004-2008
    8:   License:   BSD License, see ../docs/license.html
    9: 
   10:   A chained hash table using STL semantics
   11: 
   12: ------------------------------------------------------------------------------*/
   13: #include "os_fixes.hpp"
   14: #include "textio.hpp"
   15: #include "persistent.hpp"
   16: #include "exceptions.hpp"
   17: #include <map>
   18: 
   19: ////////////////////////////////////////////////////////////////////////////////
   20: // internals
   21: 
   22: template<typename K, typename T, class H, class E> class hash;
   23: template<typename K, typename T> class hash_element;
   24: 
   25: ////////////////////////////////////////////////////////////////////////////////
   26: // iterator class
   27: 
   28: template<typename K, typename T, class H, class E, typename V>
   29: class hash_iterator
   30: {
   31: public:
   32:   friend class hash<K,T,H,E>;
   33: 
   34:   // local type definitions
   35:   // an iterator points to a value whilst a const_iterator points to a const value
   36:   typedef V                                                  value_type;
   37:   typedef hash_iterator<K,T,H,E,std::pair<const K,T> >       iterator;
   38:   typedef hash_iterator<K,T,H,E,const std::pair<const K,T> > const_iterator;
   39:   typedef hash_iterator<K,T,H,E,V>                           this_iterator;
   40:   typedef V&                                                 reference;
   41:   typedef V*                                                 pointer;
   42: 
   43:   // constructor to create a null iterator - you must assign a valid value to this iterator before using it
   44:   // any attempt to dereference or use a null iterator is an error
   45:   // the only valid thing you can do is assign an iterator to it
   46:   hash_iterator(void);
   47:   ~hash_iterator(void);
   48: 
   49:   // tests
   50:   // a null iterator is one that has not been initialised with a value yet
   51:   // i.e. you just declared it but didn't assign to it
   52:   bool null(void) const;
   53:   // an end iterator is one that points to the end element of the list of nodes
   54:   // in STL conventions this is one past the last valid element and must not be dereferenced
   55:   bool end(void) const;
   56:   // a valid iterator is one that can be dereferenced
   57:   // i.e. non-null and non-end
   58:   bool valid(void) const;
   59: 
   60:   // get the hash container that created this iterator
   61:   // a null iterator doesn't have an owner so returns a null pointer
   62:   const hash<K,T,H,E>* owner(void) const;
   63: 
   64:   // Type conversion methods allow const_iterator and iterator to be converted
   65:   // convert an iterator/const_iterator to a const_iterator
   66:   const_iterator constify(void) const;
   67:   // convert an iterator/const_iterator to an iterator
   68:   iterator deconstify(void) const;
   69: 
   70:   // increment operators used to step through the set of all values in a hash
   71:   // it is only legal to increment a valid iterator
   72:   // there's no decrement - I've only implemented this as a unidirectional iterator
   73:   // pre-increment
   74:   this_iterator& operator ++ (void)
   75:     throw(null_dereference,end_dereference);
   76:   // post-increment
   77:   this_iterator operator ++ (int)
   78:     throw(null_dereference,end_dereference);
   79: 
   80:   // tests useful for putting iterators into other STL structures and for testing whether iteration has completed
   81:   bool operator == (const this_iterator& r) const;
   82:   bool operator != (const this_iterator& r) const;
   83:   bool operator < (const this_iterator& r) const;
   84: 
   85:   // access the value - a const_iterator gives you a const value, an iterator a non-const value
   86:   // it is illegal to dereference an invalid (i.e. null or end) iterator
   87:   reference operator*(void) const
   88:     throw(null_dereference,end_dereference);
   89:   pointer operator->(void) const
   90:     throw(null_dereference,end_dereference);
   91: 
   92:   // Note: hash iterators are not persistent for a good reason: they are
   93:   // invalidated by rehashing and so it is not a good idea to build data
   94:   // structures containing hash iterators in the first place
   95: 
   96: private:
   97:   friend class hash_element<K,T>;
   98: 
   99:   const hash<K,T,H,E>* m_owner;
  100:   unsigned m_bin;
  101:   hash_element<K,T>* m_element;
  102: 
  103:   void check_owner(const hash<K,T,H,E>* owner) const
  104:     throw(wrong_object);
  105:   void check_non_null(void) const
  106:     throw(null_dereference);
  107:   void check_non_end(void) const
  108:     throw(end_dereference);
  109:   void check_valid(void) const
  110:     throw(null_dereference,end_dereference);
  111:   void check(const hash<K,T,H,E>* owner) const
  112:     throw(wrong_object,null_dereference,end_dereference);
  113: 
  114:   // constructor used by hash to create a non-null iterator
  115:   // you cannot create a valid iterator except by calling a hash method that returns one
  116:   hash_iterator(const hash<K,T,H,E>* owner, unsigned bin, hash_element<K,T>* element);
  117: };
  118: 
  119: ////////////////////////////////////////////////////////////////////////////////
  120: // Hash class
  121: // K = key type
  122: // T = value type
  123: // H = hash function object with the profile 'unsigned H(const K&)'
  124: // E = equal function object with the profile 'bool E(const K&, const K&)' defaults to equal_to which in turn calls '=='
  125: 
  126: template<typename K, typename T, class H, class E = std::equal_to<K> >
  127: class hash
  128: {
  129: public:
  130:   typedef unsigned                                size_type;
  131:   typedef K                                       key_type;
  132:   typedef T                                       data_type;
  133:   typedef T                                       mapped_type;
  134:   typedef std::pair<const K, T>                   value_type;
  135:   typedef hash_iterator<K,T,H,E,value_type>       iterator;
  136:   typedef hash_iterator<K,T,H,E,const value_type> const_iterator;
  137: 
  138:   // construct a hash table with specified number of bins
  139:   // the default 0 bins means leave it to the table to decide
  140:   // specifying 0 bins also enables auto-rehashing, otherwise auto-rehashing defaults off
  141:   hash(unsigned bins = 0);
  142:   ~hash(void);
  143: 
  144:   // copy and equality copy the data elements but not the size of the copied table
  145:   hash(const hash&);
  146:   hash& operator = (const hash&);
  147: 
  148:   // test for an empty table and for the size of a table - efficient because the size is stored separately from the table contents
  149:   bool empty(void) const;
  150:   unsigned size(void) const;
  151: 
  152:   // test for equality - two hashes are equal if they contain equal values
  153:   bool operator == (const hash&) const;
  154:   bool operator != (const hash&) const;
  155: 
  156:   // switch auto-rehash on
  157:   void auto_rehash(void);
  158:   // switch auto-rehash off
  159:   void manual_rehash(void);
  160:   // force a rehash now
  161:   // default of 0 means implement built-in size calculation for rehashing (recommended - it doubles the number of bins)
  162:   void rehash(unsigned bins = 0);
  163:   // test the loading ratio, which is the size divided by the number of bins
  164:   // use this if you are doing your own rehashing
  165:   // the recommendation is to double the bins when the loading exceeds 0.5 which is what auto-rehashing does
  166:   float loading(void) const;
  167: 
  168:   // test for the presence of a key
  169:   bool present(const K& key) const;
  170:   // provide map equivalent key count function (0 or 1, as not a multimap)
  171:   size_type count(const K& key) const;
  172: 
  173:   // insert a new key/data pair - replaces any previous value for this key
  174:   iterator insert(const K& key, const T& data);
  175:   // insert a copy of the pair into the table (std::map compatible)
  176:   std::pair<iterator, bool> insert(const value_type& value);
  177:   // insert a new key and return the iterator so that the data can be filled in
  178:   iterator insert(const K& key);
  179: 
  180:   // remove a key/data pair from the hash table
  181:   bool erase(const K& key);
  182:   // remove all elements from the hash table
  183:   void erase(void);
  184:   // provide the std::map equivalent clear function
  185:   void clear(void);
  186: 
  187:   // find a key and return an iterator to it
  188:   // The iterator is like a pointer to a pair<const K,T>
  189:   // end() is returned if the find fails
  190:   const_iterator find(const K& key) const;
  191:   iterator find(const K& key);
  192:   // returns the data corresponding to the key
  193:   // the const version is used by the compiler on const hashes and cannot change the hash, so find failure causes an exception
  194:   // the non-const version is used by the compiler on non-const hashes and is like map - it creates a new key/data pair if find fails
  195:   const T& operator[] (const K& key) const;
  196:   T& operator[] (const K& key);
  197: 
  198:   // iterators allow the hash table to be traversed
  199:   // iterators remain valid unless an item is removed or unless a rehash happens
  200:   const_iterator begin(void) const;
  201:   iterator begin(void);
  202:   const_iterator end(void) const;
  203:   iterator end(void);
  204: 
  205:   // diagnostic report shows the number of items in each bin so can be used to diagnose effectiveness of hash functions
  206:   void debug_report(otext&, unsigned indent = 0) const;
  207:   // the same diagnostic function that returns a string, rather than use an otext stream
  208:   std::string debug_report(unsigned indent = 0) const;
  209: 
  210:   // persistence methods
  211:   void dump(dump_context&) const
  212:     throw(persistent_dump_failed);
  213:   void restore(restore_context&)
  214:     throw(persistent_restore_failed);
  215: 
  216:   // internals
  217: private:
  218:   friend class hash_element<K,T>;
  219:   friend class hash_iterator<K,T,H,E,std::pair<const K,T> >;
  220:   friend class hash_iterator<K,T,H,E,const std::pair<const K,T> >;
  221: 
  222:   unsigned m_rehash;
  223:   unsigned m_bins;
  224:   unsigned m_size;
  225:   hash_element<K,T>** m_values;
  226: };
  227: 
  228: ////////////////////////////////////////////////////////////////////////////////
  229: 
  230: template<typename K, typename T, class H, class E>
  231: otext& print(otext& str, const hash<K,T,H,E>& table, unsigned indent = 0);
  232: 
  233: template<typename K, typename T, class H, class E>
  234: otext& operator << (otext& str, const hash<K,T,H,E>& table);
  235: 
  236: ////////////////////////////////////////////////////////////////////////////////
  237: 
  238: template<typename K, typename T, class H, class E>
  239: void dump_hash(dump_context& str, const hash<K,T,H,E>& data)
  240:   throw(persistent_dump_failed);
  241: 
  242: template<typename K, typename T, class H, class E>
  243: void restore_hash(restore_context& str, hash<K,T,H,E>& data)
  244:   throw(persistent_restore_failed);
  245: 
  246: ////////////////////////////////////////////////////////////////////////////////
  247: 
  248: #include "hash.tpp"
  249: #endif