hash.hpp
1: #ifndef HASH_HPP
2: #define HASH_HPP
3: /*----------------------------------------------------------------------------
4:
5: Author: Andy Rushton
6: Copyright: (c) Southampton University 1999-2004
7: (c) Andy Rushton 2004-2008
8: License: BSD License, see ../docs/license.html
9:
10: A chained hash table using STL semantics
11:
12: ------------------------------------------------------------------------------*/
13: #include "os_fixes.hpp"
14: #include "textio.hpp"
15: #include "persistent.hpp"
16: #include "exceptions.hpp"
17: #include <map>
18:
19: ////////////////////////////////////////////////////////////////////////////////
20: // internals
21:
22: template<typename K, typename T, class H, class E> class hash;
23: template<typename K, typename T> class hash_element;
24:
25: ////////////////////////////////////////////////////////////////////////////////
26: // iterator class
27:
28: template<typename K, typename T, class H, class E, typename V>
29: class hash_iterator
30: {
31: public:
32: friend class hash<K,T,H,E>;
33:
34: // local type definitions
35: // an iterator points to a value whilst a const_iterator points to a const value
36: typedef V value_type;
37: typedef hash_iterator<K,T,H,E,std::pair<const K,T> > iterator;
38: typedef hash_iterator<K,T,H,E,const std::pair<const K,T> > const_iterator;
39: typedef hash_iterator<K,T,H,E,V> this_iterator;
40: typedef V& reference;
41: typedef V* pointer;
42:
43: // constructor to create a null iterator - you must assign a valid value to this iterator before using it
44: // any attempt to dereference or use a null iterator is an error
45: // the only valid thing you can do is assign an iterator to it
46: hash_iterator(void);
47: ~hash_iterator(void);
48:
49: // tests
50: // a null iterator is one that has not been initialised with a value yet
51: // i.e. you just declared it but didn't assign to it
52: bool null(void) const;
53: // an end iterator is one that points to the end element of the list of nodes
54: // in STL conventions this is one past the last valid element and must not be dereferenced
55: bool end(void) const;
56: // a valid iterator is one that can be dereferenced
57: // i.e. non-null and non-end
58: bool valid(void) const;
59:
60: // get the hash container that created this iterator
61: // a null iterator doesn't have an owner so returns a null pointer
62: const hash<K,T,H,E>* owner(void) const;
63:
64: // Type conversion methods allow const_iterator and iterator to be converted
65: // convert an iterator/const_iterator to a const_iterator
66: const_iterator constify(void) const;
67: // convert an iterator/const_iterator to an iterator
68: iterator deconstify(void) const;
69:
70: // increment operators used to step through the set of all values in a hash
71: // it is only legal to increment a valid iterator
72: // there's no decrement - I've only implemented this as a unidirectional iterator
73: // pre-increment
74: this_iterator& operator ++ (void)
75: throw(null_dereference,end_dereference);
76: // post-increment
77: this_iterator operator ++ (int)
78: throw(null_dereference,end_dereference);
79:
80: // tests useful for putting iterators into other STL structures and for testing whether iteration has completed
81: bool operator == (const this_iterator& r) const;
82: bool operator != (const this_iterator& r) const;
83: bool operator < (const this_iterator& r) const;
84:
85: // access the value - a const_iterator gives you a const value, an iterator a non-const value
86: // it is illegal to dereference an invalid (i.e. null or end) iterator
87: reference operator*(void) const
88: throw(null_dereference,end_dereference);
89: pointer operator->(void) const
90: throw(null_dereference,end_dereference);
91:
92: // Note: hash iterators are not persistent for a good reason: they are
93: // invalidated by rehashing and so it is not a good idea to build data
94: // structures containing hash iterators in the first place
95:
96: private:
97: friend class hash_element<K,T>;
98:
99: const hash<K,T,H,E>* m_owner;
100: unsigned m_bin;
101: hash_element<K,T>* m_element;
102:
103: void check_owner(const hash<K,T,H,E>* owner) const
104: throw(wrong_object);
105: void check_non_null(void) const
106: throw(null_dereference);
107: void check_non_end(void) const
108: throw(end_dereference);
109: void check_valid(void) const
110: throw(null_dereference,end_dereference);
111: void check(const hash<K,T,H,E>* owner) const
112: throw(wrong_object,null_dereference,end_dereference);
113:
114: // constructor used by hash to create a non-null iterator
115: // you cannot create a valid iterator except by calling a hash method that returns one
116: hash_iterator(const hash<K,T,H,E>* owner, unsigned bin, hash_element<K,T>* element);
117: };
118:
119: ////////////////////////////////////////////////////////////////////////////////
120: // Hash class
121: // K = key type
122: // T = value type
123: // H = hash function object with the profile 'unsigned H(const K&)'
124: // E = equal function object with the profile 'bool E(const K&, const K&)' defaults to equal_to which in turn calls '=='
125:
126: template<typename K, typename T, class H, class E = std::equal_to<K> >
127: class hash
128: {
129: public:
130: typedef unsigned size_type;
131: typedef K key_type;
132: typedef T data_type;
133: typedef T mapped_type;
134: typedef std::pair<const K, T> value_type;
135: typedef hash_iterator<K,T,H,E,value_type> iterator;
136: typedef hash_iterator<K,T,H,E,const value_type> const_iterator;
137:
138: // construct a hash table with specified number of bins
139: // the default 0 bins means leave it to the table to decide
140: // specifying 0 bins also enables auto-rehashing, otherwise auto-rehashing defaults off
141: hash(unsigned bins = 0);
142: ~hash(void);
143:
144: // copy and equality copy the data elements but not the size of the copied table
145: hash(const hash&);
146: hash& operator = (const hash&);
147:
148: // test for an empty table and for the size of a table - efficient because the size is stored separately from the table contents
149: bool empty(void) const;
150: unsigned size(void) const;
151:
152: // test for equality - two hashes are equal if they contain equal values
153: bool operator == (const hash&) const;
154: bool operator != (const hash&) const;
155:
156: // switch auto-rehash on
157: void auto_rehash(void);
158: // switch auto-rehash off
159: void manual_rehash(void);
160: // force a rehash now
161: // default of 0 means implement built-in size calculation for rehashing (recommended - it doubles the number of bins)
162: void rehash(unsigned bins = 0);
163: // test the loading ratio, which is the size divided by the number of bins
164: // use this if you are doing your own rehashing
165: // the recommendation is to double the bins when the loading exceeds 0.5 which is what auto-rehashing does
166: float loading(void) const;
167:
168: // test for the presence of a key
169: bool present(const K& key) const;
170: // provide map equivalent key count function (0 or 1, as not a multimap)
171: size_type count(const K& key) const;
172:
173: // insert a new key/data pair - replaces any previous value for this key
174: iterator insert(const K& key, const T& data);
175: // insert a copy of the pair into the table (std::map compatible)
176: std::pair<iterator, bool> insert(const value_type& value);
177: // insert a new key and return the iterator so that the data can be filled in
178: iterator insert(const K& key);
179:
180: // remove a key/data pair from the hash table
181: bool erase(const K& key);
182: // remove all elements from the hash table
183: void erase(void);
184: // provide the std::map equivalent clear function
185: void clear(void);
186:
187: // find a key and return an iterator to it
188: // The iterator is like a pointer to a pair<const K,T>
189: // end() is returned if the find fails
190: const_iterator find(const K& key) const;
191: iterator find(const K& key);
192: // returns the data corresponding to the key
193: // the const version is used by the compiler on const hashes and cannot change the hash, so find failure causes an exception
194: // the non-const version is used by the compiler on non-const hashes and is like map - it creates a new key/data pair if find fails
195: const T& operator[] (const K& key) const;
196: T& operator[] (const K& key);
197:
198: // iterators allow the hash table to be traversed
199: // iterators remain valid unless an item is removed or unless a rehash happens
200: const_iterator begin(void) const;
201: iterator begin(void);
202: const_iterator end(void) const;
203: iterator end(void);
204:
205: // diagnostic report shows the number of items in each bin so can be used to diagnose effectiveness of hash functions
206: void debug_report(otext&, unsigned indent = 0) const;
207: // the same diagnostic function that returns a string, rather than use an otext stream
208: std::string debug_report(unsigned indent = 0) const;
209:
210: // persistence methods
211: void dump(dump_context&) const
212: throw(persistent_dump_failed);
213: void restore(restore_context&)
214: throw(persistent_restore_failed);
215:
216: // internals
217: private:
218: friend class hash_element<K,T>;
219: friend class hash_iterator<K,T,H,E,std::pair<const K,T> >;
220: friend class hash_iterator<K,T,H,E,const std::pair<const K,T> >;
221:
222: unsigned m_rehash;
223: unsigned m_bins;
224: unsigned m_size;
225: hash_element<K,T>** m_values;
226: };
227:
228: ////////////////////////////////////////////////////////////////////////////////
229:
230: template<typename K, typename T, class H, class E>
231: otext& print(otext& str, const hash<K,T,H,E>& table, unsigned indent = 0);
232:
233: template<typename K, typename T, class H, class E>
234: otext& operator << (otext& str, const hash<K,T,H,E>& table);
235:
236: ////////////////////////////////////////////////////////////////////////////////
237:
238: template<typename K, typename T, class H, class E>
239: void dump_hash(dump_context& str, const hash<K,T,H,E>& data)
240: throw(persistent_dump_failed);
241:
242: template<typename K, typename T, class H, class E>
243: void restore_hash(restore_context& str, hash<K,T,H,E>& data)
244: throw(persistent_restore_failed);
245:
246: ////////////////////////////////////////////////////////////////////////////////
247:
248: #include "hash.tpp"
249: #endif