source: GTP/trunk/Lib/Vis/Preprocessing/src/sparsehash/src/google/sparsehash/densehashtable.h @ 2162

Revision 2162, 37.2 KB checked in by mattausch, 17 years ago (diff)

improved hash performance with google hashmap

Line 
1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Craig Silverstein
32//
33// A dense hashtable is a particular implementation of
34// a hashtable: one that is meant to minimize memory allocation.
35// It does this by using an array to store all the data.  We
36// steal a value from the key space to indicate "empty" array
37// elements (ie indices where no item lives) and another to indicate
38// "deleted" elements.
39//
40// (Note it is possible to change the value of the delete key
41// on the fly; you can even remove it, though after that point
42// the hashtable is insert_only until you set it again.  The empty
43// value however can't be changed.)
44//
45// This code is possibly a bit faster if the empty value is 0.
46//
47// To minimize allocation and pointer overhead, we use internal
48// probing, in which the hashtable is a single table, and collisions
49// are resolved by trying to insert again in another bucket.  The
50// most cache-efficient internal probing schemes are linear probing
51// (which suffers, alas, from clumping) and quadratic probing, which
52// is what we implement by default.
53//
54// Type requirements: T must be a Plain Old Data Type (POD Type), since
55// we use placement new but never call a destructor.  To ensure this,
56// you must defined a __type_traits struct for T if it is not a basic
57// type (see type_traits.h).  Also, those imposed by the requirements
58// of Random Access Container.
59//
60// You probably shouldn't use this code directly.  Use
61// <google/dense_hash_map> or <google/dense_hash_set> instead.
62
63// You can change the following below:
64// HT_OCCUPANCY_FLT      -- how full before we double size
65// HT_EMPTY_FLT          -- how empty before we halve size
66// HT_MIN_BUCKETS        -- default smallest bucket size
67//
68// How to decide what values to use?
69// HT_EMPTY_FLT's default of .4 * OCCUPANCY_FLT, is probably good.
70// HT_MIN_BUCKETS is probably unnecessary since you can specify
71// (indirectly) the starting number of buckets at construct-time.
72// For HT_OCCUPANCY_FLT, you can use this chart to try to trade-off
73// expected lookup time to the space taken up.  By default, this
74// code uses quadratic probing, though you can change it to linear
75// via _JUMP below if you really want to.
76//
77// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
78// NUMBER OF PROBES / LOOKUP       Successful            Unsuccessful
79// Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
80// Linear collision resolution     [1+1/(1-L)]/2         [1+1/(1-L)2]/2
81//
82// -- HT_OCCUPANCY_FLT --         0.10  0.50  0.60  0.75  0.80  0.90  0.99
83// QUADRATIC COLLISION RES.
84//    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
85//    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
86// LINEAR COLLISION RES.
87//    probes/successful lookup    1.06  1.5   1.75  2.5   3.0   5.5   50.5
88//    probes/unsuccessful lookup  1.12  2.5   3.6   8.5   13.0  50.0  5000.0
89
90#ifndef _DENSEHASHTABLE_H_
91#define _DENSEHASHTABLE_H_
92
93// The probing method
94// Linear probing
95// #define JUMP_(key, num_probes)    ( 1 )
96// Quadratic-ish probing
97#define JUMP_(key, num_probes)    ( num_probes )
98
99
100// Hashtable class, used to implement the hashed associative containers
101// hash_set and hash_map.
102
#include <google/sparsehash/config.h>
#include <assert.h>
#include <stddef.h>             // for size_t, ptrdiff_t
#include <stdlib.h>             // for malloc(), realloc(), free()
#include <string.h>             // for memset()
#include <algorithm>            // For swap(), eg
#include <new>                  // for placement new
#include <utility>              // for pair
106
_START_GOOGLE_NAMESPACE_

using STL_NAMESPACE::pair;

// Forward declarations: the table and its two iterator types refer to
// each other, so all three names must be visible before any is defined.
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_iterator;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_const_iterator;
120
121// We're just an array, but we need to skip over empty and deleted elements
122template <class V, class K, class HF, class ExK, class EqK, class A>
123struct dense_hashtable_iterator {
124 public:
125  typedef dense_hashtable<V,K,HF,ExK,EqK,A>                dense_hashtable;
126  typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
127  typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
128
129#ifdef UNDERSTANDS_ITERATOR_TAGS
130  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
131#endif
132  typedef V value_type;
133  typedef ptrdiff_t difference_type;
134  typedef size_t size_type;
135  typedef V& reference;                // Value
136  typedef V* pointer;
137
138  // "Real" constructor and default constructor
139  dense_hashtable_iterator(const dense_hashtable *h,
140                           pointer it, pointer it_end, bool advance)
141    : ht(h), pos(it), end(it_end)   {
142    if (advance)  advance_past_empty_and_deleted();
143  }
144  dense_hashtable_iterator() { }
145  // The default destructor is fine; we don't define one
146  // The default operator= is fine; we don't define one
147
148  // Happy dereferencer
149  reference operator*() const { return *pos; }
150  pointer operator->() const { return &(operator*()); }
151
152  // Arithmetic.  The only hard part is making sure that
153  // we're not on an empty or marked-deleted array element
154  void advance_past_empty_and_deleted() {
155    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
156      ++pos;
157  }
158  iterator& operator++()   {
159    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
160  }
161  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
162
163  // Comparison.
164  bool operator==(const iterator& it) const { return pos == it.pos; }
165  bool operator!=(const iterator& it) const { return pos != it.pos; }
166
167
168  // The actual data
169  const dense_hashtable *ht;
170  pointer pos, end;
171};
172
173
174// Now do it all again, but with const-ness!
175template <class V, class K, class HF, class ExK, class EqK, class A>
176struct dense_hashtable_const_iterator {
177 public:
178  typedef dense_hashtable<V,K,HF,ExK,EqK,A>                dense_hashtable;
179  typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
180  typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
181
182#ifdef UNDERSTANDS_ITERATOR_TAGS
183  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
184#endif
185  typedef V value_type;
186  typedef ptrdiff_t difference_type;
187  typedef size_t size_type;
188  typedef const V& reference;                // Value
189  typedef const V* pointer;
190
191  // "Real" constructor and default constructor
192  dense_hashtable_const_iterator(const dense_hashtable *h,
193                                 pointer it, pointer it_end, bool advance)
194    : ht(h), pos(it), end(it_end)   {
195    if (advance)  advance_past_empty_and_deleted();
196  }
197  dense_hashtable_const_iterator() { }
198  // This lets us convert regular iterators to const iterators
199  dense_hashtable_const_iterator(const iterator &it)
200    : ht(it.ht), pos(it.pos), end(it.end) { }
201  // The default destructor is fine; we don't define one
202  // The default operator= is fine; we don't define one
203
204  // Happy dereferencer
205  reference operator*() const { return *pos; }
206  pointer operator->() const { return &(operator*()); }
207
208  // Arithmetic.  The only hard part is making sure that
209  // we're not on an empty or marked-deleted array element
210  void advance_past_empty_and_deleted() {
211    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
212      ++pos;
213  }
214  const_iterator& operator++()   {
215    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
216  }
217  const_iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
218
219  // Comparison.
220  bool operator==(const const_iterator& it) const { return pos == it.pos; }
221  bool operator!=(const const_iterator& it) const { return pos != it.pos; }
222
223
224  // The actual data
225  const dense_hashtable *ht;
226  pointer pos, end;
227};
228
// The hashtable itself: a single flat array of value_type using open
// addressing; "empty" and "deleted" slots are marked with sentinel keys.
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable {
 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;

  typedef size_t            size_type;
  typedef ptrdiff_t         difference_type;
  typedef value_type*       pointer;
  typedef const value_type* const_pointer;
  typedef value_type&       reference;
  typedef const value_type& const_reference;
  typedef dense_hashtable_iterator<Value, Key, HashFcn,
                                   ExtractKey, EqualKey, Alloc>
  iterator;

  typedef dense_hashtable_const_iterator<Value, Key, HashFcn,
                                          ExtractKey, EqualKey, Alloc>
  const_iterator;

  // How full we let the table get before we resize.  Knuth says .8 is
  // good -- higher causes us to probe too much, though saves memory
  static const float HT_OCCUPANCY_FLT; // = 0.8;

  // How empty we let the table get before we resize lower.
  // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
  static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the minimum size is
  // determined by the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_t HT_MIN_BUCKETS  = 32;
266
267  // ITERATOR FUNCTIONS
268  iterator begin()             { return iterator(this, table,
269                                                 table + num_buckets, true); }
270  iterator end()               { return iterator(this, table + num_buckets,
271                                                 table + num_buckets, true); }
272  const_iterator begin() const { return const_iterator(this, table,
273                                                       table+num_buckets,true);}
274  const_iterator end() const   { return const_iterator(this, table + num_buckets,
275                                                       table+num_buckets,true);}
276
277  // ACCESSOR FUNCTIONS for the things we templatize on, basically
278  hasher hash_funct() const { return hash; }
279  key_equal key_eq() const  { return equals; }
280
281  // Annoyingly, we can't copy values around, because they might have
282  // const components (they're probably pair<const X, Y>).  We use
283  // placement new to get around this.  Arg.
284 private:
285  void set_value(value_type* dst, const value_type src) {
286    new(dst) value_type(src);
287  }
288
289  void set_key(key_type* dst, const key_type src) {
290    new(dst) key_type(src);   // used for set_deleted_key(), etc
291  }
292
293  // DELETE HELPER FUNCTIONS
294  // This lets the user describe a key that will indicate deleted
295  // table entries.  This key should be an "impossible" entry --
296  // if you try to insert it for real, you won't be able to retrieve it!
297  // (NB: while you pass in an entire value, only the key part is looked
298  // at.  This is just because I don't know how to assign just a key.)
299 private:
300  void squash_deleted() {           // gets rid of any deleted entries we have
301    if ( num_deleted ) {            // get rid of deleted before writing
302      dense_hashtable tmp(*this);   // copying will get rid of deleted
303      swap(tmp);                    // now we are tmp
304    }
305    assert(num_deleted == 0);
306  }
307
308 public:
309  void set_deleted_key(const key_type &key) {
310    // It's only safe to change what "deleted" means if we purge deleted guys
311    squash_deleted();
312    use_deleted = true;
313    set_key(&delkey, key);            // save the key
314  }
315  void clear_deleted_key() {
316    squash_deleted();
317    use_deleted = false;
318  }
319
320  // These are public so the iterators can use them
321  // True if the item at position bucknum is "deleted" marker
322  bool test_deleted(size_type bucknum) const {
323    // The num_deleted test is crucial for read(): after read(), the ht values
324    // are garbage, and we don't want to think some of them are deleted.
325    return (use_deleted && num_deleted > 0 &&
326            equals(delkey, get_key(table[bucknum])));
327  }
328  bool test_deleted(const iterator &it) const {
329    return (use_deleted && num_deleted > 0 &&
330            equals(delkey, get_key(*it)));
331  }
332  bool test_deleted(const const_iterator &it) const {
333    return (use_deleted && num_deleted > 0 &&
334            equals(delkey, get_key(*it)));
335  }
336  // Set it so test_deleted is true.  true if object didn't used to be deleted
337  // See below (at erase()) to explain why we allow const_iterators
338  bool set_deleted(const_iterator &it) {
339    assert(use_deleted);             // bad if set_deleted_key() wasn't called
340    bool retval = !test_deleted(it);
341    set_key(const_cast<key_type*>(&get_key(*it)), delkey);
342    return retval;
343  }
344  // Set it so test_deleted is false.  true if object used to be deleted
345  bool clear_deleted(const_iterator &it) {
346    assert(use_deleted);             // bad if set_deleted_key() wasn't called
347    // happens automatically when we assign something else in its place
348    return test_deleted(it);
349  }
350
  // EMPTY HELPER FUNCTIONS
  // This lets the user describe a key that will indicate empty (unused)
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 public:
  // These are public so the iterators can use them
  // True if the item at position bucknum is "empty" marker
  bool test_empty(size_type bucknum) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(table[bucknum]));
  }
  bool test_empty(const iterator &it) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(*it));
  }
  bool test_empty(const const_iterator &it) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(*it));
  }

 private:
  // You can either set a range empty or an individual element
  void set_empty(size_type bucknum) {
    assert(use_empty);
    // const_cast because keys may be const (pair<const K,V>);
    // set_key() placement-news the empty sentinel over the slot.
    set_key(const_cast<key_type*>(&get_key(table[bucknum])), emptykey);
  }
  // Marks the half-open bucket range [buckstart, buckend) as empty.
  void set_empty(size_type buckstart, size_type buckend) {
    assert(use_empty);
    if ( empty_is_zero )          // we can empty a lot of buckets at once
      memset(table + buckstart, 0, (buckend-buckstart) * sizeof(*table));
    else
      for ( ; buckstart < buckend; buckstart++ )
        set_empty(buckstart);
  }
387
388 public:
389  void set_empty_key(const key_type &key) {
390    // Once you set the empty key, you can't change it
391    assert(!use_empty);
392    use_empty = true;
393    set_key(&emptykey, key);
394    empty_is_zero = true;                    // true if key is all 0's
395    for ( size_t i = 0; i < sizeof(emptykey); ++i )
396      if ( (reinterpret_cast<const char *>(&emptykey))[i] != 0 ) {
397        empty_is_zero = false;               // won't be able to optimize :-(
398        break;
399      }
400    empty_is_zero = false;                   // empty heuristic seems to hurt
401
402    assert(!table);                  // must set before first use
403    // num_buckets was set in constructor even though table was NULL
404    table = (value_type *) malloc(num_buckets * sizeof(*table));
405    assert(table);
406    set_empty(0, num_buckets);
407  }
408
409  // FUNCTIONS CONCERNING SIZE
410 public:
411  size_type size() const      { return num_elements - num_deleted; }
412  // Buckets are always a power of 2
413  size_type max_size() const  { return (size_type(-1) >> 1U) + 1; }
414  bool empty() const          { return size() == 0; }
415  size_type bucket_count() const      { return num_buckets; }
416  size_type max_bucket_count() const  { return max_size(); }
417
418 private:
419  // Because of the above, size_type(-1) is never legal; use it for errors
420  static const size_type ILLEGAL_BUCKET = size_type(-1);
421
422 private:
423  // This is the smallest size a hashtable can be without being too crowded
424  // If you like, you can give a min #buckets as well as a min #elts
425  size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
426    size_type sz = HT_MIN_BUCKETS;             // min buckets allowed
427    while ( sz < min_buckets_wanted || num_elts >= sz * HT_OCCUPANCY_FLT )
428      sz *= 2;
429    return sz;
430  }
431
  // Used after a string of deletes: shrinks the table (by copy-and-swap)
  // if the live-element count has fallen below the shrink threshold.
  void maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);

    if ( (num_elements-num_deleted) <= shrink_threshold &&
         bucket_count() > HT_MIN_BUCKETS ) {
      size_type sz = bucket_count() / 2;    // find how much we should shrink
      while ( sz > HT_MIN_BUCKETS &&
              (num_elements - num_deleted) <= sz * HT_EMPTY_FLT )
        sz /= 2;                            // stay a power of 2
      dense_hashtable tmp(*this, sz);       // Do the actual resizing
      swap(tmp);                            // now we are tmp
    }
    consider_shrink = false;                // because we just considered it
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  void resize_delta(size_type delta, size_type min_buckets_wanted = 0) {
    if ( consider_shrink )                   // see if lots of deletes happened
      maybe_shrink();
    if ( bucket_count() > min_buckets_wanted &&
         (num_elements + delta) <= enlarge_threshold )
      return;                                // we're ok as we are

    // Grow (never shrink here) to the smallest power-of-two size that
    // keeps occupancy below HT_OCCUPANCY_FLT, via copy-and-swap.
    const size_type resize_to = min_size(num_elements + delta,
                                         min_buckets_wanted);
    if ( resize_to > bucket_count() ) {      // we don't have enough buckets
      dense_hashtable tmp(*this, resize_to);
      swap(tmp);                             // now we are tmp
    }
  }
466
  // Used to actually do the rehashing when we grow/shrink a hashtable.
  // Empties *this, grows it to fit ht's live elements if needed, then
  // re-inserts every non-deleted element of ht at its new home bucket.
  void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted = 0) {
    clear();            // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {      // we don't have enough buckets
      table = (value_type *) realloc(table, resize_to * sizeof(*table));
      assert(table != NULL);
      set_empty(num_buckets, resize_to);     // empty everything between them
      num_buckets = resize_to;
      reset_thresholds();
    }

    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count()-1)) == 0);      // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;              // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // Probe until the first empty bucket; no equality checks needed
      // since the source table has no duplicates.  NOTE(review): JUMP_'s
      // first argument is textually discarded by both probing schemes,
      // so the nonexistent name 'key' below is harmless -- but it would
      // break compilation if JUMP_ ever actually used its first arg.
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           !test_empty(bucknum);                               // not empty
           bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()); // or else the hashtable is full
      }
      set_value(&table[bucknum], *it);       // copies the value to here
      num_elements++;
    }
  }
499
500  // Required by the spec for hashed associative container
501 public:
502  // Though the docs say this should be num_buckets, I think it's much
503  // more useful as req_elements.  As a special feature, calling with
504  // req_elements==0 will cause us to shrink if we can, saving space.
505  void resize(size_type req_elements) {       // resize to this or larger
506    if ( consider_shrink || req_elements == 0 )
507      maybe_shrink();
508    if ( req_elements > num_elements )
509      return resize_delta(req_elements - num_elements, 0);
510  }
511
512
  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor.  We also define a copy constructor and =.
  // DESTRUCTOR -- needs to free the table
  explicit dense_hashtable(size_type n = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey())
    : hash(hf), equals(eql), get_key(ext), num_deleted(0),
      use_deleted(false), use_empty(false),
      empty_is_zero(false), delkey(), emptykey(),
      table(NULL), num_buckets(min_size(0, n)), num_elements(0) {
    // table is NULL until emptykey is set.  However, we set num_buckets
    // here so we know how much space to allocate once emptykey is set
    reset_thresholds();
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht
  dense_hashtable(const dense_hashtable& ht, size_type min_buckets_wanted = 0)
    : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_deleted(0),
      use_deleted(ht.use_deleted), use_empty(ht.use_empty),
      empty_is_zero(ht.empty_is_zero), delkey(ht.delkey), emptykey(ht.emptykey),
      table(NULL), num_buckets(min_size(0, min_buckets_wanted)),
      num_elements(0) {
    reset_thresholds();
    copy_from(ht, min_buckets_wanted);   // copy_from() ignores deleted entries
  }

  dense_hashtable& operator= (const dense_hashtable& ht) {
    if (&ht == this)  return *this;        // don't copy onto ourselves
    clear();
    hash = ht.hash;
    equals = ht.equals;
    get_key = ht.get_key;
    use_deleted = ht.use_deleted;
    use_empty = ht.use_empty;
    empty_is_zero = ht.empty_is_zero;
    set_key(&delkey, ht.delkey);           // placement-new copy; see set_key()
    set_key(&emptykey, ht.emptykey);
    copy_from(ht);                         // sets num_deleted to 0 too
    return *this;
  }

  ~dense_hashtable() {
    // Elements are required to be PODs, so no per-element destructors.
    if (table)  free(table);
  }
560
  // Many STL algorithms use swap instead of copy constructors
  void swap(dense_hashtable& ht) {
    STL_NAMESPACE::swap(hash, ht.hash);
    STL_NAMESPACE::swap(equals, ht.equals);
    STL_NAMESPACE::swap(get_key, ht.get_key);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
    STL_NAMESPACE::swap(use_empty, ht.use_empty);
    STL_NAMESPACE::swap(empty_is_zero, ht.empty_is_zero);
    // Keys can't go through std::swap because they may have const
    // members, so do a three-way placement-new exchange instead.
    { key_type tmp;     // for annoying reasons, swap() doesn't work
      set_key(&tmp, delkey);
      set_key(&delkey, ht.delkey);
      set_key(&ht.delkey, tmp);
    }
    { key_type tmp;     // for annoying reasons, swap() doesn't work
      set_key(&tmp, emptykey);
      set_key(&emptykey, ht.emptykey);
      set_key(&ht.emptykey, tmp);
    }
    STL_NAMESPACE::swap(table, ht.table);
    STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
    STL_NAMESPACE::swap(num_elements, ht.num_elements);
    // Thresholds derive from num_buckets, which just changed on both
    // sides, so recompute them last.
    reset_thresholds();
    ht.reset_thresholds();
  }
586
  // It's always nice to be able to clear a table without deallocating it.
  // Shrinks the array back to the minimum size and marks every bucket empty.
  void clear() {
    num_buckets = min_size(0,0);          // our new size
    reset_thresholds();
    // NOTE(review): the realloc() result is assigned before being checked,
    // so on failure the old block leaks and table is NULL; the assert
    // catches that in debug builds only.
    table = (value_type *) realloc(table, num_buckets * sizeof(*table));
    assert(table);
    set_empty(0, num_buckets);
    num_elements = 0;
    num_deleted = 0;
  }
597
598
  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type &key) const {
    size_type num_probes = 0;              // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    // bucket_count() is a power of two, so '&' is a cheap modulus.
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
    while ( 1 ) {                          // probe until something happens
      if ( test_empty(bucknum) ) {         // bucket is empty
        if ( insert_pos == ILLEGAL_BUCKET )   // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;

      } else if ( equals(key, get_key(table[bucknum])) ) {
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                        // we're doing another probe
      // Quadratic-ish probing: the stride grows by one each miss (JUMP_).
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()); // don't probe too many times!
    }
  }
630
631 public:
632  iterator find(const key_type& key) {
633    if ( size() == 0 ) return end();
634    pair<size_type, size_type> pos = find_position(key);
635    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
636      return end();
637    else
638      return iterator(this, table + pos.first, table + num_buckets, false);
639  }
640
641  const_iterator find(const key_type& key) const {
642    if ( size() == 0 ) return end();
643    pair<size_type, size_type> pos = find_position(key);
644    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
645      return end();
646    else
647      return const_iterator(this, table + pos.first, table+num_buckets, false);
648  }
649
650  // Counts how many elements have key key.  For maps, it's either 0 or 1.
651  size_type count(const key_type &key) const {
652    pair<size_type, size_type> pos = find_position(key);
653    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
654  }
655
656  // Likewise, equal_range doesn't really make sense for us.  Oh well.
657  pair<iterator,iterator> equal_range(const key_type& key) {
658    const iterator pos = find(key);      // either an iterator or end
659    return pair<iterator,iterator>(pos, pos);
660  }
661  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
662    const const_iterator pos = find(key);      // either an iterator or end
663    return pair<iterator,iterator>(pos, pos);
664  }
665
666
  // INSERTION ROUTINES
 private:
  // If you know *this is big enough to hold obj, use this routine.
  // Returns (iterator to the element, true-if-inserted), like the
  // standard associative-container insert().
  pair<iterator, bool> insert_noresize(const value_type& obj) {
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
      return pair<iterator,bool>(iterator(this, table + pos.first,
                                          table + num_buckets, false),
                                 false);          // false: we didn't insert
    } else {                                 // pos.second says where to put it
      if ( test_deleted(pos.second) ) {      // just replace if it's been del.
        // Re-using a deleted slot: the element count stays the same,
        // but the deleted count drops.
        const_iterator delpos(this, table + pos.second,              // shrug:
                              table + num_buckets, false);// shouldn't need const
        clear_deleted(delpos);
        assert( num_deleted > 0);
        --num_deleted;                       // used to be, now it isn't
      } else {
        ++num_elements;                      // replacing an empty bucket
      }
      set_value(&table[pos.second], obj);    // placement-new copy into slot
      return pair<iterator,bool>(iterator(this, table + pos.second,
                                          table + num_buckets, false),
                                 true);           // true: we did insert
    }
  }
692
 public:
  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const value_type& obj) {
    resize_delta(1);                      // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type (tag dispatch to the overloads below)
    insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_type n = STL_NAMESPACE::distance(f, l);   // TODO(csilvers): standard?
    resize_delta(n);                     // one resize up front, then no-resize inserts
    for ( ; n > 0; --n, ++f)
      insert_noresize(*f);
  }

  // Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);                        // each insert may resize
  }
724
725  // DELETION ROUTINES
726  size_type erase(const key_type& key) {
727    const_iterator pos = find(key);   // shrug: shouldn't need to be const
728    if ( pos != end() ) {
729      assert(!test_deleted(pos));  // or find() shouldn't have returned it
730      set_deleted(pos);
731      ++num_deleted;
732      consider_shrink = true;      // will think about shrink after next insert
733      return 1;                    // because we deleted one thing
734    } else {
735      return 0;                    // because we deleted nothing
736    }
737  }
738
739  // This is really evil: really it should be iterator, not const_iterator.
740  // But...the only reason keys are const is to allow lookup.
741  // Since that's a moot issue for deleted keys, we allow const_iterators
742  void erase(const_iterator pos) {
743    if ( pos == end() ) return;    // sanity check
744    if ( set_deleted(pos) ) {      // true if object has been newly deleted
745      ++num_deleted;
746      consider_shrink = true;      // will think about shrink after next insert
747    }
748  }
749
750  void erase(const_iterator f, const_iterator l) {
751    for ( ; f != l; ++f) {
752      if ( set_deleted(f)  )       // should always be true
753        ++num_deleted;
754    }
755    consider_shrink = true;        // will think about shrink after next insert
756  }
757
758
  // COMPARISON
  bool operator==(const dense_hashtable& ht) const {
    // We really want to check that the hash functions are the same
    // but alas there's no way to do this.  We just hope.
    // NOTE(review): `table == ht.table` compares the storage *pointers*,
    // not the elements -- two tables with identical contents but separate
    // allocations compare unequal.  In practice this is close to identity
    // comparison; confirm no caller expects deep (element-wise) equality.
    return ( num_deleted == ht.num_deleted && table == ht.table );
  }
765  bool operator!=(const dense_hashtable& ht) const {
766    return !(*this == ht);
767  }
768
769
  // I/O
  // We support reading and writing hashtables to disk.  Alas, since
  // I don't know how to write a hasher or key_equal, you have to make
  // sure everything but the table is the same.  We compact before writing
  //
  // NOTE: These functions are currently TODO.  They've not been implemented.
  // Writes table metadata (bucket/element counts) to fp.
  // Returns true on success; currently always false (unimplemented stub,
  // though it does compact away deleted entries as a side effect).
  bool write_metadata(FILE *fp) {
    squash_deleted();           // so we don't have to worry about delkey
    return false;               // TODO
  }
780
  // Reads table metadata back from fp and rebuilds the (empty) table.
  // Precondition: the empty key must already be set (use_empty is true).
  // Returns true on success; currently always false (unimplemented stub).
  bool read_metadata(FILE *fp) {
    num_deleted = 0;            // since we got rid before writing
    assert(use_empty);          // have to set this before calling us
    if (table)  free(table);    // we'll make our own
    // TODO: read magic number
    // TODO: read num_buckets
    reset_thresholds();
    table = (value_type *) malloc(num_buckets * sizeof(*table));
    assert(table);              // NOTE(review): malloc failure only caught in debug builds
    set_empty(0, num_buckets);
    // TODO: read num_elements
    for ( size_type i = 0; i < num_elements; ++i ) {
      // TODO: read bucket_num
      // TODO: set with non-empty, non-deleted value
    }
    return false;               // TODO
  }
798
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  // Writes each bucket's raw bytes to fp with fwrite().
  // Returns false on write failure; currently always returns false
  // because the function is an unfinished stub (see TODOs).
  bool write_nopointer_data(FILE *fp) const {
    for ( const_iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return false;        // TODO
  }
809
  // When reading, we have to override the potential const-ness of *it
  // Reads each bucket's raw bytes back from fp with fread().
  // Returns false on read failure; currently always returns false
  // because the function is an unfinished stub (see TODOs).
  bool read_nopointer_data(FILE *fp) {
    for ( iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return false;        // TODO
  }
819
820 private:
821  // We need to enforce that our value_type is a Plain Old Data Type
822  // (so we know realloc and memmove will work).  We use traits to
823  // enforce this.  The following gives a compile-time error if
824  // is_POD_type is false (which is the default for user types).
825  //
826  // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
827  // and if so tell the compiler via code similar to this:
828  // template<> struct __type_traits<classname> {
829  //   typedef __true_type    has_trivial_default_constructor;
830  //   typedef __true_type    has_trivial_copy_constructor;
831  //   typedef __true_type    has_trivial_assignment_operator;
832  //   typedef __true_type    has_trivial_destructor;
833  //   typedef __true_type    is_POD_type;
834  // };
835  //
836  // If this is part of a hash_map, you need to make sure both the
837  // Key and Value types are POD types, if they're user-defined.
#ifdef UNDERSTANDS_TYPE_TRAITS
  // Dummy whose out-of-class definition (below) only compiles when
  // value_type is a POD -- a compile-time enforcement hook.
  static __true_type * const enforce_pod;
#endif

  // The actual data
  hasher hash;                      // required by hashed_associative_container
  key_equal equals;                 // compares two keys for equality
  ExtractKey get_key;               // extracts the key from a value_type
  size_type num_deleted;        // how many occupied buckets are marked deleted
  bool use_deleted;                          // false until delkey has been set
  bool use_empty;                          // you must do this before you start
  bool empty_is_zero;                   // can optimize this case when emptying
  key_type delkey;                           // which key marks deleted entries
  key_type emptykey;                          // which key marks unused entries
  value_type *table;                // the bucket array itself (malloc'd)
  size_type num_buckets;            // current size of the bucket array
  size_type num_elements;           // occupied (non-deleted) buckets
  size_type shrink_threshold;                     // num_buckets * HT_EMPTY_FLT
  size_type enlarge_threshold;                // num_buckets * HT_OCCUPANCY_FLT
  bool consider_shrink;   // true if we should try to shrink before next insert
858
859  void reset_thresholds() {
860    enlarge_threshold = static_cast<size_type>(num_buckets*HT_OCCUPANCY_FLT);
861    shrink_threshold = static_cast<size_type>(num_buckets*HT_EMPTY_FLT);
862    consider_shrink = false;   // whatever caused us to reset already considered
863  }
864};
865
// We need a global swap as well
// Non-member swap (found via argument-dependent lookup) that simply
// forwards to the member swap().
template <class V, class K, class HF, class ExK, class EqK, class A>
inline void swap(dense_hashtable<V,K,HF,ExK,EqK,A> &x,
                 dense_hashtable<V,K,HF,ExK,EqK,A> &y) {
  x.swap(y);
}
872
#ifdef UNDERSTANDS_TYPE_TRAITS
// Out-of-class definition of the POD-enforcement dummy: the static_cast
// from a null is_POD_type* only compiles when
// __type_traits<value_type>::is_POD_type is __true_type, i.e. when
// value_type is a POD.  Otherwise you get the compile error described
// in the comment block above the declaration.
template <class V, class K, class HF, class ExK, class EqK, class A>
__true_type * const dense_hashtable<V,K,HF,ExK,EqK,A>::enforce_pod =
static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
#endif
878
879#undef JUMP_
880
// Out-of-class definition of the static ILLEGAL_BUCKET constant,
// required so the constant can be odr-used (e.g. bound to a reference).
template <class V, class K, class HF, class ExK, class EqK, class A>
const typename dense_hashtable<V,K,HF,ExK,EqK,A>::size_type
  dense_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;
884
// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory.
// NOTE(review): this copy uses 0.5, presumably lowered from the usual
// 0.8 to trade memory for fewer probes / faster lookups -- confirm.
template <class V, class K, class HF, class ExK, class EqK, class A>
const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.5f;
889
// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
// (here: 0.4 * 0.5 = 0.2, safely below 0.25).
template <class V, class K, class HF, class ExK, class EqK, class A>
const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT = 0.4 *
dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;
895
896_END_GOOGLE_NAMESPACE_
897
898#endif /* _DENSEHASHTABLE_H_ */
Note: See TracBrowser for help on using the repository browser.