source: GTP/trunk/Lib/Vis/Preprocessing/src/sparsehash/src/google/sparsehash/densehashtable.h @ 2162

Revision 2162, 37.2 KB checked in by mattausch, 17 years ago (diff)

improved hash performance with google hashmap

Line 
1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Craig Silverstein
32//
33// A dense hashtable is a particular implementation of
34// a hashtable: one that is meant to minimize memory allocation.
35// It does this by using an array to store all the data.  We
36// steal a value from the key space to indicate "empty" array
37// elements (ie indices where no item lives) and another to indicate
38// "deleted" elements.
39//
40// (Note it is possible to change the value of the delete key
41// on the fly; you can even remove it, though after that point
42// the hashtable is insert_only until you set it again.  The empty
43// value however can't be changed.)
44//
45// This code is possibly a bit faster if the empty value is 0.
46//
47// To minimize allocation and pointer overhead, we use internal
48// probing, in which the hashtable is a single table, and collisions
49// are resolved by trying to insert again in another bucket.  The
50// most cache-efficient internal probing schemes are linear probing
51// (which suffers, alas, from clumping) and quadratic probing, which
52// is what we implement by default.
53//
54// Type requirements: T must be a Plain Old Data Type (POD Type), since
55// we use placement new but never call a destructor.  To ensure this,
56// you must defined a __type_traits struct for T if it is not a basic
57// type (see type_traits.h).  Also, those imposed by the requirements
58// of Random Access Container.
59//
60// You probably shouldn't use this code directly.  Use
61// <google/dense_hash_map> or <google/dense_hash_set> instead.
62
63// You can change the following below:
64// HT_OCCUPANCY_FLT      -- how full before we double size
65// HT_EMPTY_FLT          -- how empty before we halve size
66// HT_MIN_BUCKETS        -- default smallest bucket size
67//
68// How to decide what values to use?
69// HT_EMPTY_FLT's default of .4 * OCCUPANCY_FLT, is probably good.
70// HT_MIN_BUCKETS is probably unnecessary since you can specify
71// (indirectly) the starting number of buckets at construct-time.
72// For HT_OCCUPANCY_FLT, you can use this chart to try to trade-off
73// expected lookup time to the space taken up.  By default, this
74// code uses quadratic probing, though you can change it to linear
75// via _JUMP below if you really want to.
76//
77// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
78// NUMBER OF PROBES / LOOKUP       Successful            Unsuccessful
79// Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
80// Linear collision resolution     [1+1/(1-L)]/2         [1+1/(1-L)2]/2
81//
82// -- HT_OCCUPANCY_FLT --         0.10  0.50  0.60  0.75  0.80  0.90  0.99
83// QUADRATIC COLLISION RES.
84//    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
85//    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
86// LINEAR COLLISION RES.
87//    probes/successful lookup    1.06  1.5   1.75  2.5   3.0   5.5   50.5
88//    probes/unsuccessful lookup  1.12  2.5   3.6   8.5   13.0  50.0  5000.0
89
90#ifndef _DENSEHASHTABLE_H_
91#define _DENSEHASHTABLE_H_
92
93// The probing method
94// Linear probing
95// #define JUMP_(key, num_probes)    ( 1 )
96// Quadratic-ish probing
97#define JUMP_(key, num_probes)    ( num_probes )
98
99
100// Hashtable class, used to implement the hashed associative containers
101// hash_set and hash_map.
102
#include <google/sparsehash/config.h>
#include <assert.h>
#include <stddef.h>             // for size_t, ptrdiff_t
#include <stdlib.h>             // for malloc(), realloc(), free()
#include <string.h>             // for memset()
#include <algorithm>            // For swap(), eg
#include <new>                  // for placement new
#include <utility>              // for pair
106
_START_GOOGLE_NAMESPACE_

using STL_NAMESPACE::pair;

// Forward declarations: the table and its two iterator types refer to
// each other, so all three names must be visible before any is defined.
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_iterator;

template <class V, class K, class HF, class ExK, class EqK, class A>
struct dense_hashtable_const_iterator;
120
121// We're just an array, but we need to skip over empty and deleted elements
122template <class V, class K, class HF, class ExK, class EqK, class A>
123struct dense_hashtable_iterator {
124 public:
125  typedef dense_hashtable<V,K,HF,ExK,EqK,A>                dense_hashtable;
126  typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
127  typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
128
129#ifdef UNDERSTANDS_ITERATOR_TAGS
130  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
131#endif
132  typedef V value_type;
133  typedef ptrdiff_t difference_type;
134  typedef size_t size_type;
135  typedef V& reference;                // Value
136  typedef V* pointer;
137
138  // "Real" constructor and default constructor
139  dense_hashtable_iterator(const dense_hashtable *h,
140                           pointer it, pointer it_end, bool advance)
141    : ht(h), pos(it), end(it_end)   {
142    if (advance)  advance_past_empty_and_deleted();
143  }
144  dense_hashtable_iterator() { }
145  // The default destructor is fine; we don't define one
146  // The default operator= is fine; we don't define one
147
148  // Happy dereferencer
149  reference operator*() const { return *pos; }
150  pointer operator->() const { return &(operator*()); }
151
152  // Arithmetic.  The only hard part is making sure that
153  // we're not on an empty or marked-deleted array element
154  void advance_past_empty_and_deleted() {
155    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
156      ++pos;
157  }
158  iterator& operator++()   {
159    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
160  }
161  iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
162
163  // Comparison.
164  bool operator==(const iterator& it) const { return pos == it.pos; }
165  bool operator!=(const iterator& it) const { return pos != it.pos; }
166
167
168  // The actual data
169  const dense_hashtable *ht;
170  pointer pos, end;
171};
172
173
174// Now do it all again, but with const-ness!
175template <class V, class K, class HF, class ExK, class EqK, class A>
176struct dense_hashtable_const_iterator {
177 public:
178  typedef dense_hashtable<V,K,HF,ExK,EqK,A>                dense_hashtable;
179  typedef dense_hashtable_iterator<V,K,HF,ExK,EqK,A>       iterator;
180  typedef dense_hashtable_const_iterator<V,K,HF,ExK,EqK,A> const_iterator;
181
182#ifdef UNDERSTANDS_ITERATOR_TAGS
183  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
184#endif
185  typedef V value_type;
186  typedef ptrdiff_t difference_type;
187  typedef size_t size_type;
188  typedef const V& reference;                // Value
189  typedef const V* pointer;
190
191  // "Real" constructor and default constructor
192  dense_hashtable_const_iterator(const dense_hashtable *h,
193                                 pointer it, pointer it_end, bool advance)
194    : ht(h), pos(it), end(it_end)   {
195    if (advance)  advance_past_empty_and_deleted();
196  }
197  dense_hashtable_const_iterator() { }
198  // This lets us convert regular iterators to const iterators
199  dense_hashtable_const_iterator(const iterator &it)
200    : ht(it.ht), pos(it.pos), end(it.end) { }
201  // The default destructor is fine; we don't define one
202  // The default operator= is fine; we don't define one
203
204  // Happy dereferencer
205  reference operator*() const { return *pos; }
206  pointer operator->() const { return &(operator*()); }
207
208  // Arithmetic.  The only hard part is making sure that
209  // we're not on an empty or marked-deleted array element
210  void advance_past_empty_and_deleted() {
211    while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) )
212      ++pos;
213  }
214  const_iterator& operator++()   {
215    assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this;
216  }
217  const_iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
218
219  // Comparison.
220  bool operator==(const const_iterator& it) const { return pos == it.pos; }
221  bool operator!=(const const_iterator& it) const { return pos != it.pos; }
222
223
224  // The actual data
225  const dense_hashtable *ht;
226  pointer pos, end;
227};
228
// The hashtable itself: a single flat array of value_type using open
// addressing; "empty" and "deleted" slots are marked with sentinel keys.
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey, class Alloc>
class dense_hashtable {
 public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;

  typedef size_t            size_type;
  typedef ptrdiff_t         difference_type;
  typedef value_type*       pointer;
  typedef const value_type* const_pointer;
  typedef value_type&       reference;
  typedef const value_type& const_reference;
  typedef dense_hashtable_iterator<Value, Key, HashFcn,
                                   ExtractKey, EqualKey, Alloc>
  iterator;

  typedef dense_hashtable_const_iterator<Value, Key, HashFcn,
                                          ExtractKey, EqualKey, Alloc>
  const_iterator;

  // How full we let the table get before we resize.  Knuth says .8 is
  // good -- higher causes us to probe too much, though saves memory
  static const float HT_OCCUPANCY_FLT; // = 0.8;

  // How empty we let the table get before we resize lower.
  // It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
  static const float HT_EMPTY_FLT; // = 0.4 * HT_OCCUPANCY_FLT

  // Minimum size we're willing to let hashtables be.
  // Must be a power of two, and at least 4.
  // Note, however, that for a given hashtable, the minimum size is
  // determined by the first constructor arg, and may be >HT_MIN_BUCKETS.
  static const size_t HT_MIN_BUCKETS  = 32;
266
267  // ITERATOR FUNCTIONS
268  iterator begin()             { return iterator(this, table,
269                                                 table + num_buckets, true); }
270  iterator end()               { return iterator(this, table + num_buckets,
271                                                 table + num_buckets, true); }
272  const_iterator begin() const { return const_iterator(this, table,
273                                                       table+num_buckets,true);}
274  const_iterator end() const   { return const_iterator(this, table + num_buckets,
275                                                       table+num_buckets,true);}
276
277  // ACCESSOR FUNCTIONS for the things we templatize on, basically
278  hasher hash_funct() const { return hash; }
279  key_equal key_eq() const  { return equals; }
280
281  // Annoyingly, we can't copy values around, because they might have
282  // const components (they're probably pair<const X, Y>).  We use
283  // placement new to get around this.  Arg.
284 private:
285  void set_value(value_type* dst, const value_type src) {
286    new(dst) value_type(src);
287  }
288
289  void set_key(key_type* dst, const key_type src) {
290    new(dst) key_type(src);   // used for set_deleted_key(), etc
291  }
292
293  // DELETE HELPER FUNCTIONS
294  // This lets the user describe a key that will indicate deleted
295  // table entries.  This key should be an "impossible" entry --
296  // if you try to insert it for real, you won't be able to retrieve it!
297  // (NB: while you pass in an entire value, only the key part is looked
298  // at.  This is just because I don't know how to assign just a key.)
299 private:
300  void squash_deleted() {           // gets rid of any deleted entries we have
301    if ( num_deleted ) {            // get rid of deleted before writing
302      dense_hashtable tmp(*this);   // copying will get rid of deleted
303      swap(tmp);                    // now we are tmp
304    }
305    assert(num_deleted == 0);
306  }
307
308 public:
309  void set_deleted_key(const key_type &key) {
310    // It's only safe to change what "deleted" means if we purge deleted guys
311    squash_deleted();
312    use_deleted = true;
313    set_key(&delkey, key);            // save the key
314  }
315  void clear_deleted_key() {
316    squash_deleted();
317    use_deleted = false;
318  }
319
320  // These are public so the iterators can use them
321  // True if the item at position bucknum is "deleted" marker
322  bool test_deleted(size_type bucknum) const {
323    // The num_deleted test is crucial for read(): after read(), the ht values
324    // are garbage, and we don't want to think some of them are deleted.
325    return (use_deleted && num_deleted > 0 &&
326            equals(delkey, get_key(table[bucknum])));
327  }
328  bool test_deleted(const iterator &it) const {
329    return (use_deleted && num_deleted > 0 &&
330            equals(delkey, get_key(*it)));
331  }
332  bool test_deleted(const const_iterator &it) const {
333    return (use_deleted && num_deleted > 0 &&
334            equals(delkey, get_key(*it)));
335  }
336  // Set it so test_deleted is true.  true if object didn't used to be deleted
337  // See below (at erase()) to explain why we allow const_iterators
338  bool set_deleted(const_iterator &it) {
339    assert(use_deleted);             // bad if set_deleted_key() wasn't called
340    bool retval = !test_deleted(it);
341    set_key(const_cast<key_type*>(&get_key(*it)), delkey);
342    return retval;
343  }
344  // Set it so test_deleted is false.  true if object used to be deleted
345  bool clear_deleted(const_iterator &it) {
346    assert(use_deleted);             // bad if set_deleted_key() wasn't called
347    // happens automatically when we assign something else in its place
348    return test_deleted(it);
349  }
350
  // EMPTY HELPER FUNCTIONS
  // This lets the user describe a key that will indicate empty (unused)
  // table entries.  This key should be an "impossible" entry --
  // if you try to insert it for real, you won't be able to retrieve it!
  // (NB: while you pass in an entire value, only the key part is looked
  // at.  This is just because I don't know how to assign just a key.)
 public:
  // These are public so the iterators can use them
  // True if the item at position bucknum is "empty" marker
  bool test_empty(size_type bucknum) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(table[bucknum]));
  }
  bool test_empty(const iterator &it) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(*it));
  }
  bool test_empty(const const_iterator &it) const {
    assert(use_empty);              // we always need to know what's empty!
    return equals(emptykey, get_key(*it));
  }

 private:
  // You can either set a range empty or an individual element
  void set_empty(size_type bucknum) {
    assert(use_empty);
    // const_cast because keys may be const (pair<const K,V>);
    // set_key() placement-news the empty sentinel over the slot.
    set_key(const_cast<key_type*>(&get_key(table[bucknum])), emptykey);
  }
  // Marks the half-open bucket range [buckstart, buckend) as empty.
  void set_empty(size_type buckstart, size_type buckend) {
    assert(use_empty);
    if ( empty_is_zero )          // we can empty a lot of buckets at once
      memset(table + buckstart, 0, (buckend-buckstart) * sizeof(*table));
    else
      for ( ; buckstart < buckend; buckstart++ )
        set_empty(buckstart);
  }
387
388 public:
389  void set_empty_key(const key_type &key) {
390    // Once you set the empty key, you can't change it
391    assert(!use_empty);
392    use_empty = true;
393    set_key(&emptykey, key);
394    empty_is_zero = true;                    // true if key is all 0's
395    for ( size_t i = 0; i < sizeof(emptykey); ++i )
396      if ( (reinterpret_cast<const char *>(&emptykey))[i] != 0 ) {
397        empty_is_zero = false;               // won't be able to optimize :-(
398        break;
399      }
400    empty_is_zero = false;                   // empty heuristic seems to hurt
401
402    assert(!table);                  // must set before first use
403    // num_buckets was set in constructor even though table was NULL
404    table = (value_type *) malloc(num_buckets * sizeof(*table));
405    assert(table);
406    set_empty(0, num_buckets);
407  }
408
409  // FUNCTIONS CONCERNING SIZE
410 public:
411  size_type size() const      { return num_elements - num_deleted; }
412  // Buckets are always a power of 2
413  size_type max_size() const  { return (size_type(-1) >> 1U) + 1; }
414  bool empty() const          { return size() == 0; }
415  size_type bucket_count() const      { return num_buckets; }
416  size_type max_bucket_count() const  { return max_size(); }
417
418 private:
419  // Because of the above, size_type(-1) is never legal; use it for errors
420  static const size_type ILLEGAL_BUCKET = size_type(-1);
421
422 private:
423  // This is the smallest size a hashtable can be without being too crowded
424  // If you like, you can give a min #buckets as well as a min #elts
425  size_type min_size(size_type num_elts, size_type min_buckets_wanted) {
426    size_type sz = HT_MIN_BUCKETS;             // min buckets allowed
427    while ( sz < min_buckets_wanted || num_elts >= sz * HT_OCCUPANCY_FLT )
428      sz *= 2;
429    return sz;
430  }
431
  // Used after a string of deletes: shrinks the table (by copy-and-swap)
  // if the live-element count has fallen below the shrink threshold.
  void maybe_shrink() {
    assert(num_elements >= num_deleted);
    assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);

    if ( (num_elements-num_deleted) <= shrink_threshold &&
         bucket_count() > HT_MIN_BUCKETS ) {
      size_type sz = bucket_count() / 2;    // find how much we should shrink
      while ( sz > HT_MIN_BUCKETS &&
              (num_elements - num_deleted) <= sz * HT_EMPTY_FLT )
        sz /= 2;                            // stay a power of 2
      dense_hashtable tmp(*this, sz);       // Do the actual resizing
      swap(tmp);                            // now we are tmp
    }
    consider_shrink = false;                // because we just considered it
  }

  // We'll let you resize a hashtable -- though this makes us copy all!
  // When you resize, you say, "make it big enough for this many more elements"
  void resize_delta(size_type delta, size_type min_buckets_wanted = 0) {
    if ( consider_shrink )                   // see if lots of deletes happened
      maybe_shrink();
    if ( bucket_count() > min_buckets_wanted &&
         (num_elements + delta) <= enlarge_threshold )
      return;                                // we're ok as we are

    // Grow (never shrink here) to the smallest power-of-two size that
    // keeps occupancy below HT_OCCUPANCY_FLT, via copy-and-swap.
    const size_type resize_to = min_size(num_elements + delta,
                                         min_buckets_wanted);
    if ( resize_to > bucket_count() ) {      // we don't have enough buckets
      dense_hashtable tmp(*this, resize_to);
      swap(tmp);                             // now we are tmp
    }
  }
466
  // Used to actually do the rehashing when we grow/shrink a hashtable.
  // Empties *this, grows it to fit ht's live elements if needed, then
  // re-inserts every non-deleted element of ht at its new home bucket.
  void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted = 0) {
    clear();            // clear table, set num_deleted to 0

    // If we need to change the size of our table, do it now
    const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
    if ( resize_to > bucket_count() ) {      // we don't have enough buckets
      table = (value_type *) realloc(table, resize_to * sizeof(*table));
      assert(table != NULL);
      set_empty(num_buckets, resize_to);     // empty everything between them
      num_buckets = resize_to;
      reset_thresholds();
    }

    // We use a normal iterator to get non-deleted bcks from ht
    // We could use insert() here, but since we know there are
    // no duplicates and no deleted items, we can be more efficient
    assert((bucket_count() & (bucket_count()-1)) == 0);      // a power of two
    for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
      size_type num_probes = 0;              // how many times we've probed
      size_type bucknum;
      const size_type bucket_count_minus_one = bucket_count() - 1;
      // Probe until the first empty bucket; no equality checks needed
      // since the source table has no duplicates.  NOTE(review): JUMP_'s
      // first argument is textually discarded by both probing schemes,
      // so the nonexistent name 'key' below is harmless -- but it would
      // break compilation if JUMP_ ever actually used its first arg.
      for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
           !test_empty(bucknum);                               // not empty
           bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
        ++num_probes;
        assert(num_probes < bucket_count()); // or else the hashtable is full
      }
      set_value(&table[bucknum], *it);       // copies the value to here
      num_elements++;
    }
  }
499
500  // Required by the spec for hashed associative container
501 public:
502  // Though the docs say this should be num_buckets, I think it's much
503  // more useful as req_elements.  As a special feature, calling with
504  // req_elements==0 will cause us to shrink if we can, saving space.
505  void resize(size_type req_elements) {       // resize to this or larger
506    if ( consider_shrink || req_elements == 0 )
507      maybe_shrink();
508    if ( req_elements > num_elements )
509      return resize_delta(req_elements - num_elements, 0);
510  }
511
512
  // CONSTRUCTORS -- as required by the specs, we take a size,
  // but also let you specify a hashfunction, key comparator,
  // and key extractor.  We also define a copy constructor and =.
  // DESTRUCTOR -- needs to free the table
  explicit dense_hashtable(size_type n = 0,
                           const HashFcn& hf = HashFcn(),
                           const EqualKey& eql = EqualKey(),
                           const ExtractKey& ext = ExtractKey())
    : hash(hf), equals(eql), get_key(ext), num_deleted(0),
      use_deleted(false), use_empty(false),
      empty_is_zero(false), delkey(), emptykey(),
      table(NULL), num_buckets(min_size(0, n)), num_elements(0) {
    // table is NULL until emptykey is set.  However, we set num_buckets
    // here so we know how much space to allocate once emptykey is set
    reset_thresholds();
  }

  // As a convenience for resize(), we allow an optional second argument
  // which lets you make this new hashtable a different size than ht
  dense_hashtable(const dense_hashtable& ht, size_type min_buckets_wanted = 0)
    : hash(ht.hash), equals(ht.equals), get_key(ht.get_key), num_deleted(0),
      use_deleted(ht.use_deleted), use_empty(ht.use_empty),
      empty_is_zero(ht.empty_is_zero), delkey(ht.delkey), emptykey(ht.emptykey),
      table(NULL), num_buckets(min_size(0, min_buckets_wanted)),
      num_elements(0) {
    reset_thresholds();
    copy_from(ht, min_buckets_wanted);   // copy_from() ignores deleted entries
  }

  dense_hashtable& operator= (const dense_hashtable& ht) {
    if (&ht == this)  return *this;        // don't copy onto ourselves
    clear();
    hash = ht.hash;
    equals = ht.equals;
    get_key = ht.get_key;
    use_deleted = ht.use_deleted;
    use_empty = ht.use_empty;
    empty_is_zero = ht.empty_is_zero;
    set_key(&delkey, ht.delkey);           // placement-new copy; see set_key()
    set_key(&emptykey, ht.emptykey);
    copy_from(ht);                         // sets num_deleted to 0 too
    return *this;
  }

  ~dense_hashtable() {
    // Elements are required to be PODs, so no per-element destructors.
    if (table)  free(table);
  }
560
  // Many STL algorithms use swap instead of copy constructors
  void swap(dense_hashtable& ht) {
    STL_NAMESPACE::swap(hash, ht.hash);
    STL_NAMESPACE::swap(equals, ht.equals);
    STL_NAMESPACE::swap(get_key, ht.get_key);
    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
    STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
    STL_NAMESPACE::swap(use_empty, ht.use_empty);
    STL_NAMESPACE::swap(empty_is_zero, ht.empty_is_zero);
    // Keys can't go through std::swap because they may have const
    // members, so do a three-way placement-new exchange instead.
    { key_type tmp;     // for annoying reasons, swap() doesn't work
      set_key(&tmp, delkey);
      set_key(&delkey, ht.delkey);
      set_key(&ht.delkey, tmp);
    }
    { key_type tmp;     // for annoying reasons, swap() doesn't work
      set_key(&tmp, emptykey);
      set_key(&emptykey, ht.emptykey);
      set_key(&ht.emptykey, tmp);
    }
    STL_NAMESPACE::swap(table, ht.table);
    STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
    STL_NAMESPACE::swap(num_elements, ht.num_elements);
    // Thresholds derive from num_buckets, which just changed on both
    // sides, so recompute them last.
    reset_thresholds();
    ht.reset_thresholds();
  }
586
  // It's always nice to be able to clear a table without deallocating it.
  // Shrinks the array back to the minimum size and marks every bucket empty.
  void clear() {
    num_buckets = min_size(0,0);          // our new size
    reset_thresholds();
    // NOTE(review): the realloc() result is assigned before being checked,
    // so on failure the old block leaks and table is NULL; the assert
    // catches that in debug builds only.
    table = (value_type *) realloc(table, num_buckets * sizeof(*table));
    assert(table);
    set_empty(0, num_buckets);
    num_elements = 0;
    num_deleted = 0;
  }
597
598
  // LOOKUP ROUTINES
 private:
  // Returns a pair of positions: 1st where the object is, 2nd where
  // it would go if you wanted to insert it.  1st is ILLEGAL_BUCKET
  // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
  // Note: because of deletions where-to-insert is not trivial: it's the
  // first deleted bucket we see, as long as we don't find the key later
  pair<size_type, size_type> find_position(const key_type &key) const {
    size_type num_probes = 0;              // how many times we've probed
    const size_type bucket_count_minus_one = bucket_count() - 1;
    // bucket_count() is a power of two, so '&' is a cheap modulus.
    size_type bucknum = hash(key) & bucket_count_minus_one;
    size_type insert_pos = ILLEGAL_BUCKET; // where we would insert
    while ( 1 ) {                          // probe until something happens
      if ( test_empty(bucknum) ) {         // bucket is empty
        if ( insert_pos == ILLEGAL_BUCKET )   // found no prior place to insert
          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
        else
          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);

      } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert
        if ( insert_pos == ILLEGAL_BUCKET )
          insert_pos = bucknum;

      } else if ( equals(key, get_key(table[bucknum])) ) {
        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
      }
      ++num_probes;                        // we're doing another probe
      // Quadratic-ish probing: the stride grows by one each miss (JUMP_).
      bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
      assert(num_probes < bucket_count()); // don't probe too many times!
    }
  }
630
631 public:
632  iterator find(const key_type& key) {
633    if ( size() == 0 ) return end();
634    pair<size_type, size_type> pos = find_position(key);
635    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
636      return end();
637    else
638      return iterator(this, table + pos.first, table + num_buckets, false);
639  }
640
641  const_iterator find(const key_type& key) const {
642    if ( size() == 0 ) return end();
643    pair<size_type, size_type> pos = find_position(key);
644    if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
645      return end();
646    else
647      return const_iterator(this, table + pos.first, table+num_buckets, false);
648  }
649
650  // Counts how many elements have key key.  For maps, it's either 0 or 1.
651  size_type count(const key_type &key) const {
652    pair<size_type, size_type> pos = find_position(key);
653    return pos.first == ILLEGAL_BUCKET ? 0 : 1;
654  }
655
656  // Likewise, equal_range doesn't really make sense for us.  Oh well.
657  pair<iterator,iterator> equal_range(const key_type& key) {
658    const iterator pos = find(key);      // either an iterator or end
659    return pair<iterator,iterator>(pos, pos);
660  }
661  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
662    const const_iterator pos = find(key);      // either an iterator or end
663    return pair<iterator,iterator>(pos, pos);
664  }
665
666
  // INSERTION ROUTINES
 private:
  // If you know *this is big enough to hold obj, use this routine.
  // Returns (iterator to the element, true-if-inserted), like the
  // standard associative-container insert().
  pair<iterator, bool> insert_noresize(const value_type& obj) {
    const pair<size_type,size_type> pos = find_position(get_key(obj));
    if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
      return pair<iterator,bool>(iterator(this, table + pos.first,
                                          table + num_buckets, false),
                                 false);          // false: we didn't insert
    } else {                                 // pos.second says where to put it
      if ( test_deleted(pos.second) ) {      // just replace if it's been del.
        // Re-using a deleted slot: the element count stays the same,
        // but the deleted count drops.
        const_iterator delpos(this, table + pos.second,              // shrug:
                              table + num_buckets, false);// shouldn't need const
        clear_deleted(delpos);
        assert( num_deleted > 0);
        --num_deleted;                       // used to be, now it isn't
      } else {
        ++num_elements;                      // replacing an empty bucket
      }
      set_value(&table[pos.second], obj);    // placement-new copy into slot
      return pair<iterator,bool>(iterator(this, table + pos.second,
                                          table + num_buckets, false),
                                 true);           // true: we did insert
    }
  }
692
 public:
  // This is the normal insert routine, used by the outside world
  pair<iterator, bool> insert(const value_type& obj) {
    resize_delta(1);                      // adding an object, grow if need be
    return insert_noresize(obj);
  }

  // When inserting a lot at a time, we specialize on the type of iterator
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l) {
    // specializes on iterator type (tag dispatch to the overloads below)
    insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
  }

  // Iterator supports operator-, resize before inserting
  template <class ForwardIterator>
  void insert(ForwardIterator f, ForwardIterator l,
              STL_NAMESPACE::forward_iterator_tag) {
    size_type n = STL_NAMESPACE::distance(f, l);   // TODO(csilvers): standard?
    resize_delta(n);                     // one resize up front, then no-resize inserts
    for ( ; n > 0; --n, ++f)
      insert_noresize(*f);
  }

  // Arbitrary iterator, can't tell how much to resize
  template <class InputIterator>
  void insert(InputIterator f, InputIterator l,
              STL_NAMESPACE::input_iterator_tag) {
    for ( ; f != l; ++f)
      insert(*f);                        // each insert may resize
  }
724
725  // DELETION ROUTINES
726  size_type erase(const key_type& key) {
727    const_iterator pos = find(key);   // shrug: shouldn't need to be const
728    if ( pos != end() ) {
729      assert(!test_deleted(pos));  // or find() shouldn't have returned it
730      set_deleted(pos);
731      ++num_deleted;
732      consider_shrink = true;      // will think about shrink after next insert
733      return 1;                    // because we deleted one thing
734    } else {
735      return 0;                    // because we deleted nothing
736    }
737  }
738
739  // This is really evil: really it should be iterator, not const_iterator.
740  // But...the only reason keys are const is to allow lookup.
741  // Since that's a moot issue for deleted keys, we allow const_iterators
742  void erase(const_iterator pos) {
743    if ( pos == end() ) return;    // sanity check
744    if ( set_deleted(pos) ) {      // true if object has been newly deleted
745      ++num_deleted;
746      consider_shrink = true;      // will think about shrink after next insert
747    }
748  }
749
750  void erase(const_iterator f, const_iterator l) {
751    for ( ; f != l; ++f) {
752      if ( set_deleted(f)  )       // should always be true
753        ++num_deleted;
754    }
755    consider_shrink = true;        // will think about shrink after next insert
756  }
757
758
  // COMPARISON
  bool operator==(const dense_hashtable& ht) const {
    // We really want to check that the hash functions are the same
    // but alas there's no way to do this.  We just hope.
    // NOTE(review): `table == ht.table` compares the storage *pointers*,
    // not the elements -- two tables with identical contents but separate
    // allocations compare unequal.  In practice this is close to identity
    // comparison; confirm no caller expects deep (element-wise) equality.
    return ( num_deleted == ht.num_deleted && table == ht.table );
  }
765  bool operator!=(const dense_hashtable& ht) const {
766    return !(*this == ht);
767  }
768
769
  // I/O
  // We support reading and writing hashtables to disk.  Alas, since
  // I don't know how to write a hasher or key_equal, you have to make
  // sure everything but the table is the same.  We compact before writing
  //
  // NOTE: These functions are currently TODO.  They've not been implemented.
  // Writes table metadata (bucket/element counts) to fp.
  // Returns true on success; currently always false (unimplemented stub,
  // though it does compact away deleted entries as a side effect).
  bool write_metadata(FILE *fp) {
    squash_deleted();           // so we don't have to worry about delkey
    return false;               // TODO
  }
780
  // Reads table metadata back from fp and rebuilds the (empty) table.
  // Precondition: the empty key must already be set (use_empty is true).
  // Returns true on success; currently always false (unimplemented stub).
  bool read_metadata(FILE *fp) {
    num_deleted = 0;            // since we got rid before writing
    assert(use_empty);          // have to set this before calling us
    if (table)  free(table);    // we'll make our own
    // TODO: read magic number
    // TODO: read num_buckets
    reset_thresholds();
    table = (value_type *) malloc(num_buckets * sizeof(*table));
    assert(table);              // NOTE(review): malloc failure only caught in debug builds
    set_empty(0, num_buckets);
    // TODO: read num_elements
    for ( size_type i = 0; i < num_elements; ++i ) {
      // TODO: read bucket_num
      // TODO: set with non-empty, non-deleted value
    }
    return false;               // TODO
  }
798
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  // Writes each bucket's raw bytes to fp with fwrite().
  // Returns false on write failure; currently always returns false
  // because the function is an unfinished stub (see TODOs).
  bool write_nopointer_data(FILE *fp) const {
    for ( const_iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return false;        // TODO
  }
809
  // When reading, we have to override the potential const-ness of *it
  // Reads each bucket's raw bytes back from fp with fread().
  // Returns false on read failure; currently always returns false
  // because the function is an unfinished stub (see TODOs).
  bool read_nopointer_data(FILE *fp) {
    for ( iterator it = begin(); it != end(); ++it ) {
      // TODO: skip empty/deleted values
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return false;        // TODO
  }
819
820 private:
821  // We need to enforce that our value_type is a Plain Old Data Type
822  // (so we know realloc and memmove will work).  We use traits to
823  // enforce this.  The following gives a compile-time error if
824  // is_POD_type is false (which is the default for user types).
825  //
826  // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
827  // and if so tell the compiler via code similar to this:
828  // template<> struct __type_traits<classname> {
829  //   typedef __true_type    has_trivial_default_constructor;
830  //   typedef __true_type    has_trivial_copy_constructor;
831  //   typedef __true_type    has_trivial_assignment_operator;
832  //   typedef __true_type    has_trivial_destructor;
833  //   typedef __true_type    is_POD_type;
834  // };
835  //
836  // If this is part of a hash_map, you need to make sure both the
837  // Key and Value types are POD types, if they're user-defined.
#ifdef UNDERSTANDS_TYPE_TRAITS
  // Dummy whose out-of-class definition (below) only compiles when
  // value_type is a POD -- a compile-time enforcement hook.
  static __true_type * const enforce_pod;
#endif

  // The actual data
  hasher hash;                      // required by hashed_associative_container
  key_equal equals;                 // compares two keys for equality
  ExtractKey get_key;               // extracts the key from a value_type
  size_type num_deleted;        // how many occupied buckets are marked deleted
  bool use_deleted;                          // false until delkey has been set
  bool use_empty;                          // you must do this before you start
  bool empty_is_zero;                   // can optimize this case when emptying
  key_type delkey;                           // which key marks deleted entries
  key_type emptykey;                          // which key marks unused entries
  value_type *table;                // the bucket array itself (malloc'd)
  size_type num_buckets;            // current size of the bucket array
  size_type num_elements;           // occupied (non-deleted) buckets
  size_type shrink_threshold;                     // num_buckets * HT_EMPTY_FLT
  size_type enlarge_threshold;                // num_buckets * HT_OCCUPANCY_FLT
  bool consider_shrink;   // true if we should try to shrink before next insert
858
859  void reset_thresholds() {
860    enlarge_threshold = static_cast<size_type>(num_buckets*HT_OCCUPANCY_FLT);
861    shrink_threshold = static_cast<size_type>(num_buckets*HT_EMPTY_FLT);
862    consider_shrink = false;   // whatever caused us to reset already considered
863  }
864};
865
// We need a global swap as well
// Non-member swap (found via argument-dependent lookup) that simply
// forwards to the member swap().
template <class V, class K, class HF, class ExK, class EqK, class A>
inline void swap(dense_hashtable<V,K,HF,ExK,EqK,A> &x,
                 dense_hashtable<V,K,HF,ExK,EqK,A> &y) {
  x.swap(y);
}
872
#ifdef UNDERSTANDS_TYPE_TRAITS
// Out-of-class definition of the POD-enforcement dummy: the static_cast
// from a null is_POD_type* only compiles when
// __type_traits<value_type>::is_POD_type is __true_type, i.e. when
// value_type is a POD.  Otherwise you get the compile error described
// in the comment block above the declaration.
template <class V, class K, class HF, class ExK, class EqK, class A>
__true_type * const dense_hashtable<V,K,HF,ExK,EqK,A>::enforce_pod =
static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
#endif
878
879#undef JUMP_
880
// Out-of-class definition of the static ILLEGAL_BUCKET constant,
// required so the constant can be odr-used (e.g. bound to a reference).
template <class V, class K, class HF, class ExK, class EqK, class A>
const typename dense_hashtable<V,K,HF,ExK,EqK,A>::size_type
  dense_hashtable<V,K,HF,ExK,EqK,A>::ILLEGAL_BUCKET;
884
// How full we let the table get before we resize.  Knuth says .8 is
// good -- higher causes us to probe too much, though saves memory.
// NOTE(review): this copy uses 0.5, presumably lowered from the usual
// 0.8 to trade memory for fewer probes / faster lookups -- confirm.
template <class V, class K, class HF, class ExK, class EqK, class A>
const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT = 0.5f;
889
// How empty we let the table get before we resize lower.
// It should be less than OCCUPANCY_FLT / 2 or we thrash resizing
// (here: 0.4 * 0.5 = 0.2, safely below 0.25).
template <class V, class K, class HF, class ExK, class EqK, class A>
const float dense_hashtable<V,K,HF,ExK,EqK,A>::HT_EMPTY_FLT = 0.4 *
dense_hashtable<V,K,HF,ExK,EqK,A>::HT_OCCUPANCY_FLT;
895
896_END_GOOGLE_NAMESPACE_
897
898#endif /* _DENSEHASHTABLE_H_ */
Note: See TracBrowser for help on using the repository browser.