source: GTP/trunk/Lib/Vis/Preprocessing/src/sparsehash/src/google/dense_hash_set @ 2162

Revision 2162, 9.6 KB checked in by mattausch, 17 years ago (diff)

improved hash performance with google hashmap

Line 
1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Craig Silverstein
32//
// This is just a very thin wrapper over densehashtable.h, just
// like sgi stl's stl_hash_set is a very thin wrapper over
// stl_hashtable.  Unlike dense_hash_map, a set has no data_type
// and therefore defines no operator[]: the value stored in the
// table is itself the key.
38//
39// This is more different from dense_hash_map than you might think,
40// because all iterators for sets are const (you obviously can't
41// change the key, and for sets there is no value).
42//
43// NOTE: this is exactly like sparse_hash_set.h, with the word
44// "sparse" replaced by "dense", except for the addition of
45// set_empty_key().
46//
47//   YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION.
48//
49// Otherwise your program will die in mysterious ways.
50//
51// In other respects, we adhere mostly to the STL semantics for
52// hash-set.  One important exception is that insert() invalidates
53// iterators entirely.  On the plus side, though, erase() doesn't
54// invalidate iterators at all, or even change the ordering of elements.
55//
56// Here are a few "power user" tips:
57//
58//    1) set_deleted_key():
59//         If you want to use erase() you must call set_deleted_key(),
60//         in addition to set_empty_key(), after construction.
61//
62//    2) resize(0):
63//         When an item is deleted, its memory isn't freed right
64//         away.  This allows you to iterate over a hashtable,
65//         and call erase(), without invalidating the iterator.
66//         To force the memory to be freed, call resize(0).
67//
68// Guide to what kind of hash_set to use:
69//   (1) dense_hash_set: fastest, uses the most memory
70//   (2) sparse_hash_set: slowest, uses the least memory
71//   (3) hash_set (STL): in the middle
72// Typically I use sparse_hash_set when I care about space and/or when
73// I need to save the hashtable on disk.  I use hash_set otherwise.  I
74// don't personally use dense_hash_set ever; the only use of
75// dense_hash_set I know of is to work around malloc() bugs in some
76// systems (dense_hash_set has a particularly simple allocation scheme).
77//
78// - dense_hash_set has, typically, a factor of 2 memory overhead (if your
79//   data takes up X bytes, the hash_set uses X more bytes in overhead).
80// - sparse_hash_set has about 2 bits overhead per entry.
81// - sparse_hash_map can be 3-7 times slower than the others for lookup and,
82//   especially, inserts.  See time_hash_map.cc for details.
83//
84// See /usr/(local/)?doc/sparsehash-0.1/dense_hash_set.html
85// for information about how to use this class.
86
87#ifndef _DENSE_HASH_SET_H_
88#define _DENSE_HASH_SET_H_
89
90#include <google/sparsehash/config.h>
91#include <stdio.h>                   // for FILE * in read()/write()
92#include <algorithm>                 // for the default template args
93#include <functional>                // for equal_to
94#include <memory>                    // for alloc<>
95#include <google/sparsehash/hash_fun.h>
96#include <google/sparsehash/densehashtable.h>
97
98
99_START_GOOGLE_NAMESPACE_
100
101using STL_NAMESPACE::pair;
102
103template <class Value,
104          class HashFcn = HASH_NAMESPACE::hash<Value>,
105          class EqualKey = STL_NAMESPACE::equal_to<Value>,
106          class Alloc = STL_NAMESPACE::allocator<Value> >
107class dense_hash_set {
108 private:
109
110  // Apparently identity is not stl-standard, so we define our own
111  struct Identity {
112    Value& operator()(Value& v) const { return v; }
113    const Value& operator()(const Value& v) const { return v; }
114  };
115
116  // The actual data
117  typedef dense_hashtable<Value, Value, HashFcn, Identity, EqualKey, Alloc> ht;
118  ht rep;
119
120 public:
121  typedef typename ht::key_type key_type;
122  typedef typename ht::value_type value_type;
123  typedef typename ht::hasher hasher;
124  typedef typename ht::key_equal key_equal;
125
126  typedef typename ht::size_type size_type;
127  typedef typename ht::difference_type difference_type;
128  typedef typename ht::const_pointer pointer;
129  typedef typename ht::const_pointer const_pointer;
130  typedef typename ht::const_reference reference;
131  typedef typename ht::const_reference const_reference;
132
133  typedef typename ht::const_iterator iterator;
134  typedef typename ht::const_iterator const_iterator;
135
136
137  // Iterator functions -- recall all iterators are const
138  iterator begin() const              { return rep.begin(); }
139  iterator end() const                { return rep.end(); }
140
141
142  // Accessor functions
143  hasher hash_funct() const { return rep.hash_funct(); }
144  key_equal key_eq() const  { return rep.key_eq(); }
145
146
147  // Constructors
148  explicit dense_hash_set(size_type n = 0,
149                          const hasher& hf = hasher(),
150                          const key_equal& eql = key_equal())
151    : rep(n, hf, eql) { }
152 
153  template <class InputIterator>
154  dense_hash_set(InputIterator f, InputIterator l,
155                 size_type n = 0,
156                 const hasher& hf = hasher(),
157                 const key_equal& eql = key_equal()) {
158    rep.insert(f, l);
159  }
160  // We use the default copy constructor
161  // We use the default operator=()
162  // We use the default destructor
163
164  void clear()                        { rep.clear(); }
165  void swap(dense_hash_set& hs)       { rep.swap(hs.rep); }
166
167
168  // Functions concerning size
169  size_type size() const              { return rep.size(); }
170  size_type max_size() const          { return rep.max_size(); }
171  bool empty() const                  { return rep.empty(); }
172  size_type bucket_count() const      { return rep.bucket_count(); }
173  size_type max_bucket_count() const  { return rep.max_bucket_count(); }
174
175  void resize(size_type hint)         { rep.resize(hint); }
176
177
178  // Lookup routines
179  iterator find(const key_type& key) const           { return rep.find(key); }
180
181  size_type count(const key_type& key) const         { return rep.count(key); }
182 
183  pair<iterator, iterator> equal_range(const key_type& key) const {
184    return rep.equal_range(key);
185  }
186
187  // Insertion routines
188  pair<iterator, bool> insert(const value_type& obj) {
189    pair<typename ht::iterator, bool> p = rep.insert(obj);
190    return pair<iterator, bool>(p.first, p.second);   // const to non-const
191  }
192  template <class InputIterator>
193  void insert(InputIterator f, InputIterator l)      { rep.insert(f, l); }
194  void insert(const_iterator f, const_iterator l)    { rep.insert(f, l); }
195  // required for std::insert_iterator; the passed-in iterator is ignored
196  iterator insert(iterator, const value_type& obj)   { return insert(obj).first; }
197
198
199  // Deletion and empty routines
200  // THESE ARE NON-STANDARD!  I make you specify an "impossible" key
201  // value to identify deleted and empty buckets.  You can change the
202  // deleted key as time goes on, or get rid of it entirely to be insert-only.
203  void set_empty_key(const key_type& key)     { rep.set_empty_key(key); }
204  void set_deleted_key(const key_type& key)   { rep.set_deleted_key(key); }
205  void clear_deleted_key()                    { rep.clear_deleted_key(); }
206
207  // These are standard
208  size_type erase(const key_type& key)               { return rep.erase(key); }
209  void erase(iterator it)                            { rep.erase(it); }
210  void erase(iterator f, iterator l)                 { rep.erase(f, l); }
211
212
213  // Comparison
214  bool operator==(const dense_hash_set& hs) const    { return rep == hs.rep; }
215  bool operator!=(const dense_hash_set& hs) const    { return rep != hs.rep; }
216
217
218  // I/O -- this is an add-on for writing metainformation to disk
219  bool write_metadata(FILE *fp)       { return rep.write_metadata(fp); }
220  bool read_metadata(FILE *fp)        { return rep.read_metadata(fp); }
221  bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); }
222  bool read_nopointer_data(FILE *fp)  { return rep.read_nopointer_data(fp); }
223};
224
225template <class Val, class HashFcn, class EqualKey, class Alloc>
226inline void swap(dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
227                 dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) {
228  hs1.swap(hs2);
229}
230
231_END_GOOGLE_NAMESPACE_
232
233#endif /* _DENSE_HASH_SET_H_ */
Note: See TracBrowser for help on using the repository browser.