1 | // Copyright (c) 2005, Google Inc.
|
---|
2 | // All rights reserved.
|
---|
3 | //
|
---|
4 | // Redistribution and use in source and binary forms, with or without
|
---|
5 | // modification, are permitted provided that the following conditions are
|
---|
6 | // met:
|
---|
7 | //
|
---|
8 | // * Redistributions of source code must retain the above copyright
|
---|
9 | // notice, this list of conditions and the following disclaimer.
|
---|
10 | // * Redistributions in binary form must reproduce the above
|
---|
11 | // copyright notice, this list of conditions and the following disclaimer
|
---|
12 | // in the documentation and/or other materials provided with the
|
---|
13 | // distribution.
|
---|
14 | // * Neither the name of Google Inc. nor the names of its
|
---|
15 | // contributors may be used to endorse or promote products derived from
|
---|
16 | // this software without specific prior written permission.
|
---|
17 | //
|
---|
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
---|
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
---|
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
---|
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
---|
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
---|
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
---|
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
---|
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
---|
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
---|
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
---|
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
---|
29 |
|
---|
30 | // ---
|
---|
31 | // Author: Craig Silverstein
|
---|
32 | //
|
---|
33 | // This is just a very thin wrapper over densehashtable.h, just
|
---|
34 | // like sgi stl's stl_hash_set is a very thin wrapper over
|
---|
35 | // stl_hashtable. The major thing we define is operator[], because
|
---|
36 | // we have a concept of a data_type which stl_hashtable doesn't
|
---|
37 | // (it only has a key and a value).
|
---|
38 | //
|
---|
39 | // This is more different from dense_hash_map than you might think,
|
---|
40 | // because all iterators for sets are const (you obviously can't
|
---|
41 | // change the key, and for sets there is no value).
|
---|
42 | //
|
---|
43 | // NOTE: this is exactly like sparse_hash_set.h, with the word
|
---|
44 | // "sparse" replaced by "dense", except for the addition of
|
---|
45 | // set_empty_key().
|
---|
46 | //
|
---|
47 | // YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION.
|
---|
48 | //
|
---|
49 | // Otherwise your program will die in mysterious ways.
|
---|
50 | //
|
---|
51 | // In other respects, we adhere mostly to the STL semantics for
|
---|
52 | // hash-set. One important exception is that insert() invalidates
|
---|
53 | // iterators entirely. On the plus side, though, erase() doesn't
|
---|
54 | // invalidate iterators at all, or even change the ordering of elements.
|
---|
55 | //
|
---|
56 | // Here are a few "power user" tips:
|
---|
57 | //
|
---|
58 | // 1) set_deleted_key():
|
---|
59 | // If you want to use erase() you must call set_deleted_key(),
|
---|
60 | // in addition to set_empty_key(), after construction.
|
---|
61 | //
|
---|
62 | // 2) resize(0):
|
---|
63 | // When an item is deleted, its memory isn't freed right
|
---|
64 | // away. This allows you to iterate over a hashtable,
|
---|
65 | // and call erase(), without invalidating the iterator.
|
---|
66 | // To force the memory to be freed, call resize(0).
|
---|
67 | //
|
---|
68 | // Guide to what kind of hash_set to use:
|
---|
69 | // (1) dense_hash_set: fastest, uses the most memory
|
---|
70 | // (2) sparse_hash_set: slowest, uses the least memory
|
---|
71 | // (3) hash_set (STL): in the middle
|
---|
72 | // Typically I use sparse_hash_set when I care about space and/or when
|
---|
73 | // I need to save the hashtable on disk. I use hash_set otherwise. I
|
---|
74 | // don't personally use dense_hash_set ever; the only use of
|
---|
75 | // dense_hash_set I know of is to work around malloc() bugs in some
|
---|
76 | // systems (dense_hash_set has a particularly simple allocation scheme).
|
---|
77 | //
|
---|
78 | // - dense_hash_set has, typically, a factor of 2 memory overhead (if your
|
---|
79 | // data takes up X bytes, the hash_set uses X more bytes in overhead).
|
---|
80 | // - sparse_hash_set has about 2 bits overhead per entry.
|
---|
81 | // - sparse_hash_map can be 3-7 times slower than the others for lookup and,
|
---|
82 | // especially, inserts. See time_hash_map.cc for details.
|
---|
83 | //
|
---|
84 | // See /usr/(local/)?doc/sparsehash-0.1/dense_hash_set.html
|
---|
85 | // for information about how to use this class.
|
---|
86 |
|
---|
87 | #ifndef _DENSE_HASH_SET_H_
|
---|
88 | #define _DENSE_HASH_SET_H_
|
---|
89 |
|
---|
90 | #include <google/sparsehash/config.h>
|
---|
91 | #include <stdio.h> // for FILE * in read()/write()
|
---|
92 | #include <algorithm> // for the default template args
|
---|
93 | #include <functional> // for equal_to
|
---|
94 | #include <memory> // for alloc<>
|
---|
95 | #include <google/sparsehash/hash_fun.h>
|
---|
96 | #include <google/sparsehash/densehashtable.h>
|
---|
97 |
|
---|
98 |
|
---|
99 | _START_GOOGLE_NAMESPACE_
|
---|
100 |
|
---|
101 | using STL_NAMESPACE::pair;
|
---|
102 |
|
---|
103 | template <class Value,
|
---|
104 | class HashFcn = HASH_NAMESPACE::hash<Value>,
|
---|
105 | class EqualKey = STL_NAMESPACE::equal_to<Value>,
|
---|
106 | class Alloc = STL_NAMESPACE::allocator<Value> >
|
---|
107 | class dense_hash_set {
|
---|
108 | private:
|
---|
109 |
|
---|
110 | // Apparently identity is not stl-standard, so we define our own
|
---|
111 | struct Identity {
|
---|
112 | Value& operator()(Value& v) const { return v; }
|
---|
113 | const Value& operator()(const Value& v) const { return v; }
|
---|
114 | };
|
---|
115 |
|
---|
116 | // The actual data
|
---|
117 | typedef dense_hashtable<Value, Value, HashFcn, Identity, EqualKey, Alloc> ht;
|
---|
118 | ht rep;
|
---|
119 |
|
---|
120 | public:
|
---|
121 | typedef typename ht::key_type key_type;
|
---|
122 | typedef typename ht::value_type value_type;
|
---|
123 | typedef typename ht::hasher hasher;
|
---|
124 | typedef typename ht::key_equal key_equal;
|
---|
125 |
|
---|
126 | typedef typename ht::size_type size_type;
|
---|
127 | typedef typename ht::difference_type difference_type;
|
---|
128 | typedef typename ht::const_pointer pointer;
|
---|
129 | typedef typename ht::const_pointer const_pointer;
|
---|
130 | typedef typename ht::const_reference reference;
|
---|
131 | typedef typename ht::const_reference const_reference;
|
---|
132 |
|
---|
133 | typedef typename ht::const_iterator iterator;
|
---|
134 | typedef typename ht::const_iterator const_iterator;
|
---|
135 |
|
---|
136 |
|
---|
137 | // Iterator functions -- recall all iterators are const
|
---|
138 | iterator begin() const { return rep.begin(); }
|
---|
139 | iterator end() const { return rep.end(); }
|
---|
140 |
|
---|
141 |
|
---|
142 | // Accessor functions
|
---|
143 | hasher hash_funct() const { return rep.hash_funct(); }
|
---|
144 | key_equal key_eq() const { return rep.key_eq(); }
|
---|
145 |
|
---|
146 |
|
---|
147 | // Constructors
|
---|
148 | explicit dense_hash_set(size_type n = 0,
|
---|
149 | const hasher& hf = hasher(),
|
---|
150 | const key_equal& eql = key_equal())
|
---|
151 | : rep(n, hf, eql) { }
|
---|
152 |
|
---|
153 | template <class InputIterator>
|
---|
154 | dense_hash_set(InputIterator f, InputIterator l,
|
---|
155 | size_type n = 0,
|
---|
156 | const hasher& hf = hasher(),
|
---|
157 | const key_equal& eql = key_equal()) {
|
---|
158 | rep.insert(f, l);
|
---|
159 | }
|
---|
160 | // We use the default copy constructor
|
---|
161 | // We use the default operator=()
|
---|
162 | // We use the default destructor
|
---|
163 |
|
---|
164 | void clear() { rep.clear(); }
|
---|
165 | void swap(dense_hash_set& hs) { rep.swap(hs.rep); }
|
---|
166 |
|
---|
167 |
|
---|
168 | // Functions concerning size
|
---|
169 | size_type size() const { return rep.size(); }
|
---|
170 | size_type max_size() const { return rep.max_size(); }
|
---|
171 | bool empty() const { return rep.empty(); }
|
---|
172 | size_type bucket_count() const { return rep.bucket_count(); }
|
---|
173 | size_type max_bucket_count() const { return rep.max_bucket_count(); }
|
---|
174 |
|
---|
175 | void resize(size_type hint) { rep.resize(hint); }
|
---|
176 |
|
---|
177 |
|
---|
178 | // Lookup routines
|
---|
179 | iterator find(const key_type& key) const { return rep.find(key); }
|
---|
180 |
|
---|
181 | size_type count(const key_type& key) const { return rep.count(key); }
|
---|
182 |
|
---|
183 | pair<iterator, iterator> equal_range(const key_type& key) const {
|
---|
184 | return rep.equal_range(key);
|
---|
185 | }
|
---|
186 |
|
---|
187 | // Insertion routines
|
---|
188 | pair<iterator, bool> insert(const value_type& obj) {
|
---|
189 | pair<typename ht::iterator, bool> p = rep.insert(obj);
|
---|
190 | return pair<iterator, bool>(p.first, p.second); // const to non-const
|
---|
191 | }
|
---|
192 | template <class InputIterator>
|
---|
193 | void insert(InputIterator f, InputIterator l) { rep.insert(f, l); }
|
---|
194 | void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
|
---|
195 | // required for std::insert_iterator; the passed-in iterator is ignored
|
---|
196 | iterator insert(iterator, const value_type& obj) { return insert(obj).first; }
|
---|
197 |
|
---|
198 |
|
---|
199 | // Deletion and empty routines
|
---|
200 | // THESE ARE NON-STANDARD! I make you specify an "impossible" key
|
---|
201 | // value to identify deleted and empty buckets. You can change the
|
---|
202 | // deleted key as time goes on, or get rid of it entirely to be insert-only.
|
---|
203 | void set_empty_key(const key_type& key) { rep.set_empty_key(key); }
|
---|
204 | void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
|
---|
205 | void clear_deleted_key() { rep.clear_deleted_key(); }
|
---|
206 |
|
---|
207 | // These are standard
|
---|
208 | size_type erase(const key_type& key) { return rep.erase(key); }
|
---|
209 | void erase(iterator it) { rep.erase(it); }
|
---|
210 | void erase(iterator f, iterator l) { rep.erase(f, l); }
|
---|
211 |
|
---|
212 |
|
---|
213 | // Comparison
|
---|
214 | bool operator==(const dense_hash_set& hs) const { return rep == hs.rep; }
|
---|
215 | bool operator!=(const dense_hash_set& hs) const { return rep != hs.rep; }
|
---|
216 |
|
---|
217 |
|
---|
218 | // I/O -- this is an add-on for writing metainformation to disk
|
---|
219 | bool write_metadata(FILE *fp) { return rep.write_metadata(fp); }
|
---|
220 | bool read_metadata(FILE *fp) { return rep.read_metadata(fp); }
|
---|
221 | bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); }
|
---|
222 | bool read_nopointer_data(FILE *fp) { return rep.read_nopointer_data(fp); }
|
---|
223 | };
|
---|
224 |
|
---|
225 | template <class Val, class HashFcn, class EqualKey, class Alloc>
|
---|
226 | inline void swap(dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
|
---|
227 | dense_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) {
|
---|
228 | hs1.swap(hs2);
|
---|
229 | }
|
---|
230 |
|
---|
231 | _END_GOOGLE_NAMESPACE_
|
---|
232 |
|
---|
233 | #endif /* _DENSE_HASH_SET_H_ */
|
---|